2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/domdec/domdec.h"
46 #include "gromacs/ewald/ewald.h"
47 #include "gromacs/gmxlib/cuda_tools/pmalloc_cuda.h"
48 #include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
49 #include "gromacs/legacyheaders/copyrite.h"
50 #include "gromacs/legacyheaders/force.h"
51 #include "gromacs/legacyheaders/gmx_detect_hardware.h"
52 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
53 #include "gromacs/legacyheaders/inputrec.h"
54 #include "gromacs/legacyheaders/macros.h"
55 #include "gromacs/legacyheaders/md_logging.h"
56 #include "gromacs/legacyheaders/md_support.h"
57 #include "gromacs/legacyheaders/names.h"
58 #include "gromacs/legacyheaders/network.h"
59 #include "gromacs/legacyheaders/nonbonded.h"
60 #include "gromacs/legacyheaders/ns.h"
61 #include "gromacs/legacyheaders/qmmm.h"
62 #include "gromacs/legacyheaders/tables.h"
63 #include "gromacs/legacyheaders/txtdump.h"
64 #include "gromacs/legacyheaders/typedefs.h"
65 #include "gromacs/legacyheaders/types/commrec.h"
66 #include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
67 #include "gromacs/math/calculate-ewald-splitting-coefficient.h"
68 #include "gromacs/math/units.h"
69 #include "gromacs/math/utilities.h"
70 #include "gromacs/math/vec.h"
71 #include "gromacs/mdlib/forcerec-threading.h"
72 #include "gromacs/mdlib/nb_verlet.h"
73 #include "gromacs/mdlib/nbnxn_atomdata.h"
74 #include "gromacs/mdlib/nbnxn_consts.h"
75 #include "gromacs/mdlib/nbnxn_search.h"
76 #include "gromacs/mdlib/nbnxn_simd.h"
77 #include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
78 #include "gromacs/pbcutil/ishift.h"
79 #include "gromacs/pbcutil/pbc.h"
80 #include "gromacs/simd/simd.h"
81 #include "gromacs/topology/mtop_util.h"
82 #include "gromacs/utility/fatalerror.h"
83 #include "gromacs/utility/smalloc.h"
/* Allocate and return a zero-initialized t_forcerec (body elided in this view). */
85 t_forcerec *mk_forcerec(void)
/* Debug-print the non-bonded force-parameter matrix: for every atom-type pair
 * print either Buckingham a/b/c (bBHAM) or Lennard-Jones c6/c12 parameters.
 * The /6.0 and /12.0 undo the derivative prefactors stored in nbfp. */
95 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
99 for (i = 0; (i < atnr); i++)
101 for (j = 0; (j < atnr); j++)
103 fprintf(fp, "%2d - %2d", i, j);
106 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
107 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
111 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
112 C12(nbfp, atnr, i, j)/12.0);
/* Build the flat non-bonded parameter matrix from the force-field parameters.
 * Buckingham entries take 3 reals per pair (a, b, c), LJ entries take 2
 * (c6, c12); the stored values carry the 6.0/12.0 derivative prefactors so
 * the kernels can use them directly. Caller owns the snew()'d array. */
119 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
127 snew(nbfp, 3*atnr*atnr);
128 for (i = k = 0; (i < atnr); i++)
130 for (j = 0; (j < atnr); j++, k++)
132 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
133 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
134 /* nbfp now includes the 6.0 derivative prefactor */
135 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
141 snew(nbfp, 2*atnr*atnr);
142 for (i = k = 0; (i < atnr); i++)
144 for (j = 0; (j < atnr); j++, k++)
146 /* nbfp now includes the 6.0/12.0 derivative prefactors */
147 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
148 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
/* Build the grid of C6 coefficients used by LJ-PME on the reciprocal grid.
 * Geometric combination is the default; for Lorentz-Berthelot (eljpmeLB) the
 * pair C6 is recomputed from per-type sigma/epsilon. Entries are stored at the
 * same offsets as C6 in nbfp (stride 2) and carry the 6.0 prefactor. */
156 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
159 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
161 const real oneOverSix = 1.0 / 6.0;
163 /* For LJ-PME simulations, we correct the energies with the reciprocal space
164 * inside of the cut-off. To do this the non-bonded kernels needs to have
165 * access to the C6-values used on the reciprocal grid in pme.c
169 snew(grid, 2*atnr*atnr);
170 for (i = k = 0; (i < atnr); i++)
172 for (j = 0; (j < atnr); j++, k++)
174 c6i = idef->iparams[i*(atnr+1)].lj.c6;
175 c12i = idef->iparams[i*(atnr+1)].lj.c12;
176 c6j = idef->iparams[j*(atnr+1)].lj.c6;
177 c12j = idef->iparams[j*(atnr+1)].lj.c12;
178 c6 = sqrt(c6i * c6j);
179 if (fr->ljpme_combination_rule == eljpmeLB
180 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
/* sigma = (c12/c6)^(1/6), eps = c6^2/(4*c12) up to a constant factor */
182 sigmai = pow(c12i / c6i, oneOverSix);
183 sigmaj = pow(c12j / c6j, oneOverSix);
184 epsi = c6i * c6i / c12i;
185 epsj = c6j * c6j / c12j;
186 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
188 /* Store the elements at the same relative positions as C6 in nbfp in order
189 * to simplify access in the kernels
191 grid[2*(atnr*i+j)] = c6*6.0;
/* Build an LJ parameter matrix using an explicit combination rule
 * (geometric by default, Lorentz-Berthelot when comb_rule == eCOMB_ARITHMETIC)
 * instead of the pair parameters from the topology. Used to compute the
 * difference between topology C6 values and the LJ-PME grid values. */
197 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
201 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
203 const real oneOverSix = 1.0 / 6.0;
206 snew(nbfp, 2*atnr*atnr);
207 for (i = 0; i < atnr; ++i)
209 for (j = 0; j < atnr; ++j)
/* Diagonal elements i*(atnr+1) hold the per-type (i,i) LJ parameters */
211 c6i = idef->iparams[i*(atnr+1)].lj.c6;
212 c12i = idef->iparams[i*(atnr+1)].lj.c12;
213 c6j = idef->iparams[j*(atnr+1)].lj.c6;
214 c12j = idef->iparams[j*(atnr+1)].lj.c12;
215 c6 = sqrt(c6i * c6j);
216 c12 = sqrt(c12i * c12j);
217 if (comb_rule == eCOMB_ARITHMETIC
218 && !gmx_numzero(c6) && !gmx_numzero(c12))
220 sigmai = pow(c12i / c6i, oneOverSix);
221 sigmaj = pow(c12j / c6j, oneOverSix);
222 epsi = c6i * c6i / c12i;
223 epsj = c6j * c6j / c12j;
224 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
225 c12 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
/* Store with the standard 6.0/12.0 derivative prefactors, as in mk_nbfp() */
227 C6(nbfp, atnr, i, j) = c6*6.0;
228 C12(nbfp, atnr, i, j) = c12*12.0;
234 /* This routine sets fr->solvent_opt to the most common solvent in the
235 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
236 * the fr->solvent_type array with the correct type (or esolNO).
238 * Charge groups that fulfill the conditions but are not identical to the
239 * most common one will be marked as esolNO in the solvent_type array.
241 * TIP3p is identical to SPC for these purposes, so we call it
242 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
244 * NOTE: QM particle should not
245 * become an optimized solvent. Not even if there is only one charge
255 } solvent_parameters_t;
/* Classify one charge group of a molecule type as a possible optimized
 * solvent (SPC-like 3-site or TIP4P-like 4-site water). On a match the
 * per-cg index *cg_sp is set to the matching entry in *solvent_parameters_p,
 * which is grown (srenew) when a new tentative solvent type is found.
 * Charge groups that cannot be optimized are left marked esolNO. */
258 check_solvent_cg(const gmx_moltype_t *molt,
261 const unsigned char *qm_grpnr,
262 const t_grps *qm_grps,
264 int *n_solvent_parameters,
265 solvent_parameters_t **solvent_parameters_p,
275 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
276 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
279 solvent_parameters_t *solvent_parameters;
281 /* We use a list with parameters for each solvent type.
282 * Every time we discover a new molecule that fulfills the basic
283 * conditions for a solvent we compare with the previous entries
284 * in these lists. If the parameters are the same we just increment
285 * the counter for that type, and otherwise we create a new type
286 * based on the current molecule.
288 * Once we've finished going through all molecules we check which
289 * solvent is most common, and mark all those molecules while we
290 * clear the flag on all others.
293 solvent_parameters = *solvent_parameters_p;
295 /* Mark the cg first as non optimized */
298 /* Check if this cg has no exclusions with atoms in other charge groups
299 * and all atoms inside the charge group excluded.
300 * We only have 3 or 4 atom solvent loops.
302 if (GET_CGINFO_EXCL_INTER(cginfo) ||
303 !GET_CGINFO_EXCL_INTRA(cginfo))
308 /* Get the indices of the first atom in this charge group */
309 j0 = molt->cgs.index[cg0];
310 j1 = molt->cgs.index[cg0+1];
312 /* Number of atoms in our molecule */
318 "Moltype '%s': there are %d atoms in this charge group\n",
322 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
325 if (nj < 3 || nj > 4)
330 /* Check if we are doing QM on this group */
332 if (qm_grpnr != NULL)
334 for (j = j0; j < j1 && !qm; j++)
336 qm = (qm_grpnr[j] < qm_grps->nr - 1);
339 /* Cannot use solvent optimization with QM */
345 atom = molt->atoms.atom;
347 /* Still looks like a solvent, time to check parameters */
349 /* If it is perturbed (free energy) we can't use the solvent loops,
350 * so then we just skip to the next molecule.
354 for (j = j0; j < j1 && !perturbed; j++)
356 perturbed = PERTURBED(atom[j]);
364 /* Now it's only a question if the VdW and charge parameters
365 * are OK. Before doing the check we compare and see if they are
366 * identical to a possible previous solvent type.
367 * First we assign the current types and charges.
369 for (j = 0; j < nj; j++)
371 tmp_vdwtype[j] = atom[j0+j].type;
372 tmp_charge[j] = atom[j0+j].q;
375 /* Does it match any previous solvent type? */
376 for (k = 0; k < *n_solvent_parameters; k++)
381 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
382 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
383 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
388 /* Check that types & charges match for all atoms in molecule */
389 for (j = 0; j < nj && match == TRUE; j++)
391 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
395 if (tmp_charge[j] != solvent_parameters[k].charge[j])
402 /* Congratulations! We have a matched solvent.
403 * Flag it with this type for later processing.
406 solvent_parameters[k].count += nmol;
408 /* We are done with this charge group */
413 /* If we get here, we have a tentative new solvent type.
414 * Before we add it we must check that it fulfills the requirements
415 * of the solvent optimized loops. First determine which atoms have
418 for (j = 0; j < nj; j++)
421 tjA = tmp_vdwtype[j];
423 /* Go through all other types and see if any have non-zero
424 * VdW parameters when combined with this one.
426 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
428 /* We already checked that the atoms weren't perturbed,
429 * so we only need to check state A now.
433 has_vdw[j] = (has_vdw[j] ||
434 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
435 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
436 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
441 has_vdw[j] = (has_vdw[j] ||
442 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
443 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
448 /* Now we know all we need to make the final check and assignment. */
452 * For this we require that all atoms have charge,
453 * the charges on atom 2 & 3 should be the same, and only
454 * atom 1 might have VdW.
456 if (has_vdw[1] == FALSE &&
457 has_vdw[2] == FALSE &&
458 tmp_charge[0] != 0 &&
459 tmp_charge[1] != 0 &&
460 tmp_charge[2] == tmp_charge[1])
462 srenew(solvent_parameters, *n_solvent_parameters+1);
463 solvent_parameters[*n_solvent_parameters].model = esolSPC;
464 solvent_parameters[*n_solvent_parameters].count = nmol;
465 for (k = 0; k < 3; k++)
467 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
468 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
471 *cg_sp = *n_solvent_parameters;
472 (*n_solvent_parameters)++;
477 /* Or could it be a TIP4P?
478 * For this we require that atoms 2,3,4 have charge, but not atom 1.
479 * Only atom 1 might have VdW.
481 if (has_vdw[1] == FALSE &&
482 has_vdw[2] == FALSE &&
483 has_vdw[3] == FALSE &&
484 tmp_charge[0] == 0 &&
485 tmp_charge[1] != 0 &&
486 tmp_charge[2] == tmp_charge[1] &&
489 srenew(solvent_parameters, *n_solvent_parameters+1);
490 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
491 solvent_parameters[*n_solvent_parameters].count = nmol;
492 for (k = 0; k < 4; k++)
494 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
495 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
498 *cg_sp = *n_solvent_parameters;
499 (*n_solvent_parameters)++;
503 *solvent_parameters_p = solvent_parameters;
/* Scan all molecule blocks for optimizable solvent charge groups via
 * check_solvent_cg(), pick the most common solvent model, set
 * fr->solvent_opt accordingly, and flag each charge group's cginfo
 * with either the best model or esolNO. */
507 check_solvent(FILE * fp,
508 const gmx_mtop_t * mtop,
510 cginfo_mb_t *cginfo_mb)
513 const gmx_moltype_t *molt;
514 int mb, mol, cg_mol, at_offset, am, cgm, i, nmol_ch, nmol;
515 int n_solvent_parameters;
516 solvent_parameters_t *solvent_parameters;
522 fprintf(debug, "Going to determine what solvent types we have.\n");
525 n_solvent_parameters = 0;
526 solvent_parameters = NULL;
527 /* Allocate temporary array for solvent type */
528 snew(cg_sp, mtop->nmolblock);
531 for (mb = 0; mb < mtop->nmolblock; mb++)
533 molt = &mtop->moltype[mtop->molblock[mb].type];
535 /* Here we have to loop over all individual molecules
536 * because we need to check for QMMM particles.
538 snew(cg_sp[mb], cginfo_mb[mb].cg_mod)
539 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
540 nmol = mtop->molblock[mb].nmol/nmol_ch;
541 for (mol = 0; mol < nmol_ch; mol++)
544 am = mol*cgs->index[cgs->nr];
545 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
547 check_solvent_cg(molt, cg_mol, nmol,
548 mtop->groups.grpnr[egcQMMM] ?
549 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
550 &mtop->groups.grps[egcQMMM],
552 &n_solvent_parameters, &solvent_parameters,
553 cginfo_mb[mb].cginfo[cgm+cg_mol],
554 &cg_sp[mb][cgm+cg_mol]);
557 at_offset += cgs->index[cgs->nr];
560 /* Puh! We finished going through all charge groups.
561 * Now find the most common solvent model.
564 /* Most common solvent this far */
566 for (i = 0; i < n_solvent_parameters; i++)
569 solvent_parameters[i].count > solvent_parameters[bestsp].count)
577 bestsol = solvent_parameters[bestsp].model;
/* Mark charge groups: only those matching the winning model keep it */
585 for (mb = 0; mb < mtop->nmolblock; mb++)
587 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
588 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
589 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
591 if (cg_sp[mb][i] == bestsp)
593 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
598 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
605 if (bestsol != esolNO && fp != NULL)
607 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
609 solvent_parameters[bestsp].count);
612 sfree(solvent_parameters);
613 fr->solvent_opt = bestsol;
617 acNONE = 0, acCONSTRAINT, acSETTLE
/* Build per-molblock charge-group info (cginfo_mb): energy-group id,
 * constraint/SETTLE flags, intra/inter-cg exclusion flags, VdW/charge
 * presence, FEP flag and cg size. Also runs the solvent optimization
 * detection (check_solvent) unless bNoSolvOpt or GMX_NO_SOLV_OPT is set.
 * Outputs whether any non-bonded FEP is present and whether all
 * exclusions are intra-cg-complete / inter-cg-absent. */
620 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
621 t_forcerec *fr, gmx_bool bNoSolvOpt,
622 gmx_bool *bFEP_NonBonded,
623 gmx_bool *bExcl_IntraCGAll_InterCGNone)
626 const t_blocka *excl;
627 const gmx_moltype_t *molt;
628 const gmx_molblock_t *molb;
629 cginfo_mb_t *cginfo_mb;
632 int cg_offset, a_offset;
633 int mb, m, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
637 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
639 snew(cginfo_mb, mtop->nmolblock);
/* Precompute per-type flag: does this atom type have any non-zero VdW
 * interaction with any other type? */
641 snew(type_VDW, fr->ntype);
642 for (ai = 0; ai < fr->ntype; ai++)
644 type_VDW[ai] = FALSE;
645 for (j = 0; j < fr->ntype; j++)
647 type_VDW[ai] = type_VDW[ai] ||
649 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
650 C12(fr->nbfp, fr->ntype, ai, j) != 0;
654 *bFEP_NonBonded = FALSE;
655 *bExcl_IntraCGAll_InterCGNone = TRUE;
658 snew(bExcl, excl_nalloc);
661 for (mb = 0; mb < mtop->nmolblock; mb++)
663 molb = &mtop->molblock[mb];
664 molt = &mtop->moltype[molb->type];
668 /* Check if the cginfo is identical for all molecules in this block.
669 * If so, we only need an array of the size of one molecule.
670 * Otherwise we make an array of #mol times #cgs per molecule.
673 for (m = 0; m < molb->nmol; m++)
675 int am = m*cgs->index[cgs->nr];
676 for (cg = 0; cg < cgs->nr; cg++)
679 a1 = cgs->index[cg+1];
680 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
681 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
685 if (mtop->groups.grpnr[egcQMMM] != NULL)
687 for (ai = a0; ai < a1; ai++)
689 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
690 mtop->groups.grpnr[egcQMMM][a_offset +ai])
699 cginfo_mb[mb].cg_start = cg_offset;
700 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
701 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
702 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
703 cginfo = cginfo_mb[mb].cginfo;
705 /* Set constraints flags for constrained atoms */
706 snew(a_con, molt->atoms.nr);
707 for (ftype = 0; ftype < F_NRE; ftype++)
709 if (interaction_function[ftype].flags & IF_CONSTRAINT)
714 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
718 for (a = 0; a < nral; a++)
720 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
721 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
727 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
730 int am = m*cgs->index[cgs->nr];
731 for (cg = 0; cg < cgs->nr; cg++)
734 a1 = cgs->index[cg+1];
736 /* Store the energy group in cginfo */
737 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
738 SET_CGINFO_GID(cginfo[cgm+cg], gid);
740 /* Check the intra/inter charge group exclusions */
741 if (a1-a0 > excl_nalloc)
743 excl_nalloc = a1 - a0;
744 srenew(bExcl, excl_nalloc);
746 /* bExclIntraAll: all intra cg interactions excluded
747 * bExclInter: any inter cg interactions excluded
749 bExclIntraAll = TRUE;
753 bHavePerturbedAtoms = FALSE;
754 for (ai = a0; ai < a1; ai++)
756 /* Check VDW and electrostatic interactions */
757 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
758 type_VDW[molt->atoms.atom[ai].typeB]);
759 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
760 molt->atoms.atom[ai].qB != 0);
762 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
764 /* Clear the exclusion list for atom ai */
765 for (aj = a0; aj < a1; aj++)
767 bExcl[aj-a0] = FALSE;
769 /* Loop over all the exclusions of atom ai */
770 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
773 if (aj < a0 || aj >= a1)
782 /* Check if ai excludes a0 to a1 */
783 for (aj = a0; aj < a1; aj++)
787 bExclIntraAll = FALSE;
794 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
797 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
805 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
809 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
811 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
813 /* The size in cginfo is currently only read with DD */
814 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
818 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
822 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
824 if (bHavePerturbedAtoms && fr->efep != efepNO)
826 SET_CGINFO_FEP(cginfo[cgm+cg]);
827 *bFEP_NonBonded = TRUE;
829 /* Store the charge group size */
830 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
832 if (!bExclIntraAll || bExclInter)
834 *bExcl_IntraCGAll_InterCGNone = FALSE;
841 cg_offset += molb->nmol*cgs->nr;
842 a_offset += molb->nmol*cgs->index[cgs->nr];
846 /* the solvent optimizer is called after the QM is initialized,
847 * because we don't want to have the QM subsystem to become an
851 check_solvent(fplog, mtop, fr, cginfo_mb);
853 if (getenv("GMX_NO_SOLV_OPT"))
857 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
858 "Disabling all solvent optimization\n");
860 fr->solvent_opt = esolNO;
864 fr->solvent_opt = esolNO;
866 if (!fr->solvent_opt)
868 for (mb = 0; mb < mtop->nmolblock; mb++)
870 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
872 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
/* Expand the compressed per-molblock cginfo arrays into one flat array
 * indexed by global charge-group number; the modulo handles blocks that
 * stored only one molecule's worth of entries (cg_mod). */
880 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
885 ncg = cgi_mb[nmb-1].cg_end;
888 for (cg = 0; cg < ncg; cg++)
890 while (cg >= cgi_mb[mb].cg_end)
895 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
/* Accumulate system-wide charge sums (q, q^2) and the C6 sum into
 * fr->qsum/q2sum/c6sum for state A, and for state B when free energy
 * is enabled (otherwise state B copies state A). Totals are reported
 * to the log file. */
901 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
903 /*This now calculates sum for q and c6*/
904 double qsum, q2sum, q, c6sum, c6;
906 const t_atoms *atoms;
911 for (mb = 0; mb < mtop->nmolblock; mb++)
913 nmol = mtop->molblock[mb].nmol;
914 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
915 for (i = 0; i < atoms->nr; i++)
917 q = atoms->atom[i].q;
/* C6 of the diagonal (i,i) type entry */
920 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
925 fr->q2sum[0] = q2sum;
926 fr->c6sum[0] = c6sum;
928 if (fr->efep != efepNO)
933 for (mb = 0; mb < mtop->nmolblock; mb++)
935 nmol = mtop->molblock[mb].nmol;
936 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
937 for (i = 0; i < atoms->nr; i++)
939 q = atoms->atom[i].qB;
942 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
946 fr->q2sum[1] = q2sum;
947 fr->c6sum[1] = c6sum;
952 fr->qsum[1] = fr->qsum[0];
953 fr->q2sum[1] = fr->q2sum[0];
954 fr->c6sum[1] = fr->c6sum[0];
958 if (fr->efep == efepNO)
960 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
964 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
965 fr->qsum[0], fr->qsum[1]);
/* Refresh box-dependent forcerec parameters; currently only the
 * generalized reaction-field constants (kappa, k_rf, c_rf). */
970 void update_forcerec(t_forcerec *fr, matrix box)
972 if (fr->eeltype == eelGRF)
974 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
975 fr->rcoulomb, fr->temp, fr->zsquare, box,
976 &fr->kappa, &fr->k_rf, &fr->c_rf);
/* Compute the average C6 and C12 parameters over all interacting atom
 * pairs (fr->avcsix/avctwelve) for the long-range dispersion correction,
 * subtracting excluded pairs and handling LJ-PME (difference vs. the
 * combination-rule grid), free-energy state B and test-particle insertion. */
980 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
982 const t_atoms *atoms, *atoms_tpi;
983 const t_blocka *excl;
984 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, nexcl, q;
985 gmx_int64_t npair, npair_ij, tmpi, tmpj;
986 double csix, ctwelve;
990 real *nbfp_comb = NULL;
996 /* For LJ-PME, we want to correct for the difference between the
997 * actual C6 values and the C6 values used by the LJ-PME based on
998 * combination rules. */
1000 if (EVDW_PME(fr->vdwtype))
1002 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1003 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1004 for (tpi = 0; tpi < ntp; ++tpi)
1006 for (tpj = 0; tpj < ntp; ++tpj)
1008 C6(nbfp_comb, ntp, tpi, tpj) =
1009 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1010 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
/* Loop once for state A, twice when free energy needs state B too */
1015 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1023 /* Count the types so we avoid natoms^2 operations */
1024 snew(typecount, ntp);
1025 gmx_mtop_count_atomtypes(mtop, q, typecount);
1027 for (tpi = 0; tpi < ntp; tpi++)
1029 for (tpj = tpi; tpj < ntp; tpj++)
1031 tmpi = typecount[tpi];
1032 tmpj = typecount[tpj];
1035 npair_ij = tmpi*tmpj;
1039 npair_ij = tmpi*(tmpi - 1)/2;
1043 /* nbfp now includes the 6.0 derivative prefactor */
1044 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1048 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1049 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1050 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1056 /* Subtract the excluded pairs.
1057 * The main reason for subtracting exclusions is that in some cases
1058 * some combinations might never occur and the parameters could have
1059 * any value. These unused values should not influence the dispersion
1062 for (mb = 0; mb < mtop->nmolblock; mb++)
1064 nmol = mtop->molblock[mb].nmol;
1065 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1066 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1067 for (i = 0; (i < atoms->nr); i++)
1071 tpi = atoms->atom[i].type;
1075 tpi = atoms->atom[i].typeB;
1077 j1 = excl->index[i];
1078 j2 = excl->index[i+1];
1079 for (j = j1; j < j2; j++)
1086 tpj = atoms->atom[k].type;
1090 tpj = atoms->atom[k].typeB;
1094 /* nbfp now includes the 6.0 derivative prefactor */
1095 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1099 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1100 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1101 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1111 /* Only correct for the interaction of the test particle
1112 * with the rest of the system.
1115 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1118 for (mb = 0; mb < mtop->nmolblock; mb++)
1120 nmol = mtop->molblock[mb].nmol;
1121 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1122 for (j = 0; j < atoms->nr; j++)
1125 /* Remove the interaction of the test charge group
1128 if (mb == mtop->nmolblock-1)
1132 if (mb == 0 && nmol == 1)
1134 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1139 tpj = atoms->atom[j].type;
1143 tpj = atoms->atom[j].typeB;
1145 for (i = 0; i < fr->n_tpi; i++)
1149 tpi = atoms_tpi->atom[i].type;
1153 tpi = atoms_tpi->atom[i].typeB;
1157 /* nbfp now includes the 6.0 derivative prefactor */
1158 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1162 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1163 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1164 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1171 if (npair - nexcl <= 0 && fplog)
1173 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1179 csix /= npair - nexcl;
1180 ctwelve /= npair - nexcl;
1184 fprintf(debug, "Counted %d exclusions\n", nexcl);
1185 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1186 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1188 fr->avcsix[q] = csix;
1189 fr->avctwelve[q] = ctwelve;
1192 if (EVDW_PME(fr->vdwtype))
1199 if (fr->eDispCorr == edispcAllEner ||
1200 fr->eDispCorr == edispcAllEnerPres)
1202 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1203 fr->avcsix[0], fr->avctwelve[0]);
1207 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
/* Scan all atom-type pairs occurring in the topology and record the
 * largest Buckingham b parameter in fr->bham_b_max (used for table
 * generation); also reports the min/max range to the log. */
1213 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1214 const gmx_mtop_t *mtop)
1216 const t_atoms *at1, *at2;
1217 int mt1, mt2, i, j, tpi, tpj, ntypes;
1223 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1230 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1232 at1 = &mtop->moltype[mt1].atoms;
1233 for (i = 0; (i < at1->nr); i++)
1235 tpi = at1->atom[i].type;
1238 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1241 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1243 at2 = &mtop->moltype[mt2].atoms;
1244 for (j = 0; (j < at2->nr); j++)
1246 tpj = at2->atom[j].type;
1249 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1251 b = BHAMB(nbfp, ntypes, tpi, tpj);
1252 if (b > fr->bham_b_max)
1256 if ((b < bmin) || (bmin == -1))
1266 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1267 bmin, fr->bham_b_max);
/* Read a combined electrostatics+VdW interaction table (appending the two
 * energy-group names to the base file name) and split it into separate
 * aligned coulomb and LJ tables for better cache behaviour.
 * The combined table stores 12 reals per point: 4 elec + 8 vdw. */
1271 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1272 t_forcerec *fr, real rtab,
1273 const t_commrec *cr,
1274 const char *tabfn, char *eg1, char *eg2,
1284 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1289 sprintf(buf, "%s", tabfn);
1292 /* Append the two energy group names */
1293 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1294 eg1, eg2, ftp2ext(efXVG));
1296 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1297 /* Copy the contents of the table to separate coulomb and LJ tables too,
1298 * to improve cache performance.
1300 /* For performance reasons we want
1301 * the table data to be aligned to 16-byte. The pointers could be freed
1302 * but currently aren't.
1304 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1305 nbl->table_elec.format = nbl->table_elec_vdw.format;
1306 nbl->table_elec.r = nbl->table_elec_vdw.r;
1307 nbl->table_elec.n = nbl->table_elec_vdw.n;
1308 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1309 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1310 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1311 nbl->table_elec.ninteractions = 1;
1312 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1313 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1315 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1316 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1317 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1318 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1319 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1320 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1321 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1322 nbl->table_vdw.ninteractions = 2;
1323 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1324 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1326 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1328 for (j = 0; j < 4; j++)
1330 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1332 for (j = 0; j < 8; j++)
1334 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
/* Count, over all molecule types, how often each bonded-table number is
 * referenced by interactions of type ftype1 or ftype2. Grows *count (and
 * *ncount) to cover the highest table number seen. */
1339 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1340 int *ncount, int **count)
1342 const gmx_moltype_t *molt;
1344 int mt, ftype, stride, i, j, tabnr;
1346 for (mt = 0; mt < mtop->nmoltype; mt++)
1348 molt = &mtop->moltype[mt];
1349 for (ftype = 0; ftype < F_NRE; ftype++)
1351 if (ftype == ftype1 || ftype == ftype2)
1353 il = &molt->ilist[ftype];
1354 stride = 1 + NRAL(ftype);
1355 for (i = 0; i < il->nr; i += stride)
1357 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1360 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1362 if (tabnr >= *ncount)
1364 srenew(*count, tabnr+1);
1365 for (j = *ncount; j < tabnr+1; j++)
/* Read all bonded tables referenced by interactions of type ftype1/ftype2,
 * deriving each table's file name from basefn by inserting "_<tabext><i>".
 * Returns a snew()'d array of bondedtable_t indexed by table number. */
1378 static bondedtable_t *make_bonded_tables(FILE *fplog,
1379 int ftype1, int ftype2,
1380 const gmx_mtop_t *mtop,
1381 const char *basefn, const char *tabext)
1383 int i, ncount, *count;
1391 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1396 for (i = 0; i < ncount; i++)
1400 sprintf(tabfn, "%s", basefn);
1401 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1402 tabext, i, ftp2ext(efXVG));
1403 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
/* Record the charge-group/atom ranges for this domain in the forcerec and
 * (re)allocate the force buffers (f_twin, f_novirsum) when the constrained
 * atom count exceeds the current allocation. */
1412 void forcerec_set_ranges(t_forcerec *fr,
1413 int ncg_home, int ncg_force,
1415 int natoms_force_constr, int natoms_f_novirsum)
1420 /* fr->ncg_force is unused in the standard code,
1421 * but it can be useful for modified code dealing with charge groups.
1423 fr->ncg_force = ncg_force;
1424 fr->natoms_force = natoms_force;
1425 fr->natoms_force_constr = natoms_force_constr;
1427 if (fr->natoms_force_constr > fr->nalloc_force)
1429 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1433 srenew(fr->f_twin, fr->nalloc_force);
1437 if (fr->bF_NoVirSum)
1439 fr->f_novirsum_n = natoms_f_novirsum;
1440 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1442 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1443 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1448 fr->f_novirsum_n = 0;
/* Map a zero (i.e. "no cut-off") value to GMX_CUTOFF_INF so downstream
 * comparisons can treat an unset cut-off as infinite. */
1452 static real cutoff_inf(real cutoff)
1456 cutoff = GMX_CUTOFF_INF;
/* Load one AdResS thermodynamic-force table per tf energy group, deriving
 * each file name from tabfn by inserting "tf_<groupname>". Fatal error if
 * no table file name was supplied. */
1462 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1463 t_forcerec *fr, const t_inputrec *ir,
1464 const char *tabfn, const gmx_mtop_t *mtop,
1472 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1476 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1478 sprintf(buf, "%s", tabfn);
1479 for (i = 0; i < ir->adress->n_tf_grps; i++)
1481 j = ir->adress->tf_table_index[i]; /* get energy group index */
1482 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1483 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1486 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1488 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
/* Decide whether the special all-vs-all non-bonded kernels can be used:
 * requires infinite (zero) cut-offs, no PBC, plain cut-off Coulomb/VdW,
 * no free energy, and either no implicit solvent or a supported GB
 * algorithm; can be vetoed with the GMX_NO_ALLVSALL environment variable.
 * NOTE(review): the start of the boolean expression is elided in this view. */
1493 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1500 ir->rcoulomb == 0 &&
1502 ir->ePBC == epbcNONE &&
1503 ir->vdwtype == evdwCUT &&
1504 ir->coulombtype == eelCUT &&
1505 ir->efep == efepNO &&
1506 (ir->implicit_solvent == eisNO ||
1507 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1508 ir->gb_algorithm == egbHCT ||
1509 ir->gb_algorithm == egbOBC))) &&
1510 getenv("GMX_NO_ALLVSALL") == NULL
/* Multiple energy monitor groups defeat all-vs-all; tell the user why. */
1513 if (bAllvsAll && ir->opts.ngener > 1)
1515 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1521 fprintf(stderr, "\n%s\n", note);
1525 fprintf(fp, "\n%s\n", note);
/* Only the master rank reports the final choice to the log file. */
1531 if (bAllvsAll && fp && MASTER(cr))
1533 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
/* Allocate per-thread force/energy accumulation buffers for the bonded
 * OpenMP threads. Thread 0 reuses the global arrays, so per-thread state
 * is only set up for threads 1..nthreads-1. */
1540 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1544 /* These thread local data structures are used for bondeds only */
1545 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1547 if (fr->nthreads > 1)
1549 snew(fr->f_t, fr->nthreads);
1550 /* Thread 0 uses the global force and energy arrays */
1551 for (t = 1; t < fr->nthreads; t++)
/* Force buffer is grown lazily; start with no allocation. */
1553 fr->f_t[t].f = NULL;
1554 fr->f_t[t].f_nalloc = 0;
1555 snew(fr->f_t[t].fshift, SHIFTS);
/* One energy accumulator per (group i, group j) pair. */
1556 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1557 for (i = 0; i < egNR; i++)
1559 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
/* Check whether the requested nbnxn acceleration (GPU or SIMD) supports the
 * chosen interactions; currently only LJ-PME with the Lorentz-Berthelot
 * combination rule is unsupported on the SIMD path.
 * NOTE(review): the visible branch is guarded by !bGPU, so the ternaries
 * below always select the "SIMD kernels"/"plain-C kernels" strings here;
 * the bGPU-true wording would only apply if elided code also used them. */
1566 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1567 const t_commrec *cr,
1568 const t_inputrec *ir,
1571 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1573 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1574 bGPU ? "GPUs" : "SIMD kernels",
1575 bGPU ? "CPU only" : "plain-C kernels");
/* Choose the CPU nbnxn kernel flavour (plain C, SIMD 4xN, or SIMD 2x(N+N))
 * and the Ewald exclusion treatment (table vs analytical). Defaults to
 * plain C + table; compile-time SIMD support and heuristics refine the
 * choice, and GMX_NBNXN_SIMD_4XN / GMX_NBNXN_SIMD_2XNN /
 * GMX_NBNXN_EWALD_TABLE / GMX_NBNXN_EWALD_ANALYTICAL env vars override it. */
1583 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
/* Safe fallback, used when no SIMD kernels are compiled in. */
1587 *kernel_type = nbnxnk4x4_PlainC;
1588 *ewald_excl = ewaldexclTable;
1590 #ifdef GMX_NBNXN_SIMD
1592 #ifdef GMX_NBNXN_SIMD_4XN
1593 *kernel_type = nbnxnk4xN_SIMD_4xN;
1595 #ifdef GMX_NBNXN_SIMD_2XNN
1596 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1599 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1600 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1601 * Currently this is based on the SIMD acceleration choice,
1602 * but it might be better to decide this at runtime based on CPU.
1604 * 4xN calculates more (zero) interactions, but has less pair-search
1605 * work and much better kernel instruction scheduling.
1607 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1608 * which doesn't have FMA, both the analytical and tabulated Ewald
1609 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1610 * 2x(4+4) because it results in significantly fewer pairs.
1611 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1612 * 10% with HT, 50% without HT. As we currently don't detect the actual
1613 * use of HT, use 4x8 to avoid a potential performance hit.
1614 * On Intel Haswell 4x8 is always faster.
1616 *kernel_type = nbnxnk4xN_SIMD_4xN;
1618 #ifndef GMX_SIMD_HAVE_FMA
1619 if (EEL_PME_EWALD(ir->coulombtype) ||
1620 EVDW_PME(ir->vdwtype))
1622 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1623 * There are enough instructions to make 2x(4+4) efficient.
1625 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1628 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
/* Environment overrides: fatal if the requested flavour was not built. */
1631 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1633 #ifdef GMX_NBNXN_SIMD_4XN
1634 *kernel_type = nbnxnk4xN_SIMD_4xN;
1636 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1639 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1641 #ifdef GMX_NBNXN_SIMD_2XNN
1642 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1644 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1648 /* Analytical Ewald exclusion correction is only an option in
1650 * Since table lookup's don't parallelize with SIMD, analytical
1651 * will probably always be faster for a SIMD width of 8 or more.
1652 * With FMA analytical is sometimes faster for a width if 4 as well.
1653 * On BlueGene/Q, this is faster regardless of precision.
1654 * In single precision, this is faster on Bulldozer.
1656 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1657 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1658 defined GMX_SIMD_IBM_QPX
1659 *ewald_excl = ewaldexclAnalytical;
1661 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1663 *ewald_excl = ewaldexclTable;
1665 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1667 *ewald_excl = ewaldexclAnalytical;
1671 #endif /* GMX_NBNXN_SIMD */
/* Return a human-readable name for an nbnxn kernel type, used in log
 * messages. For the SIMD kernel types the name reflects the SIMD
 * instruction set the binary was compiled for. */
1675 const char *lookup_nbnxn_kernel_name(int kernel_type)
1677 const char *returnvalue = NULL;
1678 switch (kernel_type)
1681 returnvalue = "not set";
1683 case nbnxnk4x4_PlainC:
1684 returnvalue = "plain C";
/* Both SIMD layouts report the compile-time SIMD flavour. */
1686 case nbnxnk4xN_SIMD_4xN:
1687 case nbnxnk4xN_SIMD_2xNN:
1688 #ifdef GMX_NBNXN_SIMD
1689 #if defined GMX_SIMD_X86_SSE2
1690 returnvalue = "SSE2";
1691 #elif defined GMX_SIMD_X86_SSE4_1
1692 returnvalue = "SSE4.1";
1693 #elif defined GMX_SIMD_X86_AVX_128_FMA
1694 returnvalue = "AVX_128_FMA";
1695 #elif defined GMX_SIMD_X86_AVX_256
1696 returnvalue = "AVX_256";
1697 #elif defined GMX_SIMD_X86_AVX2_256
1698 returnvalue = "AVX2_256";
/* Generic fallback for other SIMD architectures. */
1700 returnvalue = "SIMD";
1702 #else /* GMX_NBNXN_SIMD */
1703 returnvalue = "not available";
1704 #endif /* GMX_NBNXN_SIMD */
1706 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1707 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1711 gmx_fatal(FARGS, "Illegal kernel type selected");
/* Top-level nbnxn kernel selection: GPU (real or emulated) takes priority,
 * otherwise a CPU kernel is chosen (SIMD when supported and enabled, plain C
 * as fallback). Reports the choice to the log and warns when a slow plain-C
 * kernel ends up being used. */
1718 static void pick_nbnxn_kernel(FILE *fp,
1719 const t_commrec *cr,
1720 gmx_bool use_simd_kernels,
1722 gmx_bool bEmulateGPU,
1723 const t_inputrec *ir,
1726 gmx_bool bDoNonbonded)
1728 assert(kernel_type);
1730 *kernel_type = nbnxnkNotSet;
1731 *ewald_excl = ewaldexclTable;
/* GPU emulation runs the GPU-layout (8x8x8) kernels in plain C on the CPU. */
1735 *kernel_type = nbnxnk8x8x8_PlainC;
1739 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1744 *kernel_type = nbnxnk8x8x8_CUDA;
/* No GPU path selected: fall through to the CPU kernel choice. */
1747 if (*kernel_type == nbnxnkNotSet)
1749 /* LJ PME with LB combination rule does 7 mesh operations.
1750 * This so slow that we don't compile SIMD non-bonded kernels for that.
1752 if (use_simd_kernels &&
1753 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1755 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1759 *kernel_type = nbnxnk4x4_PlainC;
1763 if (bDoNonbonded && fp != NULL)
1765 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1766 lookup_nbnxn_kernel_name(*kernel_type),
1767 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1768 nbnxn_kernel_to_cj_size(*kernel_type));
/* Plain-C kernels are a last resort; make sure the user notices. */
1770 if (nbnxnk4x4_PlainC == *kernel_type ||
1771 nbnxnk8x8x8_PlainC == *kernel_type)
1773 md_print_warn(cr, fp,
1774 "WARNING: Using the slow %s kernels. This should\n"
1775 "not happen during routine usage on supported platforms.\n\n",
1776 lookup_nbnxn_kernel_name(*kernel_type));
/* Decide whether this rank runs non-bondeds on a real GPU or in GPU
 * emulation mode, and initialize the assigned GPU device when applicable.
 * Emulation is triggered by GMX_EMULATE_GPU or (implicitly) by disabling
 * non-bondeds via GMX_NO_NONBONDED. */
1781 static void pick_nbnxn_resources(FILE *fp,
1782 const t_commrec *cr,
1783 const gmx_hw_info_t *hwinfo,
1784 gmx_bool bDoNonbonded,
1786 gmx_bool *bEmulateGPU,
1787 const gmx_gpu_opt_t *gpu_opt)
1789 gmx_bool bEmulateGPUEnvVarSet;
1790 char gpu_err_str[STRLEN];
1794 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1796 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1797 * GPUs (currently) only handle non-bonded calculations, we will
1798 * automatically switch to emulation if non-bonded calculations are
1799 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1800 * way to turn off GPU initialization, data movement, and cleanup.
1802 * GPU emulation can be useful to assess the performance one can expect by
1803 * adding GPU(s) to the machine. The conditional below allows this even
1804 * if mdrun is compiled without GPU acceleration support.
1805 * Note that you should freeze the system as otherwise it will explode.
1807 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1809 gpu_opt->ncuda_dev_use > 0));
1811 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1813 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1815 /* Each PP node will use the intra-node id-th device from the
1816 * list of detected/selected GPUs. */
1817 if (!init_gpu(fp, cr->rank_pp_intranode, gpu_err_str,
1818 &hwinfo->gpu_info, gpu_opt))
1820 /* At this point the init should never fail as we made sure that
1821 * we have all the GPUs we need. If it still does, we'll bail. */
1822 gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1824 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1825 cr->rank_pp_intranode),
1829 /* Here we actually turn on hardware GPU acceleration */
/* Report whether the given cut-off scheme / verlet group uses "simple"
 * (CPU-layout) pair lists, which determines the Ewald table layout.
 * group < 0 selects the local group; otherwise the last (non-local) group
 * is queried. */
1834 gmx_bool uses_simple_tables(int cutoff_scheme,
1835 nonbonded_verlet_t *nbv,
1838 gmx_bool bUsesSimpleTables = TRUE;
1841 switch (cutoff_scheme)
/* Group scheme always uses simple tables. */
1844 bUsesSimpleTables = TRUE;
1847 assert(NULL != nbv && NULL != nbv->grp);
1848 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1849 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1852 gmx_incons("unimplemented");
1854 return bUsesSimpleTables;
/* (Re)build the tabulated Ewald correction force/potential tables for
 * Coulomb and, with LJ-PME, for VdW. Table spacing differs between simple
 * (CPU) layouts, where it is derived from the spline error tolerance, and
 * GPU layouts, which use a fixed table size over rcoulomb. */
1857 static void init_ewald_f_table(interaction_const_t *ic,
1858 gmx_bool bUsesSimpleTables,
1863 if (bUsesSimpleTables)
1865 /* Get the Ewald table spacing based on Coulomb and/or LJ
1866 * Ewald coefficients and rtol.
1868 ic->tabq_scale = ewald_spline3_table_scale(ic);
/* The table must span the longest of the table range and rcoulomb. */
1870 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1871 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1875 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1876 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1877 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
/* Free any previous tables before (re)allocating at the new size. */
1880 sfree_aligned(ic->tabq_coul_FDV0);
1881 sfree_aligned(ic->tabq_coul_F);
1882 sfree_aligned(ic->tabq_coul_V);
1884 sfree_aligned(ic->tabq_vdw_FDV0);
1885 sfree_aligned(ic->tabq_vdw_F);
1886 sfree_aligned(ic->tabq_vdw_V);
1888 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1890 /* Create the original table data in FDV0 */
1891 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1892 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1893 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1894 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1895 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
/* LJ-PME gets its own tables, filled with the LJ Ewald kernel. */
1898 if (EVDW_PME(ic->vdwtype))
1900 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1901 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1902 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1903 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1904 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
/* Build the Ewald correction tables when any Ewald-type electrostatics or
 * LJ-PME is in use, and log the resulting spacing and size. */
1908 void init_interaction_const_tables(FILE *fp,
1909 interaction_const_t *ic,
1910 gmx_bool bUsesSimpleTables,
1913 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1915 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1919 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1920 1/ic->tabq_scale, ic->tabq_size);
/* Reset the force-switch shift constants (c2, c3, cpot) to zero.
 * NOTE(review): the body is elided in this extract. */
1925 static void clear_force_switch_constants(shift_consts_t *sc)
/* Compute the force-switch coefficients for an r^-p interaction switched
 * to zero between rsw and the cut-off rc; see the derivation in the
 * original comment below (the factor p is absorbed into c2/c3). */
1932 static void force_switch_constants(real p,
1936 /* Here we determine the coefficient for shifting the force to zero
1937 * between distance rsw and the cut-off rc.
1938 * For a potential of r^-p, we have force p*r^-(p+1).
1939 * But to save flops we absorb p in the coefficient.
1941 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1942 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1944 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1945 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
/* cpot makes the switched potential zero at the cut-off. */
1946 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
/* Compute the quintic potential-switch polynomial coefficients: the switch
 * function is 1 at rsw, 0 at rc, with zero first and second derivatives at
 * both ends (standard 5th-order switching function). */
1949 static void potential_switch_constants(real rsw, real rc,
1950 switch_consts_t *sc)
1952 /* The switch function is 1 at rsw and 0 at rc.
1953 * The derivative and second derivate are zero at both ends.
1954 * rsw = max(r - r_switch, 0)
1955 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1956 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1957 * force = force*dsw - potential*sw
1960 sc->c3 = -10*pow(rc - rsw, -3);
1961 sc->c4 = 15*pow(rc - rsw, -4);
1962 sc->c5 = -6*pow(rc - rsw, -5);
/* Allocate and populate the interaction_const_t from the force record:
 * copies cut-offs and Ewald coefficients, computes VdW modifier shift/
 * switch constants and reaction-field parameters, logs the potential
 * shifts, hands the constants to the CUDA module when a GPU is in use,
 * and finally builds the Ewald correction tables.
 * NOTE(review): many lines (snew of ic, braces, barrier call) are elided
 * in this extract. */
1966 init_interaction_const(FILE *fp,
1967 const t_commrec gmx_unused *cr,
1968 interaction_const_t **interaction_const,
1969 const t_forcerec *fr,
1972 interaction_const_t *ic;
1973 gmx_bool bUsesSimpleTables = TRUE;
/* pow() exponents as reals, to pick the real-precision pow overload. */
1974 const real minusSix = -6.0;
1975 const real minusTwelve = -12.0;
1979 /* Just allocate something so we can free it */
1980 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1981 snew_aligned(ic->tabq_coul_F, 16, 32);
1982 snew_aligned(ic->tabq_coul_V, 16, 32);
1984 ic->rlist = fr->rlist;
1985 ic->rlistlong = fr->rlistlong;
/* Van der Waals parameters. */
1988 ic->vdwtype = fr->vdwtype;
1989 ic->vdw_modifier = fr->vdw_modifier;
1990 ic->rvdw = fr->rvdw;
1991 ic->rvdw_switch = fr->rvdw_switch;
1992 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1993 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1994 ic->sh_lj_ewald = 0;
1995 clear_force_switch_constants(&ic->dispersion_shift);
1996 clear_force_switch_constants(&ic->repulsion_shift);
1998 switch (ic->vdw_modifier)
2000 case eintmodPOTSHIFT:
2001 /* Only shift the potential, don't touch the force */
2002 ic->dispersion_shift.cpot = -pow(ic->rvdw, minusSix);
2003 ic->repulsion_shift.cpot = -pow(ic->rvdw, minusTwelve);
2004 if (EVDW_PME(ic->vdwtype))
/* LJ-PME self shift: value of the LJ Ewald long-range term at rvdw. */
2008 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2009 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, minusSix);
2012 case eintmodFORCESWITCH:
2013 /* Switch the force, switch and shift the potential */
2014 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2015 &ic->dispersion_shift);
2016 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2017 &ic->repulsion_shift);
2019 case eintmodPOTSWITCH:
2020 /* Switch the potential and force */
2021 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2025 case eintmodEXACTCUTOFF:
2026 /* Nothing to do here */
2029 gmx_incons("unimplemented potential modifier");
/* Legacy alias used by the group-scheme kernels. */
2032 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2034 /* Electrostatics */
2035 ic->eeltype = fr->eeltype;
2036 ic->coulomb_modifier = fr->coulomb_modifier;
2037 ic->rcoulomb = fr->rcoulomb;
2038 ic->epsilon_r = fr->epsilon_r;
2039 ic->epsfac = fr->epsfac;
2040 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2042 if (fr->coulomb_modifier == eintmodPOTSHIFT)
/* Ewald potential shift: erfc(beta*rc), so V(rc) = 0. */
2044 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2051 /* Reaction-field */
2052 if (EEL_RF(ic->eeltype))
2054 ic->epsilon_rf = fr->epsilon_rf;
2055 ic->k_rf = fr->k_rf;
2056 ic->c_rf = fr->c_rf;
2060 /* For plain cut-off we might use the reaction-field kernels */
2061 ic->epsilon_rf = ic->epsilon_r;
2063 if (fr->coulomb_modifier == eintmodPOTSHIFT)
/* Plain cut-off + potential shift: shift by 1/rc (k_rf = 0). */
2065 ic->c_rf = 1/ic->rcoulomb;
2075 real dispersion_shift;
2077 dispersion_shift = ic->dispersion_shift.cpot;
2078 if (EVDW_PME(ic->vdwtype))
2080 dispersion_shift -= ic->sh_lj_ewald;
2082 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2083 ic->repulsion_shift.cpot, dispersion_shift);
2085 if (ic->eeltype == eelCUT)
/* NOTE(review): "%.e" looks like a typo for "%.3e" (cf. the Ewald line
 * below); prints with default/implementation-minimal precision as-is. */
2087 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2089 else if (EEL_PME(ic->eeltype))
2091 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2096 *interaction_const = ic;
/* Push the constants to the GPU module; with thread-MPI a barrier (elided
 * here) keeps shared texture references consistent across ranks. */
2098 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2100 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2102 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2103 * also sharing texture references. To keep the code simple, we don't
2104 * treat texture references as shared resources, but this means that
2105 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2106 * Hence, to ensure that the non-bonded kernels don't start before all
2107 * texture binding operations are finished, we need to wait for all ranks
2108 * to arrive here before continuing.
2110 * Note that we could omit this barrier if GPUs are not shared (or
2111 * texture objects are used), but as this is initialization code, there
2112 * is not point in complicating things.
2114 #ifdef GMX_THREAD_MPI
2119 #endif /* GMX_THREAD_MPI */
2122 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2123 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
/* Set up the Verlet-scheme non-bonded data: pick GPU/CPU resources and
 * kernels for the local (and, with DD, non-local) groups, initialize the
 * CUDA module and pair-list balancing, the pair search, pair lists and
 * the nbnxn atom data.
 * NOTE(review): several lines (snew(nbv,1), some braces/else branches,
 * final assignment of *nb_verlet) are elided in this extract. */
2126 static void init_nb_verlet(FILE *fp,
2127 nonbonded_verlet_t **nb_verlet,
2128 gmx_bool bFEP_NonBonded,
2129 const t_inputrec *ir,
2130 const t_forcerec *fr,
2131 const t_commrec *cr,
2132 const char *nbpu_opt)
2134 nonbonded_verlet_t *nbv;
2137 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2139 nbnxn_alloc_t *nb_alloc;
2140 nbnxn_free_t *nb_free;
2144 pick_nbnxn_resources(fp, cr, fr->hwinfo,
/* Two groups with domain decomposition: local + non-local. */
2152 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2153 for (i = 0; i < nbv->ngrp; i++)
2155 nbv->grp[i].nbl_lists.nnbl = 0;
2156 nbv->grp[i].nbat = NULL;
2157 nbv->grp[i].kernel_type = nbnxnkNotSet;
2159 if (i == 0) /* local */
2161 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2162 nbv->bUseGPU, bEmulateGPU, ir,
2163 &nbv->grp[i].kernel_type,
2164 &nbv->grp[i].ewald_excl,
2167 else /* non-local */
2169 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2171 /* Use GPU for local, select a CPU kernel for non-local */
2172 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2174 &nbv->grp[i].kernel_type,
2175 &nbv->grp[i].ewald_excl,
2178 bHybridGPURun = TRUE;
2182 /* Use the same kernel for local and non-local interactions */
2183 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2184 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2191 /* init the NxN GPU data; the last argument tells whether we'll have
2192 * both local and non-local NB calculation on GPU */
2193 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2194 &fr->hwinfo->gpu_info, fr->gpu_opt,
2195 cr->rank_pp_intranode,
2196 (nbv->ngrp > 1) && !bHybridGPURun);
/* Pair-list balancing: user override via GMX_NB_MIN_CI, else derived
 * from the GPU multiprocessor count, else disabled (0). */
2198 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2202 nbv->min_ci_balanced = strtol(env, &end, 10);
2203 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2205 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2210 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2211 nbv->min_ci_balanced);
2216 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2219 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2220 nbv->min_ci_balanced);
2226 nbv->min_ci_balanced = 0;
2231 nbnxn_init_search(&nbv->nbs,
2232 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2233 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2235 gmx_omp_nthreads_get(emntPairsearch));
2237 for (i = 0; i < nbv->ngrp; i++)
/* GPU runs use pinned (page-locked) host memory for transfers. */
2239 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2241 nb_alloc = &pmalloc;
2250 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2251 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2252 /* 8x8x8 "non-simple" lists are ATM always combined */
2253 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
/* Atom data is only built per group when kernel types differ;
 * otherwise the non-local group shares the local group's nbat. */
2257 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2259 gmx_bool bSimpleList;
2260 int enbnxninitcombrule;
2262 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2264 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2266 /* Plain LJ cut-off: we can optimize with combination rules */
2267 enbnxninitcombrule = enbnxninitcombruleDETECT;
2269 else if (fr->vdwtype == evdwPME)
2271 /* LJ-PME: we need to use a combination rule for the grid */
2272 if (fr->ljpme_combination_rule == eljpmeGEOM)
2274 enbnxninitcombrule = enbnxninitcombruleGEOM;
2278 enbnxninitcombrule = enbnxninitcombruleLB;
2283 /* We use a full combination matrix: no rule required */
2284 enbnxninitcombrule = enbnxninitcombruleNONE;
2288 snew(nbv->grp[i].nbat, 1);
2289 nbnxn_atomdata_init(fp,
2291 nbv->grp[i].kernel_type,
2293 fr->ntype, fr->nbfp,
2295 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2300 nbv->grp[i].nbat = nbv->grp[0].nbat;
/* Return TRUE when a Verlet non-bonded setup exists and runs on a GPU. */
2305 gmx_bool usingGpu(nonbonded_verlet_t *nbv)
2307 return nbv != NULL && nbv->bUseGPU;
2310 void init_forcerec(FILE *fp,
2311 const output_env_t oenv,
2314 const t_inputrec *ir,
2315 const gmx_mtop_t *mtop,
2316 const t_commrec *cr,
2322 const char *nbpu_opt,
2323 gmx_bool bNoSolvOpt,
2326 int i, m, negp_pp, negptable, egi, egj;
2331 gmx_bool bGenericKernelOnly;
2332 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2333 gmx_bool bFEP_NonBonded;
2334 int *nm_ind, egp_flags;
2336 if (fr->hwinfo == NULL)
2338 /* Detect hardware, gather information.
2339 * In mdrun, hwinfo has already been set before calling init_forcerec.
2340 * Here we ignore GPUs, as tools will not use them anyhow.
2342 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2345 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2346 fr->use_simd_kernels = TRUE;
2348 fr->bDomDec = DOMAINDECOMP(cr);
2350 if (check_box(ir->ePBC, box))
2352 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2355 /* Test particle insertion ? */
2358 /* Set to the size of the molecule to be inserted (the last one) */
2359 /* Because of old style topologies, we have to use the last cg
2360 * instead of the last molecule type.
2362 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2363 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2364 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2366 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2374 /* Copy AdResS parameters */
2377 fr->adress_type = ir->adress->type;
2378 fr->adress_const_wf = ir->adress->const_wf;
2379 fr->adress_ex_width = ir->adress->ex_width;
2380 fr->adress_hy_width = ir->adress->hy_width;
2381 fr->adress_icor = ir->adress->icor;
2382 fr->adress_site = ir->adress->site;
2383 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2384 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2387 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2388 for (i = 0; i < ir->adress->n_energy_grps; i++)
2390 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2393 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2394 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2395 for (i = 0; i < fr->n_adress_tf_grps; i++)
2397 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2399 copy_rvec(ir->adress->refs, fr->adress_refs);
2403 fr->adress_type = eAdressOff;
2404 fr->adress_do_hybridpairs = FALSE;
2407 /* Copy the user determined parameters */
2408 fr->userint1 = ir->userint1;
2409 fr->userint2 = ir->userint2;
2410 fr->userint3 = ir->userint3;
2411 fr->userint4 = ir->userint4;
2412 fr->userreal1 = ir->userreal1;
2413 fr->userreal2 = ir->userreal2;
2414 fr->userreal3 = ir->userreal3;
2415 fr->userreal4 = ir->userreal4;
2418 fr->fc_stepsize = ir->fc_stepsize;
2421 fr->efep = ir->efep;
2422 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2423 if (ir->fepvals->bScCoul)
2425 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2426 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2430 fr->sc_alphacoul = 0;
2431 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2433 fr->sc_power = ir->fepvals->sc_power;
2434 fr->sc_r_power = ir->fepvals->sc_r_power;
2435 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2437 env = getenv("GMX_SCSIGMA_MIN");
2441 sscanf(env, "%20lf", &dbl);
2442 fr->sc_sigma6_min = pow(dbl, 6);
2445 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2449 fr->bNonbonded = TRUE;
2450 if (getenv("GMX_NO_NONBONDED") != NULL)
2452 /* turn off non-bonded calculations */
2453 fr->bNonbonded = FALSE;
2454 md_print_warn(cr, fp,
2455 "Found environment variable GMX_NO_NONBONDED.\n"
2456 "Disabling nonbonded calculations.\n");
2459 bGenericKernelOnly = FALSE;
2461 /* We now check in the NS code whether a particular combination of interactions
2462 * can be used with water optimization, and disable it if that is not the case.
2465 if (getenv("GMX_NB_GENERIC") != NULL)
2470 "Found environment variable GMX_NB_GENERIC.\n"
2471 "Disabling all interaction-specific nonbonded kernels, will only\n"
2472 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2474 bGenericKernelOnly = TRUE;
2477 if (bGenericKernelOnly == TRUE)
2482 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2484 fr->use_simd_kernels = FALSE;
2488 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2489 "Disabling the usage of any SIMD-specific non-bonded & bonded kernel routines\n"
2490 "(e.g. SSE2/SSE4.1/AVX).\n\n");
2494 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2496 /* Check if we can/should do all-vs-all kernels */
2497 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2498 fr->AllvsAll_work = NULL;
2499 fr->AllvsAll_workgb = NULL;
2501 /* All-vs-all kernels have not been implemented in 4.6, and
2502 * the SIMD group kernels are also buggy in this case. Non-SIMD
2503 * group kernels are OK. See Redmine #1249. */
2506 fr->bAllvsAll = FALSE;
2507 fr->use_simd_kernels = FALSE;
2511 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2512 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2513 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2514 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2515 "we are proceeding by disabling all CPU architecture-specific\n"
2516 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2517 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2521 /* Neighbour searching stuff */
2522 fr->cutoff_scheme = ir->cutoff_scheme;
2523 fr->bGrid = (ir->ns_type == ensGRID);
2524 fr->ePBC = ir->ePBC;
2526 if (fr->cutoff_scheme == ecutsGROUP)
2528 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2529 "removed in a future release when 'verlet' supports all interaction forms.\n";
2533 fprintf(stderr, "\n%s\n", note);
2537 fprintf(fp, "\n%s\n", note);
2541 /* Determine if we will do PBC for distances in bonded interactions */
2542 if (fr->ePBC == epbcNONE)
2544 fr->bMolPBC = FALSE;
2548 if (!DOMAINDECOMP(cr))
2550 /* The group cut-off scheme and SHAKE assume charge groups
2551 * are whole, but not using molpbc is faster in most cases.
2553 if (fr->cutoff_scheme == ecutsGROUP ||
2554 (ir->eConstrAlg == econtSHAKE &&
2555 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2556 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2558 fr->bMolPBC = ir->bPeriodicMols;
2563 if (getenv("GMX_USE_GRAPH") != NULL)
2565 fr->bMolPBC = FALSE;
2568 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2575 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2578 fr->bGB = (ir->implicit_solvent == eisGBSA);
2580 fr->rc_scaling = ir->refcoord_scaling;
2581 copy_rvec(ir->posres_com, fr->posres_com);
2582 copy_rvec(ir->posres_comB, fr->posres_comB);
2583 fr->rlist = cutoff_inf(ir->rlist);
2584 fr->rlistlong = cutoff_inf(ir->rlistlong);
2585 fr->eeltype = ir->coulombtype;
2586 fr->vdwtype = ir->vdwtype;
2587 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2589 fr->coulomb_modifier = ir->coulomb_modifier;
2590 fr->vdw_modifier = ir->vdw_modifier;
2592 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2593 switch (fr->eeltype)
2596 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2602 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2606 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2607 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2616 case eelPMEUSERSWITCH:
2617 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2622 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2626 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2630 /* Vdw: Translate from mdp settings to kernel format */
2631 switch (fr->vdwtype)
2636 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2640 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2644 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2650 case evdwENCADSHIFT:
2651 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2655 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2659 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2660 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2661 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2663 fr->rvdw = cutoff_inf(ir->rvdw);
2664 fr->rvdw_switch = ir->rvdw_switch;
2665 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2666 fr->rcoulomb_switch = ir->rcoulomb_switch;
2668 fr->bTwinRange = fr->rlistlong > fr->rlist;
2669 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2671 fr->reppow = mtop->ffparams.reppow;
2673 if (ir->cutoff_scheme == ecutsGROUP)
2675 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2676 && !EVDW_PME(fr->vdwtype));
2677 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2678 fr->bcoultab = !(fr->eeltype == eelCUT ||
2679 fr->eeltype == eelEWALD ||
2680 fr->eeltype == eelPME ||
2681 fr->eeltype == eelRF ||
2682 fr->eeltype == eelRF_ZERO);
2684 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2685 * going to be faster to tabulate the interaction than calling the generic kernel.
2686 * However, if generic kernels have been requested we keep things analytically.
2688 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2689 fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2690 bGenericKernelOnly == FALSE)
2692 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2694 fr->bcoultab = TRUE;
2695 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2696 * which would otherwise need two tables.
2700 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2701 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2702 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2703 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2705 if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2707 fr->bcoultab = TRUE;
2711 if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2713 fr->bcoultab = TRUE;
2715 if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2720 if (getenv("GMX_REQUIRE_TABLES"))
2723 fr->bcoultab = TRUE;
2728 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2729 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2732 if (fr->bvdwtab == TRUE)
2734 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2735 fr->nbkernel_vdw_modifier = eintmodNONE;
2737 if (fr->bcoultab == TRUE)
2739 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2740 fr->nbkernel_elec_modifier = eintmodNONE;
2744 if (ir->cutoff_scheme == ecutsVERLET)
2746 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2748 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2750 fr->bvdwtab = FALSE;
2751 fr->bcoultab = FALSE;
2754 /* Tables are used for direct ewald sum */
2757 if (EEL_PME(ir->coulombtype))
2761 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2763 if (ir->coulombtype == eelP3M_AD)
2765 please_cite(fp, "Hockney1988");
2766 please_cite(fp, "Ballenegger2012");
2770 please_cite(fp, "Essmann95a");
2773 if (ir->ewald_geometry == eewg3DC)
2777 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2779 please_cite(fp, "In-Chul99a");
2782 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2783 init_ewald_tab(&(fr->ewald_table), ir, fp);
2786 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2787 1/fr->ewaldcoeff_q);
2791 if (EVDW_PME(ir->vdwtype))
2795 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2797 please_cite(fp, "Essmann95a");
2798 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2801 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2802 1/fr->ewaldcoeff_lj);
2806 /* Electrostatics */
2807 fr->epsilon_r = ir->epsilon_r;
2808 fr->epsilon_rf = ir->epsilon_rf;
2809 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2811 /* Parameters for generalized RF */
2815 if (fr->eeltype == eelGRF)
2817 init_generalized_rf(fp, mtop, ir, fr);
2820 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2821 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2822 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2823 IR_ELEC_FIELD(*ir) ||
2824 (fr->adress_icor != eAdressICOff)
2827 if (fr->cutoff_scheme == ecutsGROUP &&
2828 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2830 /* Count the total number of charge groups */
2831 fr->cg_nalloc = ncg_mtop(mtop);
2832 srenew(fr->cg_cm, fr->cg_nalloc);
2834 if (fr->shift_vec == NULL)
2836 snew(fr->shift_vec, SHIFTS);
2839 if (fr->fshift == NULL)
2841 snew(fr->fshift, SHIFTS);
2844 if (fr->nbfp == NULL)
2846 fr->ntype = mtop->ffparams.atnr;
2847 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2848 if (EVDW_PME(fr->vdwtype))
2850 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2854 /* Copy the energy group exclusions */
2855 fr->egp_flags = ir->opts.egp_flags;
2857 /* Van der Waals stuff */
2858 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2860 if (fr->rvdw_switch >= fr->rvdw)
2862 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2863 fr->rvdw_switch, fr->rvdw);
2867 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2868 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2869 fr->rvdw_switch, fr->rvdw);
2873 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2875 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2878 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2880 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2883 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2885 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2890 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2891 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2894 fr->eDispCorr = ir->eDispCorr;
2895 if (ir->eDispCorr != edispcNO)
2897 set_avcsixtwelve(fp, fr, mtop);
2902 set_bham_b_max(fp, fr, mtop);
2905 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2907 /* Copy the GBSA data (radius, volume and surftens for each
2908 * atomtype) from the topology atomtype section to forcerec.
2910 snew(fr->atype_radius, fr->ntype);
2911 snew(fr->atype_vol, fr->ntype);
2912 snew(fr->atype_surftens, fr->ntype);
2913 snew(fr->atype_gb_radius, fr->ntype);
2914 snew(fr->atype_S_hct, fr->ntype);
2916 if (mtop->atomtypes.nr > 0)
2918 for (i = 0; i < fr->ntype; i++)
2920 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2922 for (i = 0; i < fr->ntype; i++)
2924 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2926 for (i = 0; i < fr->ntype; i++)
2928 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2930 for (i = 0; i < fr->ntype; i++)
2932 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2934 for (i = 0; i < fr->ntype; i++)
2936 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2940 /* Generate the GB table if needed */
2944 fr->gbtabscale = 2000;
2946 fr->gbtabscale = 500;
2950 fr->gbtab = make_gb_table(oenv, fr);
2952 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2954 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2955 if (!DOMAINDECOMP(cr))
2957 make_local_gb(cr, fr->born, ir->gb_algorithm);
2961 /* Set the charge scaling */
2962 if (fr->epsilon_r != 0)
2964 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2968 /* eps = 0 is infinite dieletric: no coulomb interactions */
2972 /* Reaction field constants */
2973 if (EEL_RF(fr->eeltype))
2975 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2976 fr->rcoulomb, fr->temp, fr->zsquare, box,
2977 &fr->kappa, &fr->k_rf, &fr->c_rf);
2980 /*This now calculates sum for q and c6*/
2981 set_chargesum(fp, fr, mtop);
2983 /* if we are using LR electrostatics, and they are tabulated,
2984 * the tables will contain modified coulomb interactions.
2985 * Since we want to use the non-shifted ones for 1-4
2986 * coulombic interactions, we must have an extra set of tables.
2989 /* Construct tables.
2990 * A little unnecessary to make both vdw and coul tables sometimes,
2991 * but what the heck... */
2993 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2994 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2996 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2997 fr->coulomb_modifier != eintmodNONE ||
2998 fr->vdw_modifier != eintmodNONE ||
2999 fr->bBHAM || fr->bEwald) &&
3000 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
3001 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
3002 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
3004 negp_pp = ir->opts.ngener - ir->nwall;
3008 bSomeNormalNbListsAreInUse = TRUE;
3013 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
3014 for (egi = 0; egi < negp_pp; egi++)
3016 for (egj = egi; egj < negp_pp; egj++)
3018 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3019 if (!(egp_flags & EGP_EXCL))
3021 if (egp_flags & EGP_TABLE)
3027 bSomeNormalNbListsAreInUse = TRUE;
3032 if (bSomeNormalNbListsAreInUse)
3034 fr->nnblists = negptable + 1;
3038 fr->nnblists = negptable;
3040 if (fr->nnblists > 1)
3042 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3051 snew(fr->nblists, fr->nnblists);
3053 /* This code automatically gives table length tabext without cut-off's,
3054 * in that case grompp should already have checked that we do not need
3055 * normal tables and we only generate tables for 1-4 interactions.
3057 rtab = ir->rlistlong + ir->tabext;
3061 /* make tables for ordinary interactions */
3062 if (bSomeNormalNbListsAreInUse)
3064 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3067 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3069 if (!bMakeSeparate14Table)
3071 fr->tab14 = fr->nblists[0].table_elec_vdw;
3081 /* Read the special tables for certain energy group pairs */
3082 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3083 for (egi = 0; egi < negp_pp; egi++)
3085 for (egj = egi; egj < negp_pp; egj++)
3087 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3088 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3090 if (fr->nnblists > 1)
3092 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3094 /* Read the table file with the two energy groups names appended */
3095 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3096 *mtop->groups.grpname[nm_ind[egi]],
3097 *mtop->groups.grpname[nm_ind[egj]],
3101 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3102 *mtop->groups.grpname[nm_ind[egi]],
3103 *mtop->groups.grpname[nm_ind[egj]],
3104 &fr->nblists[fr->nnblists/2+m]);
3108 else if (fr->nnblists > 1)
3110 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3116 else if ((fr->eDispCorr != edispcNO) &&
3117 ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3118 (fr->vdw_modifier == eintmodFORCESWITCH) ||
3119 (fr->vdw_modifier == eintmodPOTSHIFT)))
3121 /* Tables might not be used for the potential modifier interactions per se, but
3122 * we still need them to evaluate switch/shift dispersion corrections in this case.
3124 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3127 if (bMakeSeparate14Table)
3129 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3130 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3131 GMX_MAKETABLES_14ONLY);
3134 /* Read AdResS Thermo Force table if needed */
3135 if (fr->adress_icor == eAdressICThermoForce)
3137 /* old todo replace */
3139 if (ir->adress->n_tf_grps > 0)
3141 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3146 /* load the default table */
3147 snew(fr->atf_tabs, 1);
3148 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3153 fr->nwall = ir->nwall;
3154 if (ir->nwall && ir->wall_type == ewtTABLE)
3156 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3161 fcd->bondtab = make_bonded_tables(fp,
3162 F_TABBONDS, F_TABBONDSNC,
3164 fcd->angletab = make_bonded_tables(fp,
3167 fcd->dihtab = make_bonded_tables(fp,
3175 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3179 /* QM/MM initialization if requested
3183 fprintf(stderr, "QM/MM calculation requested.\n");
3186 fr->bQMMM = ir->bQMMM;
3187 fr->qr = mk_QMMMrec();
3189 /* Set all the static charge group info */
3190 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3192 &fr->bExcl_IntraCGAll_InterCGNone);
3193 if (DOMAINDECOMP(cr))
3199 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3202 if (!DOMAINDECOMP(cr))
3204 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3205 mtop->natoms, mtop->natoms, mtop->natoms);
3208 fr->print_force = print_force;
3211 /* coarse load balancing vars */
3216 /* Initialize neighbor search */
3217 init_ns(fp, cr, &fr->ns, fr, mtop);
3219 if (cr->duty & DUTY_PP)
3221 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3225 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3230 /* Initialize the thread working data for bonded interactions */
3231 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3233 snew(fr->excl_load, fr->nthreads+1);
3235 if (fr->cutoff_scheme == ecutsVERLET)
3237 if (ir->rcoulomb != ir->rvdw)
3239 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3242 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3245 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3246 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3248 if (ir->eDispCorr != edispcNO)
3250 calc_enervirdiff(fp, ir->eDispCorr, fr);
/* Debug-print helpers for pr_forcerec(): each prints "expr: value\n", where
 * the label is the stringified argument expression (#r / #i / #b), so callers
 * pass the exact field expression they want echoed.  All macro arguments are
 * now parenthesized (the original pr_real left fp bare, unlike pr_int and
 * pr_bool, and none of the three guarded the value argument) to avoid
 * precedence surprises when callers pass compound expressions. */
#define pr_real(fp, r) fprintf((fp), "%s: %e\n", #r, (r))
#define pr_int(fp, i)  fprintf((fp), "%s: %d\n", #i, (i))
#define pr_bool(fp, b) fprintf((fp), "%s: %s\n", #b, bool_names[(b)])
3258 void pr_forcerec(FILE *fp, t_forcerec *fr)
3262 pr_real(fp, fr->rlist);
3263 pr_real(fp, fr->rcoulomb);
3264 pr_real(fp, fr->fudgeQQ);
3265 pr_bool(fp, fr->bGrid);
3266 pr_bool(fp, fr->bTwinRange);
3267 /*pr_int(fp,fr->cg0);
3268 pr_int(fp,fr->hcg);*/
3269 for (i = 0; i < fr->nnblists; i++)
3271 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3273 pr_real(fp, fr->rcoulomb_switch);
3274 pr_real(fp, fr->rcoulomb);
3279 void forcerec_set_excl_load(t_forcerec *fr,
3280 const gmx_localtop_t *top)
3283 int t, i, j, ntot, n, ntarget;
3285 ind = top->excls.index;
3289 for (i = 0; i < top->excls.nr; i++)
3291 for (j = ind[i]; j < ind[i+1]; j++)
3300 fr->excl_load[0] = 0;
3303 for (t = 1; t <= fr->nthreads; t++)
3305 ntarget = (ntot*t)/fr->nthreads;
3306 while (i < top->excls.nr && n < ntarget)
3308 for (j = ind[i]; j < ind[i+1]; j++)
3317 fr->excl_load[t] = i;
3321 /* Frees GPU memory and destroys the CUDA context.
3323 * Note that this function needs to be called even if GPUs are not used
3324 * in this run because the PME ranks have no knowledge of whether GPUs
3325 * are used or not, but all ranks need to enter the barrier below.
3327 void free_gpu_resources(const t_forcerec *fr,
3328 const t_commrec *cr,
3329 const gmx_gpu_info_t *gpu_info,
3330 const gmx_gpu_opt_t *gpu_opt)
3332 gmx_bool bIsPPrankUsingGPU;
3333 char gpu_err_str[STRLEN];
3335 bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
3337 if (bIsPPrankUsingGPU)
3339 /* free nbnxn data in GPU memory */
3340 nbnxn_cuda_free(fr->nbv->cu_nbv);
3342 /* With tMPI we need to wait for all ranks to finish deallocation before
3343 * destroying the context in free_gpu() as some ranks may be sharing
3345 * Note: only PP ranks need to free GPU resources, so it is safe
3346 * not to call the barrier on PME ranks.
3348 #ifdef GMX_THREAD_MPI
3353 #endif /* GMX_THREAD_MPI */
3355 /* uninitialize GPU (by destroying the context) */
3356 if (!free_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
3358 gmx_warning("On rank %d failed to free GPU #%d: %s",
3359 cr->nodeid, get_current_gpu_device_id(), gpu_err_str);