2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/legacyheaders/typedefs.h"
46 #include "gromacs/legacyheaders/types/commrec.h"
47 #include "gromacs/math/vec.h"
48 #include "gromacs/math/utilities.h"
49 #include "gromacs/legacyheaders/macros.h"
50 #include "gromacs/legacyheaders/macros.h"
51 #include "gromacs/math/units.h"
52 #include "gromacs/legacyheaders/force.h"
53 #include "gromacs/legacyheaders/tables.h"
54 #include "gromacs/legacyheaders/nonbonded.h"
55 #include "gromacs/legacyheaders/names.h"
56 #include "gromacs/legacyheaders/network.h"
57 #include "gromacs/legacyheaders/ns.h"
58 #include "gromacs/legacyheaders/txtdump.h"
59 #include "gromacs/legacyheaders/coulomb.h"
60 #include "gromacs/legacyheaders/md_support.h"
61 #include "gromacs/legacyheaders/md_logging.h"
62 #include "gromacs/legacyheaders/domdec.h"
63 #include "gromacs/legacyheaders/qmmm.h"
64 #include "gromacs/legacyheaders/copyrite.h"
65 #include "gromacs/topology/mtop_util.h"
66 #include "nbnxn_simd.h"
67 #include "nbnxn_search.h"
68 #include "nbnxn_atomdata.h"
69 #include "nbnxn_consts.h"
70 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
71 #include "gromacs/legacyheaders/gmx_detect_hardware.h"
72 #include "gromacs/legacyheaders/inputrec.h"
74 #include "gromacs/pbcutil/ishift.h"
75 #include "gromacs/pbcutil/pbc.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/smalloc.h"
79 #include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
80 #include "gromacs/legacyheaders/gpu_utils.h"
81 #include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
82 #include "gromacs/legacyheaders/pmalloc_cuda.h"
83 #include "nb_verlet.h"
85 t_forcerec *mk_forcerec(void)
95 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
99 for (i = 0; (i < atnr); i++)
101 for (j = 0; (j < atnr); j++)
103 fprintf(fp, "%2d - %2d", i, j);
106 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
107 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
111 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
112 C12(nbfp, atnr, i, j)/12.0);
119 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
127 snew(nbfp, 3*atnr*atnr);
128 for (i = k = 0; (i < atnr); i++)
130 for (j = 0; (j < atnr); j++, k++)
132 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
133 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
134 /* nbfp now includes the 6.0 derivative prefactor */
135 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
141 snew(nbfp, 2*atnr*atnr);
142 for (i = k = 0; (i < atnr); i++)
144 for (j = 0; (j < atnr); j++, k++)
146 /* nbfp now includes the 6.0/12.0 derivative prefactors */
147 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
148 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
156 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
159 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
162 /* For LJ-PME simulations, we correct the energies with the reciprocal space
163 * inside of the cut-off. To do this the non-bonded kernels needs to have
164 * access to the C6-values used on the reciprocal grid in pme.c
168 snew(grid, 2*atnr*atnr);
169 for (i = k = 0; (i < atnr); i++)
171 for (j = 0; (j < atnr); j++, k++)
173 c6i = idef->iparams[i*(atnr+1)].lj.c6;
174 c12i = idef->iparams[i*(atnr+1)].lj.c12;
175 c6j = idef->iparams[j*(atnr+1)].lj.c6;
176 c12j = idef->iparams[j*(atnr+1)].lj.c12;
177 c6 = sqrt(c6i * c6j);
178 if (fr->ljpme_combination_rule == eljpmeLB
179 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
181 sigmai = pow(c12i / c6i, 1.0/6.0);
182 sigmaj = pow(c12j / c6j, 1.0/6.0);
183 epsi = c6i * c6i / c12i;
184 epsj = c6j * c6j / c12j;
185 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
187 /* Store the elements at the same relative positions as C6 in nbfp in order
188 * to simplify access in the kernels
190 grid[2*(atnr*i+j)] = c6*6.0;
196 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
200 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
204 snew(nbfp, 2*atnr*atnr);
205 for (i = 0; i < atnr; ++i)
207 for (j = 0; j < atnr; ++j)
209 c6i = idef->iparams[i*(atnr+1)].lj.c6;
210 c12i = idef->iparams[i*(atnr+1)].lj.c12;
211 c6j = idef->iparams[j*(atnr+1)].lj.c6;
212 c12j = idef->iparams[j*(atnr+1)].lj.c12;
213 c6 = sqrt(c6i * c6j);
214 c12 = sqrt(c12i * c12j);
215 if (comb_rule == eCOMB_ARITHMETIC
216 && !gmx_numzero(c6) && !gmx_numzero(c12))
218 sigmai = pow(c12i / c6i, 1.0/6.0);
219 sigmaj = pow(c12j / c6j, 1.0/6.0);
220 epsi = c6i * c6i / c12i;
221 epsj = c6j * c6j / c12j;
222 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
223 c12 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
225 C6(nbfp, atnr, i, j) = c6*6.0;
226 C12(nbfp, atnr, i, j) = c12*12.0;
232 /* This routine sets fr->solvent_opt to the most common solvent in the
233 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
234 * the fr->solvent_type array with the correct type (or esolNO).
236 * Charge groups that fulfill the conditions but are not identical to the
237 * most common one will be marked as esolNO in the solvent_type array.
239 * TIP3p is identical to SPC for these purposes, so we call it
240 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
242 * NOTE: QM particle should not
243 * become an optimized solvent. Not even if there is only one charge
253 } solvent_parameters_t;
256 check_solvent_cg(const gmx_moltype_t *molt,
259 const unsigned char *qm_grpnr,
260 const t_grps *qm_grps,
262 int *n_solvent_parameters,
263 solvent_parameters_t **solvent_parameters_p,
267 const t_blocka *excl;
274 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
275 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
278 solvent_parameters_t *solvent_parameters;
280 /* We use a list with parameters for each solvent type.
281 * Every time we discover a new molecule that fulfills the basic
282 * conditions for a solvent we compare with the previous entries
283 * in these lists. If the parameters are the same we just increment
284 * the counter for that type, and otherwise we create a new type
285 * based on the current molecule.
287 * Once we've finished going through all molecules we check which
288 * solvent is most common, and mark all those molecules while we
289 * clear the flag on all others.
292 solvent_parameters = *solvent_parameters_p;
294 /* Mark the cg first as non optimized */
297 /* Check if this cg has no exclusions with atoms in other charge groups
298 * and all atoms inside the charge group excluded.
299 * We only have 3 or 4 atom solvent loops.
301 if (GET_CGINFO_EXCL_INTER(cginfo) ||
302 !GET_CGINFO_EXCL_INTRA(cginfo))
307 /* Get the indices of the first atom in this charge group */
308 j0 = molt->cgs.index[cg0];
309 j1 = molt->cgs.index[cg0+1];
311 /* Number of atoms in our molecule */
317 "Moltype '%s': there are %d atoms in this charge group\n",
321 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
324 if (nj < 3 || nj > 4)
329 /* Check if we are doing QM on this group */
331 if (qm_grpnr != NULL)
333 for (j = j0; j < j1 && !qm; j++)
335 qm = (qm_grpnr[j] < qm_grps->nr - 1);
338 /* Cannot use solvent optimization with QM */
344 atom = molt->atoms.atom;
346 /* Still looks like a solvent, time to check parameters */
348 /* If it is perturbed (free energy) we can't use the solvent loops,
349 * so then we just skip to the next molecule.
353 for (j = j0; j < j1 && !perturbed; j++)
355 perturbed = PERTURBED(atom[j]);
363 /* Now it's only a question if the VdW and charge parameters
364 * are OK. Before doing the check we compare and see if they are
365 * identical to a possible previous solvent type.
366 * First we assign the current types and charges.
368 for (j = 0; j < nj; j++)
370 tmp_vdwtype[j] = atom[j0+j].type;
371 tmp_charge[j] = atom[j0+j].q;
374 /* Does it match any previous solvent type? */
375 for (k = 0; k < *n_solvent_parameters; k++)
380 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
381 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
382 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
387 /* Check that types & charges match for all atoms in molecule */
388 for (j = 0; j < nj && match == TRUE; j++)
390 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
394 if (tmp_charge[j] != solvent_parameters[k].charge[j])
401 /* Congratulations! We have a matched solvent.
402 * Flag it with this type for later processing.
405 solvent_parameters[k].count += nmol;
407 /* We are done with this charge group */
412 /* If we get here, we have a tentative new solvent type.
413 * Before we add it we must check that it fulfills the requirements
414 * of the solvent optimized loops. First determine which atoms have
417 for (j = 0; j < nj; j++)
420 tjA = tmp_vdwtype[j];
422 /* Go through all other tpes and see if any have non-zero
423 * VdW parameters when combined with this one.
425 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
427 /* We already checked that the atoms weren't perturbed,
428 * so we only need to check state A now.
432 has_vdw[j] = (has_vdw[j] ||
433 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
434 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
435 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
440 has_vdw[j] = (has_vdw[j] ||
441 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
442 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
447 /* Now we know all we need to make the final check and assignment. */
451 * For this we require thatn all atoms have charge,
452 * the charges on atom 2 & 3 should be the same, and only
453 * atom 1 might have VdW.
455 if (has_vdw[1] == FALSE &&
456 has_vdw[2] == FALSE &&
457 tmp_charge[0] != 0 &&
458 tmp_charge[1] != 0 &&
459 tmp_charge[2] == tmp_charge[1])
461 srenew(solvent_parameters, *n_solvent_parameters+1);
462 solvent_parameters[*n_solvent_parameters].model = esolSPC;
463 solvent_parameters[*n_solvent_parameters].count = nmol;
464 for (k = 0; k < 3; k++)
466 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
467 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
470 *cg_sp = *n_solvent_parameters;
471 (*n_solvent_parameters)++;
476 /* Or could it be a TIP4P?
477 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
478 * Only atom 1 mght have VdW.
480 if (has_vdw[1] == FALSE &&
481 has_vdw[2] == FALSE &&
482 has_vdw[3] == FALSE &&
483 tmp_charge[0] == 0 &&
484 tmp_charge[1] != 0 &&
485 tmp_charge[2] == tmp_charge[1] &&
488 srenew(solvent_parameters, *n_solvent_parameters+1);
489 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
490 solvent_parameters[*n_solvent_parameters].count = nmol;
491 for (k = 0; k < 4; k++)
493 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
494 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
497 *cg_sp = *n_solvent_parameters;
498 (*n_solvent_parameters)++;
502 *solvent_parameters_p = solvent_parameters;
506 check_solvent(FILE * fp,
507 const gmx_mtop_t * mtop,
509 cginfo_mb_t *cginfo_mb)
512 const t_block * mols;
513 const gmx_moltype_t *molt;
514 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
515 int n_solvent_parameters;
516 solvent_parameters_t *solvent_parameters;
522 fprintf(debug, "Going to determine what solvent types we have.\n");
527 n_solvent_parameters = 0;
528 solvent_parameters = NULL;
529 /* Allocate temporary array for solvent type */
530 snew(cg_sp, mtop->nmolblock);
534 for (mb = 0; mb < mtop->nmolblock; mb++)
536 molt = &mtop->moltype[mtop->molblock[mb].type];
538 /* Here we have to loop over all individual molecules
539 * because we need to check for QMMM particles.
541 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
542 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
543 nmol = mtop->molblock[mb].nmol/nmol_ch;
544 for (mol = 0; mol < nmol_ch; mol++)
547 am = mol*cgs->index[cgs->nr];
548 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
550 check_solvent_cg(molt, cg_mol, nmol,
551 mtop->groups.grpnr[egcQMMM] ?
552 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
553 &mtop->groups.grps[egcQMMM],
555 &n_solvent_parameters, &solvent_parameters,
556 cginfo_mb[mb].cginfo[cgm+cg_mol],
557 &cg_sp[mb][cgm+cg_mol]);
560 cg_offset += cgs->nr;
561 at_offset += cgs->index[cgs->nr];
564 /* Puh! We finished going through all charge groups.
565 * Now find the most common solvent model.
568 /* Most common solvent this far */
570 for (i = 0; i < n_solvent_parameters; i++)
573 solvent_parameters[i].count > solvent_parameters[bestsp].count)
581 bestsol = solvent_parameters[bestsp].model;
588 #ifdef DISABLE_WATER_NLIST
593 for (mb = 0; mb < mtop->nmolblock; mb++)
595 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
596 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
597 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
599 if (cg_sp[mb][i] == bestsp)
601 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
606 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
613 if (bestsol != esolNO && fp != NULL)
615 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
617 solvent_parameters[bestsp].count);
620 sfree(solvent_parameters);
621 fr->solvent_opt = bestsol;
625 acNONE = 0, acCONSTRAINT, acSETTLE
628 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
629 t_forcerec *fr, gmx_bool bNoSolvOpt,
630 gmx_bool *bFEP_NonBonded,
631 gmx_bool *bExcl_IntraCGAll_InterCGNone)
634 const t_blocka *excl;
635 const gmx_moltype_t *molt;
636 const gmx_molblock_t *molb;
637 cginfo_mb_t *cginfo_mb;
640 int cg_offset, a_offset, cgm, am;
641 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
645 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
647 ncg_tot = ncg_mtop(mtop);
648 snew(cginfo_mb, mtop->nmolblock);
650 snew(type_VDW, fr->ntype);
651 for (ai = 0; ai < fr->ntype; ai++)
653 type_VDW[ai] = FALSE;
654 for (j = 0; j < fr->ntype; j++)
656 type_VDW[ai] = type_VDW[ai] ||
658 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
659 C12(fr->nbfp, fr->ntype, ai, j) != 0;
663 *bFEP_NonBonded = FALSE;
664 *bExcl_IntraCGAll_InterCGNone = TRUE;
667 snew(bExcl, excl_nalloc);
670 for (mb = 0; mb < mtop->nmolblock; mb++)
672 molb = &mtop->molblock[mb];
673 molt = &mtop->moltype[molb->type];
677 /* Check if the cginfo is identical for all molecules in this block.
678 * If so, we only need an array of the size of one molecule.
679 * Otherwise we make an array of #mol times #cgs per molecule.
683 for (m = 0; m < molb->nmol; m++)
685 am = m*cgs->index[cgs->nr];
686 for (cg = 0; cg < cgs->nr; cg++)
689 a1 = cgs->index[cg+1];
690 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
691 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
695 if (mtop->groups.grpnr[egcQMMM] != NULL)
697 for (ai = a0; ai < a1; ai++)
699 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
700 mtop->groups.grpnr[egcQMMM][a_offset +ai])
709 cginfo_mb[mb].cg_start = cg_offset;
710 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
711 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
712 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
713 cginfo = cginfo_mb[mb].cginfo;
715 /* Set constraints flags for constrained atoms */
716 snew(a_con, molt->atoms.nr);
717 for (ftype = 0; ftype < F_NRE; ftype++)
719 if (interaction_function[ftype].flags & IF_CONSTRAINT)
724 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
728 for (a = 0; a < nral; a++)
730 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
731 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
737 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
740 am = m*cgs->index[cgs->nr];
741 for (cg = 0; cg < cgs->nr; cg++)
744 a1 = cgs->index[cg+1];
746 /* Store the energy group in cginfo */
747 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
748 SET_CGINFO_GID(cginfo[cgm+cg], gid);
750 /* Check the intra/inter charge group exclusions */
751 if (a1-a0 > excl_nalloc)
753 excl_nalloc = a1 - a0;
754 srenew(bExcl, excl_nalloc);
756 /* bExclIntraAll: all intra cg interactions excluded
757 * bExclInter: any inter cg interactions excluded
759 bExclIntraAll = TRUE;
763 bHavePerturbedAtoms = FALSE;
764 for (ai = a0; ai < a1; ai++)
766 /* Check VDW and electrostatic interactions */
767 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
768 type_VDW[molt->atoms.atom[ai].typeB]);
769 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
770 molt->atoms.atom[ai].qB != 0);
772 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
774 /* Clear the exclusion list for atom ai */
775 for (aj = a0; aj < a1; aj++)
777 bExcl[aj-a0] = FALSE;
779 /* Loop over all the exclusions of atom ai */
780 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
783 if (aj < a0 || aj >= a1)
792 /* Check if ai excludes a0 to a1 */
793 for (aj = a0; aj < a1; aj++)
797 bExclIntraAll = FALSE;
804 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
807 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
815 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
819 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
821 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
823 /* The size in cginfo is currently only read with DD */
824 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
828 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
832 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
834 if (bHavePerturbedAtoms && fr->efep != efepNO)
836 SET_CGINFO_FEP(cginfo[cgm+cg]);
837 *bFEP_NonBonded = TRUE;
839 /* Store the charge group size */
840 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
842 if (!bExclIntraAll || bExclInter)
844 *bExcl_IntraCGAll_InterCGNone = FALSE;
851 cg_offset += molb->nmol*cgs->nr;
852 a_offset += molb->nmol*cgs->index[cgs->nr];
856 /* the solvent optimizer is called after the QM is initialized,
857 * because we don't want to have the QM subsystemto become an
861 check_solvent(fplog, mtop, fr, cginfo_mb);
863 if (getenv("GMX_NO_SOLV_OPT"))
867 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
868 "Disabling all solvent optimization\n");
870 fr->solvent_opt = esolNO;
874 fr->solvent_opt = esolNO;
876 if (!fr->solvent_opt)
878 for (mb = 0; mb < mtop->nmolblock; mb++)
880 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
882 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
890 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
895 ncg = cgi_mb[nmb-1].cg_end;
898 for (cg = 0; cg < ncg; cg++)
900 while (cg >= cgi_mb[mb].cg_end)
905 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
911 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
913 /*This now calculates sum for q and c6*/
914 double qsum, q2sum, q, c6sum, c6;
916 const t_atoms *atoms;
921 for (mb = 0; mb < mtop->nmolblock; mb++)
923 nmol = mtop->molblock[mb].nmol;
924 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
925 for (i = 0; i < atoms->nr; i++)
927 q = atoms->atom[i].q;
930 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
935 fr->q2sum[0] = q2sum;
936 fr->c6sum[0] = c6sum;
938 if (fr->efep != efepNO)
943 for (mb = 0; mb < mtop->nmolblock; mb++)
945 nmol = mtop->molblock[mb].nmol;
946 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
947 for (i = 0; i < atoms->nr; i++)
949 q = atoms->atom[i].qB;
952 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
956 fr->q2sum[1] = q2sum;
957 fr->c6sum[1] = c6sum;
962 fr->qsum[1] = fr->qsum[0];
963 fr->q2sum[1] = fr->q2sum[0];
964 fr->c6sum[1] = fr->c6sum[0];
968 if (fr->efep == efepNO)
970 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
974 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
975 fr->qsum[0], fr->qsum[1]);
980 void update_forcerec(t_forcerec *fr, matrix box)
982 if (fr->eeltype == eelGRF)
984 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
985 fr->rcoulomb, fr->temp, fr->zsquare, box,
986 &fr->kappa, &fr->k_rf, &fr->c_rf);
990 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
992 const t_atoms *atoms, *atoms_tpi;
993 const t_blocka *excl;
994 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
995 gmx_int64_t npair, npair_ij, tmpi, tmpj;
996 double csix, ctwelve;
1000 real *nbfp_comb = NULL;
1006 /* For LJ-PME, we want to correct for the difference between the
1007 * actual C6 values and the C6 values used by the LJ-PME based on
1008 * combination rules. */
1010 if (EVDW_PME(fr->vdwtype))
1012 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1013 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1014 for (tpi = 0; tpi < ntp; ++tpi)
1016 for (tpj = 0; tpj < ntp; ++tpj)
1018 C6(nbfp_comb, ntp, tpi, tpj) =
1019 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1020 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1025 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1033 /* Count the types so we avoid natoms^2 operations */
1034 snew(typecount, ntp);
1035 gmx_mtop_count_atomtypes(mtop, q, typecount);
1037 for (tpi = 0; tpi < ntp; tpi++)
1039 for (tpj = tpi; tpj < ntp; tpj++)
1041 tmpi = typecount[tpi];
1042 tmpj = typecount[tpj];
1045 npair_ij = tmpi*tmpj;
1049 npair_ij = tmpi*(tmpi - 1)/2;
1053 /* nbfp now includes the 6.0 derivative prefactor */
1054 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1058 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1059 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1060 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1066 /* Subtract the excluded pairs.
1067 * The main reason for substracting exclusions is that in some cases
1068 * some combinations might never occur and the parameters could have
1069 * any value. These unused values should not influence the dispersion
1072 for (mb = 0; mb < mtop->nmolblock; mb++)
1074 nmol = mtop->molblock[mb].nmol;
1075 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1076 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1077 for (i = 0; (i < atoms->nr); i++)
1081 tpi = atoms->atom[i].type;
1085 tpi = atoms->atom[i].typeB;
1087 j1 = excl->index[i];
1088 j2 = excl->index[i+1];
1089 for (j = j1; j < j2; j++)
1096 tpj = atoms->atom[k].type;
1100 tpj = atoms->atom[k].typeB;
1104 /* nbfp now includes the 6.0 derivative prefactor */
1105 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1109 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1110 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1111 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1121 /* Only correct for the interaction of the test particle
1122 * with the rest of the system.
1125 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1128 for (mb = 0; mb < mtop->nmolblock; mb++)
1130 nmol = mtop->molblock[mb].nmol;
1131 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1132 for (j = 0; j < atoms->nr; j++)
1135 /* Remove the interaction of the test charge group
1138 if (mb == mtop->nmolblock-1)
1142 if (mb == 0 && nmol == 1)
1144 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1149 tpj = atoms->atom[j].type;
1153 tpj = atoms->atom[j].typeB;
1155 for (i = 0; i < fr->n_tpi; i++)
1159 tpi = atoms_tpi->atom[i].type;
1163 tpi = atoms_tpi->atom[i].typeB;
1167 /* nbfp now includes the 6.0 derivative prefactor */
1168 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1172 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1173 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1174 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1181 if (npair - nexcl <= 0 && fplog)
1183 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1189 csix /= npair - nexcl;
1190 ctwelve /= npair - nexcl;
1194 fprintf(debug, "Counted %d exclusions\n", nexcl);
1195 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1196 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1198 fr->avcsix[q] = csix;
1199 fr->avctwelve[q] = ctwelve;
1202 if (EVDW_PME(fr->vdwtype))
1209 if (fr->eDispCorr == edispcAllEner ||
1210 fr->eDispCorr == edispcAllEnerPres)
1212 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1213 fr->avcsix[0], fr->avctwelve[0]);
1217 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1223 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1224 const gmx_mtop_t *mtop)
1226 const t_atoms *at1, *at2;
1227 int mt1, mt2, i, j, tpi, tpj, ntypes;
1233 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1240 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1242 at1 = &mtop->moltype[mt1].atoms;
1243 for (i = 0; (i < at1->nr); i++)
1245 tpi = at1->atom[i].type;
1248 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1251 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1253 at2 = &mtop->moltype[mt2].atoms;
1254 for (j = 0; (j < at2->nr); j++)
1256 tpj = at2->atom[j].type;
1259 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1261 b = BHAMB(nbfp, ntypes, tpi, tpj);
1262 if (b > fr->bham_b_max)
1266 if ((b < bmin) || (bmin == -1))
1276 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1277 bmin, fr->bham_b_max);
1281 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1282 t_forcerec *fr, real rtab,
1283 const t_commrec *cr,
1284 const char *tabfn, char *eg1, char *eg2,
1294 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1299 sprintf(buf, "%s", tabfn);
1302 /* Append the two energy group names */
1303 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1304 eg1, eg2, ftp2ext(efXVG));
1306 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1307 /* Copy the contents of the table to separate coulomb and LJ tables too,
1308 * to improve cache performance.
1310 /* For performance reasons we want
1311 * the table data to be aligned to 16-byte. The pointers could be freed
1312 * but currently aren't.
1314 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1315 nbl->table_elec.format = nbl->table_elec_vdw.format;
1316 nbl->table_elec.r = nbl->table_elec_vdw.r;
1317 nbl->table_elec.n = nbl->table_elec_vdw.n;
1318 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1319 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1320 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1321 nbl->table_elec.ninteractions = 1;
1322 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1323 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1325 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1326 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1327 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1328 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1329 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1330 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1331 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1332 nbl->table_vdw.ninteractions = 2;
1333 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1334 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1336 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1338 for (j = 0; j < 4; j++)
1340 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1342 for (j = 0; j < 8; j++)
1344 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1349 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1350 int *ncount, int **count)
1352 const gmx_moltype_t *molt;
1354 int mt, ftype, stride, i, j, tabnr;
1356 for (mt = 0; mt < mtop->nmoltype; mt++)
1358 molt = &mtop->moltype[mt];
1359 for (ftype = 0; ftype < F_NRE; ftype++)
1361 if (ftype == ftype1 || ftype == ftype2)
1363 il = &molt->ilist[ftype];
1364 stride = 1 + NRAL(ftype);
1365 for (i = 0; i < il->nr; i += stride)
1367 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1370 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1372 if (tabnr >= *ncount)
1374 srenew(*count, tabnr+1);
1375 for (j = *ncount; j < tabnr+1; j++)
1388 static bondedtable_t *make_bonded_tables(FILE *fplog,
1389 int ftype1, int ftype2,
1390 const gmx_mtop_t *mtop,
1391 const char *basefn, const char *tabext)
1393 int i, ncount, *count;
1401 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1406 for (i = 0; i < ncount; i++)
1410 sprintf(tabfn, "%s", basefn);
1411 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1412 tabext, i, ftp2ext(efXVG));
1413 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1422 void forcerec_set_ranges(t_forcerec *fr,
1423 int ncg_home, int ncg_force,
1425 int natoms_force_constr, int natoms_f_novirsum)
1430 /* fr->ncg_force is unused in the standard code,
1431 * but it can be useful for modified code dealing with charge groups.
1433 fr->ncg_force = ncg_force;
1434 fr->natoms_force = natoms_force;
1435 fr->natoms_force_constr = natoms_force_constr;
1437 if (fr->natoms_force_constr > fr->nalloc_force)
1439 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1443 srenew(fr->f_twin, fr->nalloc_force);
1447 if (fr->bF_NoVirSum)
1449 fr->f_novirsum_n = natoms_f_novirsum;
1450 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1452 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1453 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1458 fr->f_novirsum_n = 0;
1462 static real cutoff_inf(real cutoff)
1466 cutoff = GMX_CUTOFF_INF;
1472 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1473 t_forcerec *fr, const t_inputrec *ir,
1474 const char *tabfn, const gmx_mtop_t *mtop,
1482 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1486 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1488 sprintf(buf, "%s", tabfn);
1489 for (i = 0; i < ir->adress->n_tf_grps; i++)
1491 j = ir->adress->tf_table_index[i]; /* get energy group index */
1492 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1493 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1496 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1498 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1503 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1510 ir->rcoulomb == 0 &&
1512 ir->ePBC == epbcNONE &&
1513 ir->vdwtype == evdwCUT &&
1514 ir->coulombtype == eelCUT &&
1515 ir->efep == efepNO &&
1516 (ir->implicit_solvent == eisNO ||
1517 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1518 ir->gb_algorithm == egbHCT ||
1519 ir->gb_algorithm == egbOBC))) &&
1520 getenv("GMX_NO_ALLVSALL") == NULL
1523 if (bAllvsAll && ir->opts.ngener > 1)
1525 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1531 fprintf(stderr, "\n%s\n", note);
1535 fprintf(fp, "\n%s\n", note);
1541 if (bAllvsAll && fp && MASTER(cr))
1543 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1550 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1554 /* These thread local data structures are used for bondeds only */
1555 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1557 if (fr->nthreads > 1)
1559 snew(fr->f_t, fr->nthreads);
1560 /* Thread 0 uses the global force and energy arrays */
1561 for (t = 1; t < fr->nthreads; t++)
1563 fr->f_t[t].f = NULL;
1564 fr->f_t[t].f_nalloc = 0;
1565 snew(fr->f_t[t].fshift, SHIFTS);
1566 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1567 for (i = 0; i < egNR; i++)
1569 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1576 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1577 const t_commrec *cr,
1578 const t_inputrec *ir,
1581 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1583 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1584 bGPU ? "GPUs" : "SIMD kernels",
1585 bGPU ? "CPU only" : "plain-C kernels");
1593 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1597 *kernel_type = nbnxnk4x4_PlainC;
1598 *ewald_excl = ewaldexclTable;
1600 #ifdef GMX_NBNXN_SIMD
1602 #ifdef GMX_NBNXN_SIMD_4XN
1603 *kernel_type = nbnxnk4xN_SIMD_4xN;
1605 #ifdef GMX_NBNXN_SIMD_2XNN
1606 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1609 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1610 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1611 * Currently this is based on the SIMD acceleration choice,
1612 * but it might be better to decide this at runtime based on CPU.
1614 * 4xN calculates more (zero) interactions, but has less pair-search
1615 * work and much better kernel instruction scheduling.
1617 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1618 * which doesn't have FMA, both the analytical and tabulated Ewald
1619 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1620 * 2x(4+4) because it results in significantly fewer pairs.
1621 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1622 * 10% with HT, 50% without HT. As we currently don't detect the actual
1623 * use of HT, use 4x8 to avoid a potential performance hit.
1624 * On Intel Haswell 4x8 is always faster.
1626 *kernel_type = nbnxnk4xN_SIMD_4xN;
1628 #ifndef GMX_SIMD_HAVE_FMA
1629 if (EEL_PME_EWALD(ir->coulombtype) ||
1630 EVDW_PME(ir->vdwtype))
1632 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1633 * There are enough instructions to make 2x(4+4) efficient.
1635 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1638 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1641 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1643 #ifdef GMX_NBNXN_SIMD_4XN
1644 *kernel_type = nbnxnk4xN_SIMD_4xN;
1646 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1649 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1651 #ifdef GMX_NBNXN_SIMD_2XNN
1652 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1654 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1658 /* Analytical Ewald exclusion correction is only an option in
1660 * Since table lookup's don't parallelize with SIMD, analytical
1661 * will probably always be faster for a SIMD width of 8 or more.
1662 * With FMA analytical is sometimes faster for a width if 4 as well.
1663 * On BlueGene/Q, this is faster regardless of precision.
1664 * In single precision, this is faster on Bulldozer.
1666 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1667 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1668 defined GMX_SIMD_IBM_QPX
1669 *ewald_excl = ewaldexclAnalytical;
1671 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1673 *ewald_excl = ewaldexclTable;
1675 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1677 *ewald_excl = ewaldexclAnalytical;
1681 #endif /* GMX_NBNXN_SIMD */
1685 const char *lookup_nbnxn_kernel_name(int kernel_type)
1687 const char *returnvalue = NULL;
1688 switch (kernel_type)
1691 returnvalue = "not set";
1693 case nbnxnk4x4_PlainC:
1694 returnvalue = "plain C";
1696 case nbnxnk4xN_SIMD_4xN:
1697 case nbnxnk4xN_SIMD_2xNN:
1698 #ifdef GMX_NBNXN_SIMD
1699 #if defined GMX_SIMD_X86_SSE2
1700 returnvalue = "SSE2";
1701 #elif defined GMX_SIMD_X86_SSE4_1
1702 returnvalue = "SSE4.1";
1703 #elif defined GMX_SIMD_X86_AVX_128_FMA
1704 returnvalue = "AVX_128_FMA";
1705 #elif defined GMX_SIMD_X86_AVX_256
1706 returnvalue = "AVX_256";
1707 #elif defined GMX_SIMD_X86_AVX2_256
1708 returnvalue = "AVX2_256";
1710 returnvalue = "SIMD";
1712 #else /* GMX_NBNXN_SIMD */
1713 returnvalue = "not available";
1714 #endif /* GMX_NBNXN_SIMD */
1716 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1717 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1721 gmx_fatal(FARGS, "Illegal kernel type selected");
1728 static void pick_nbnxn_kernel(FILE *fp,
1729 const t_commrec *cr,
1730 gmx_bool use_simd_kernels,
1732 gmx_bool bEmulateGPU,
1733 const t_inputrec *ir,
1736 gmx_bool bDoNonbonded)
1738 assert(kernel_type);
1740 *kernel_type = nbnxnkNotSet;
1741 *ewald_excl = ewaldexclTable;
1745 *kernel_type = nbnxnk8x8x8_PlainC;
1749 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1754 *kernel_type = nbnxnk8x8x8_CUDA;
1757 if (*kernel_type == nbnxnkNotSet)
1759 /* LJ PME with LB combination rule does 7 mesh operations.
1760 * This so slow that we don't compile SIMD non-bonded kernels for that.
1762 if (use_simd_kernels &&
1763 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1765 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1769 *kernel_type = nbnxnk4x4_PlainC;
1773 if (bDoNonbonded && fp != NULL)
1775 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1776 lookup_nbnxn_kernel_name(*kernel_type),
1777 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1778 nbnxn_kernel_to_cj_size(*kernel_type));
1780 if (nbnxnk4x4_PlainC == *kernel_type ||
1781 nbnxnk8x8x8_PlainC == *kernel_type)
1783 md_print_warn(cr, fp,
1784 "WARNING: Using the slow %s kernels. This should\n"
1785 "not happen during routine usage on supported platforms.\n\n",
1786 lookup_nbnxn_kernel_name(*kernel_type));
1791 static void pick_nbnxn_resources(const t_commrec *cr,
1792 const gmx_hw_info_t *hwinfo,
1793 gmx_bool bDoNonbonded,
1795 gmx_bool *bEmulateGPU,
1796 const gmx_gpu_opt_t *gpu_opt)
1798 gmx_bool bEmulateGPUEnvVarSet;
1799 char gpu_err_str[STRLEN];
1803 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1805 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1806 * GPUs (currently) only handle non-bonded calculations, we will
1807 * automatically switch to emulation if non-bonded calculations are
1808 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1809 * way to turn off GPU initialization, data movement, and cleanup.
1811 * GPU emulation can be useful to assess the performance one can expect by
1812 * adding GPU(s) to the machine. The conditional below allows this even
1813 * if mdrun is compiled without GPU acceleration support.
1814 * Note that you should freezing the system as otherwise it will explode.
1816 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1818 gpu_opt->ncuda_dev_use > 0));
1820 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1822 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1824 /* Each PP node will use the intra-node id-th device from the
1825 * list of detected/selected GPUs. */
1826 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1827 &hwinfo->gpu_info, gpu_opt))
1829 /* At this point the init should never fail as we made sure that
1830 * we have all the GPUs we need. If it still does, we'll bail. */
1831 gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1833 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1834 cr->rank_pp_intranode),
1838 /* Here we actually turn on hardware GPU acceleration */
1843 gmx_bool uses_simple_tables(int cutoff_scheme,
1844 nonbonded_verlet_t *nbv,
1847 gmx_bool bUsesSimpleTables = TRUE;
1850 switch (cutoff_scheme)
1853 bUsesSimpleTables = TRUE;
1856 assert(NULL != nbv && NULL != nbv->grp);
1857 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1858 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1861 gmx_incons("unimplemented");
1863 return bUsesSimpleTables;
1866 static void init_ewald_f_table(interaction_const_t *ic,
1867 gmx_bool bUsesSimpleTables,
1872 if (bUsesSimpleTables)
1874 /* Get the Ewald table spacing based on Coulomb and/or LJ
1875 * Ewald coefficients and rtol.
1877 ic->tabq_scale = ewald_spline3_table_scale(ic);
1879 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1880 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1884 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1885 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1886 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1889 sfree_aligned(ic->tabq_coul_FDV0);
1890 sfree_aligned(ic->tabq_coul_F);
1891 sfree_aligned(ic->tabq_coul_V);
1893 sfree_aligned(ic->tabq_vdw_FDV0);
1894 sfree_aligned(ic->tabq_vdw_F);
1895 sfree_aligned(ic->tabq_vdw_V);
1897 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1899 /* Create the original table data in FDV0 */
1900 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1901 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1902 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1903 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1904 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1907 if (EVDW_PME(ic->vdwtype))
1909 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1910 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1911 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1912 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1913 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1917 void init_interaction_const_tables(FILE *fp,
1918 interaction_const_t *ic,
1919 gmx_bool bUsesSimpleTables,
1924 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1926 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1930 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1931 1/ic->tabq_scale, ic->tabq_size);
1936 static void clear_force_switch_constants(shift_consts_t *sc)
1943 static void force_switch_constants(real p,
1947 /* Here we determine the coefficient for shifting the force to zero
1948 * between distance rsw and the cut-off rc.
1949 * For a potential of r^-p, we have force p*r^-(p+1).
1950 * But to save flops we absorb p in the coefficient.
1952 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1953 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1955 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1956 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1957 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1960 static void potential_switch_constants(real rsw, real rc,
1961 switch_consts_t *sc)
1963 /* The switch function is 1 at rsw and 0 at rc.
1964 * The derivative and second derivate are zero at both ends.
1965 * rsw = max(r - r_switch, 0)
1966 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1967 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1968 * force = force*dsw - potential*sw
1971 sc->c3 = -10*pow(rc - rsw, -3);
1972 sc->c4 = 15*pow(rc - rsw, -4);
1973 sc->c5 = -6*pow(rc - rsw, -5);
1977 init_interaction_const(FILE *fp,
1978 const t_commrec gmx_unused *cr,
1979 interaction_const_t **interaction_const,
1980 const t_forcerec *fr,
1983 interaction_const_t *ic;
1984 gmx_bool bUsesSimpleTables = TRUE;
1988 /* Just allocate something so we can free it */
1989 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1990 snew_aligned(ic->tabq_coul_F, 16, 32);
1991 snew_aligned(ic->tabq_coul_V, 16, 32);
1993 ic->rlist = fr->rlist;
1994 ic->rlistlong = fr->rlistlong;
1997 ic->vdwtype = fr->vdwtype;
1998 ic->vdw_modifier = fr->vdw_modifier;
1999 ic->rvdw = fr->rvdw;
2000 ic->rvdw_switch = fr->rvdw_switch;
2001 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
2002 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
2003 ic->sh_lj_ewald = 0;
2004 clear_force_switch_constants(&ic->dispersion_shift);
2005 clear_force_switch_constants(&ic->repulsion_shift);
2007 switch (ic->vdw_modifier)
2009 case eintmodPOTSHIFT:
2010 /* Only shift the potential, don't touch the force */
2011 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2012 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
2013 if (EVDW_PME(ic->vdwtype))
2017 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2018 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2021 case eintmodFORCESWITCH:
2022 /* Switch the force, switch and shift the potential */
2023 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2024 &ic->dispersion_shift);
2025 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2026 &ic->repulsion_shift);
2028 case eintmodPOTSWITCH:
2029 /* Switch the potential and force */
2030 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2034 case eintmodEXACTCUTOFF:
2035 /* Nothing to do here */
2038 gmx_incons("unimplemented potential modifier");
2041 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2043 /* Electrostatics */
2044 ic->eeltype = fr->eeltype;
2045 ic->coulomb_modifier = fr->coulomb_modifier;
2046 ic->rcoulomb = fr->rcoulomb;
2047 ic->epsilon_r = fr->epsilon_r;
2048 ic->epsfac = fr->epsfac;
2049 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2051 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2053 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2060 /* Reaction-field */
2061 if (EEL_RF(ic->eeltype))
2063 ic->epsilon_rf = fr->epsilon_rf;
2064 ic->k_rf = fr->k_rf;
2065 ic->c_rf = fr->c_rf;
2069 /* For plain cut-off we might use the reaction-field kernels */
2070 ic->epsilon_rf = ic->epsilon_r;
2072 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2074 ic->c_rf = 1/ic->rcoulomb;
2084 real dispersion_shift;
2086 dispersion_shift = ic->dispersion_shift.cpot;
2087 if (EVDW_PME(ic->vdwtype))
2089 dispersion_shift -= ic->sh_lj_ewald;
2091 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2092 ic->repulsion_shift.cpot, dispersion_shift);
2094 if (ic->eeltype == eelCUT)
2096 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2098 else if (EEL_PME(ic->eeltype))
2100 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2105 *interaction_const = ic;
2107 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2109 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2111 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2112 * also sharing texture references. To keep the code simple, we don't
2113 * treat texture references as shared resources, but this means that
2114 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2115 * Hence, to ensure that the non-bonded kernels don't start before all
2116 * texture binding operations are finished, we need to wait for all ranks
2117 * to arrive here before continuing.
2119 * Note that we could omit this barrier if GPUs are not shared (or
2120 * texture objects are used), but as this is initialization code, there
2121 * is not point in complicating things.
2123 #ifdef GMX_THREAD_MPI
2128 #endif /* GMX_THREAD_MPI */
2131 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2132 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2135 static void init_nb_verlet(FILE *fp,
2136 nonbonded_verlet_t **nb_verlet,
2137 gmx_bool bFEP_NonBonded,
2138 const t_inputrec *ir,
2139 const t_forcerec *fr,
2140 const t_commrec *cr,
2141 const char *nbpu_opt)
2143 nonbonded_verlet_t *nbv;
2146 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2148 nbnxn_alloc_t *nb_alloc;
2149 nbnxn_free_t *nb_free;
2153 pick_nbnxn_resources(cr, fr->hwinfo,
2161 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2162 for (i = 0; i < nbv->ngrp; i++)
2164 nbv->grp[i].nbl_lists.nnbl = 0;
2165 nbv->grp[i].nbat = NULL;
2166 nbv->grp[i].kernel_type = nbnxnkNotSet;
2168 if (i == 0) /* local */
2170 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2171 nbv->bUseGPU, bEmulateGPU, ir,
2172 &nbv->grp[i].kernel_type,
2173 &nbv->grp[i].ewald_excl,
2176 else /* non-local */
2178 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2180 /* Use GPU for local, select a CPU kernel for non-local */
2181 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2183 &nbv->grp[i].kernel_type,
2184 &nbv->grp[i].ewald_excl,
2187 bHybridGPURun = TRUE;
2191 /* Use the same kernel for local and non-local interactions */
2192 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2193 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2200 /* init the NxN GPU data; the last argument tells whether we'll have
2201 * both local and non-local NB calculation on GPU */
2202 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2203 &fr->hwinfo->gpu_info, fr->gpu_opt,
2204 cr->rank_pp_intranode,
2205 (nbv->ngrp > 1) && !bHybridGPURun);
2207 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2211 nbv->min_ci_balanced = strtol(env, &end, 10);
2212 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2214 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2219 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2220 nbv->min_ci_balanced);
2225 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2228 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2229 nbv->min_ci_balanced);
2235 nbv->min_ci_balanced = 0;
2240 nbnxn_init_search(&nbv->nbs,
2241 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2242 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2244 gmx_omp_nthreads_get(emntPairsearch));
2246 for (i = 0; i < nbv->ngrp; i++)
2248 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2250 nb_alloc = &pmalloc;
2259 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2260 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2261 /* 8x8x8 "non-simple" lists are ATM always combined */
2262 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2266 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2268 gmx_bool bSimpleList;
2269 int enbnxninitcombrule;
2271 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2273 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2275 /* Plain LJ cut-off: we can optimize with combination rules */
2276 enbnxninitcombrule = enbnxninitcombruleDETECT;
2278 else if (fr->vdwtype == evdwPME)
2280 /* LJ-PME: we need to use a combination rule for the grid */
2281 if (fr->ljpme_combination_rule == eljpmeGEOM)
2283 enbnxninitcombrule = enbnxninitcombruleGEOM;
2287 enbnxninitcombrule = enbnxninitcombruleLB;
2292 /* We use a full combination matrix: no rule required */
2293 enbnxninitcombrule = enbnxninitcombruleNONE;
2297 snew(nbv->grp[i].nbat, 1);
2298 nbnxn_atomdata_init(fp,
2300 nbv->grp[i].kernel_type,
2302 fr->ntype, fr->nbfp,
2304 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2309 nbv->grp[i].nbat = nbv->grp[0].nbat;
2314 void init_forcerec(FILE *fp,
2315 const output_env_t oenv,
2318 const t_inputrec *ir,
2319 const gmx_mtop_t *mtop,
2320 const t_commrec *cr,
2326 const char *nbpu_opt,
2327 gmx_bool bNoSolvOpt,
2330 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2335 gmx_bool bGenericKernelOnly;
2336 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2337 gmx_bool bFEP_NonBonded;
2339 int *nm_ind, egp_flags;
2341 if (fr->hwinfo == NULL)
2343 /* Detect hardware, gather information.
2344 * In mdrun, hwinfo has already been set before calling init_forcerec.
2345 * Here we ignore GPUs, as tools will not use them anyhow.
2347 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2350 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2351 fr->use_simd_kernels = TRUE;
2353 fr->bDomDec = DOMAINDECOMP(cr);
2355 natoms = mtop->natoms;
2357 if (check_box(ir->ePBC, box))
2359 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2362 /* Test particle insertion ? */
2365 /* Set to the size of the molecule to be inserted (the last one) */
2366 /* Because of old style topologies, we have to use the last cg
2367 * instead of the last molecule type.
2369 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2370 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2371 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2373 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2381 /* Copy AdResS parameters */
2384 fr->adress_type = ir->adress->type;
2385 fr->adress_const_wf = ir->adress->const_wf;
2386 fr->adress_ex_width = ir->adress->ex_width;
2387 fr->adress_hy_width = ir->adress->hy_width;
2388 fr->adress_icor = ir->adress->icor;
2389 fr->adress_site = ir->adress->site;
2390 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2391 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2394 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2395 for (i = 0; i < ir->adress->n_energy_grps; i++)
2397 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2400 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2401 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2402 for (i = 0; i < fr->n_adress_tf_grps; i++)
2404 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2406 copy_rvec(ir->adress->refs, fr->adress_refs);
2410 fr->adress_type = eAdressOff;
2411 fr->adress_do_hybridpairs = FALSE;
2414 /* Copy the user determined parameters */
2415 fr->userint1 = ir->userint1;
2416 fr->userint2 = ir->userint2;
2417 fr->userint3 = ir->userint3;
2418 fr->userint4 = ir->userint4;
2419 fr->userreal1 = ir->userreal1;
2420 fr->userreal2 = ir->userreal2;
2421 fr->userreal3 = ir->userreal3;
2422 fr->userreal4 = ir->userreal4;
2425 fr->fc_stepsize = ir->fc_stepsize;
2428 fr->efep = ir->efep;
2429 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2430 if (ir->fepvals->bScCoul)
2432 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2433 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2437 fr->sc_alphacoul = 0;
2438 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2440 fr->sc_power = ir->fepvals->sc_power;
2441 fr->sc_r_power = ir->fepvals->sc_r_power;
2442 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2444 env = getenv("GMX_SCSIGMA_MIN");
2448 sscanf(env, "%lf", &dbl);
2449 fr->sc_sigma6_min = pow(dbl, 6);
2452 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2456 fr->bNonbonded = TRUE;
2457 if (getenv("GMX_NO_NONBONDED") != NULL)
2459 /* turn off non-bonded calculations */
2460 fr->bNonbonded = FALSE;
2461 md_print_warn(cr, fp,
2462 "Found environment variable GMX_NO_NONBONDED.\n"
2463 "Disabling nonbonded calculations.\n");
2466 bGenericKernelOnly = FALSE;
2468 /* We now check in the NS code whether a particular combination of interactions
2469 * can be used with water optimization, and disable it if that is not the case.
2472 if (getenv("GMX_NB_GENERIC") != NULL)
2477 "Found environment variable GMX_NB_GENERIC.\n"
2478 "Disabling all interaction-specific nonbonded kernels, will only\n"
2479 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2481 bGenericKernelOnly = TRUE;
2484 if (bGenericKernelOnly == TRUE)
2489 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2491 fr->use_simd_kernels = FALSE;
2495 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2496 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2500 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2502 /* Check if we can/should do all-vs-all kernels */
2503 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2504 fr->AllvsAll_work = NULL;
2505 fr->AllvsAll_workgb = NULL;
2507 /* All-vs-all kernels have not been implemented in 4.6, and
2508 * the SIMD group kernels are also buggy in this case. Non-SIMD
2509 * group kernels are OK. See Redmine #1249. */
2512 fr->bAllvsAll = FALSE;
2513 fr->use_simd_kernels = FALSE;
2517 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2518 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2519 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2520 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2521 "we are proceeding by disabling all CPU architecture-specific\n"
2522 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2523 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2527 /* Neighbour searching stuff */
2528 fr->cutoff_scheme = ir->cutoff_scheme;
2529 fr->bGrid = (ir->ns_type == ensGRID);
2530 fr->ePBC = ir->ePBC;
2532 if (fr->cutoff_scheme == ecutsGROUP)
2534 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2535 "removed in a future release when 'verlet' supports all interaction forms.\n";
2539 fprintf(stderr, "\n%s\n", note);
2543 fprintf(fp, "\n%s\n", note);
2547 /* Determine if we will do PBC for distances in bonded interactions */
2548 if (fr->ePBC == epbcNONE)
2550 fr->bMolPBC = FALSE;
2554 if (!DOMAINDECOMP(cr))
2556 /* The group cut-off scheme and SHAKE assume charge groups
2557 * are whole, but not using molpbc is faster in most cases.
2559 if (fr->cutoff_scheme == ecutsGROUP ||
2560 (ir->eConstrAlg == econtSHAKE &&
2561 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2562 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2564 fr->bMolPBC = ir->bPeriodicMols;
2569 if (getenv("GMX_USE_GRAPH") != NULL)
2571 fr->bMolPBC = FALSE;
2574 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2581 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2584 fr->bGB = (ir->implicit_solvent == eisGBSA);
2586 fr->rc_scaling = ir->refcoord_scaling;
2587 copy_rvec(ir->posres_com, fr->posres_com);
2588 copy_rvec(ir->posres_comB, fr->posres_comB);
2589 fr->rlist = cutoff_inf(ir->rlist);
2590 fr->rlistlong = cutoff_inf(ir->rlistlong);
2591 fr->eeltype = ir->coulombtype;
2592 fr->vdwtype = ir->vdwtype;
2593 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2595 fr->coulomb_modifier = ir->coulomb_modifier;
2596 fr->vdw_modifier = ir->vdw_modifier;
2598 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2599 switch (fr->eeltype)
2602 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2608 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2612 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2613 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2622 case eelPMEUSERSWITCH:
2623 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2628 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2632 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2636 /* Vdw: Translate from mdp settings to kernel format */
2637 switch (fr->vdwtype)
2642 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2646 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2650 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2656 case evdwENCADSHIFT:
2657 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2661 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2665 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2666 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2667 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2669 fr->rvdw = cutoff_inf(ir->rvdw);
2670 fr->rvdw_switch = ir->rvdw_switch;
2671 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2672 fr->rcoulomb_switch = ir->rcoulomb_switch;
2674 fr->bTwinRange = fr->rlistlong > fr->rlist;
2675 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2677 fr->reppow = mtop->ffparams.reppow;
2679 if (ir->cutoff_scheme == ecutsGROUP)
2681 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2682 && !EVDW_PME(fr->vdwtype));
2683 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2684 fr->bcoultab = !(fr->eeltype == eelCUT ||
2685 fr->eeltype == eelEWALD ||
2686 fr->eeltype == eelPME ||
2687 fr->eeltype == eelRF ||
2688 fr->eeltype == eelRF_ZERO);
2690 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2691 * going to be faster to tabulate the interaction than calling the generic kernel.
2692 * However, if generic kernels have been requested we keep things analytically.
2694 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2695 fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2696 bGenericKernelOnly == FALSE)
2698 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2700 fr->bcoultab = TRUE;
2701 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2702 * which would otherwise need two tables.
2706 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2707 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2708 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2709 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2711 if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2713 fr->bcoultab = TRUE;
2717 if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2719 fr->bcoultab = TRUE;
2721 if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2726 if (getenv("GMX_REQUIRE_TABLES"))
2729 fr->bcoultab = TRUE;
2734 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2735 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2738 if (fr->bvdwtab == TRUE)
2740 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2741 fr->nbkernel_vdw_modifier = eintmodNONE;
2743 if (fr->bcoultab == TRUE)
2745 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2746 fr->nbkernel_elec_modifier = eintmodNONE;
2750 if (ir->cutoff_scheme == ecutsVERLET)
2752 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2754 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2756 fr->bvdwtab = FALSE;
2757 fr->bcoultab = FALSE;
2760 /* Tables are used for direct ewald sum */
2763 if (EEL_PME(ir->coulombtype))
2767 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2769 if (ir->coulombtype == eelP3M_AD)
2771 please_cite(fp, "Hockney1988");
2772 please_cite(fp, "Ballenegger2012");
2776 please_cite(fp, "Essmann95a");
2779 if (ir->ewald_geometry == eewg3DC)
2783 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2785 please_cite(fp, "In-Chul99a");
2788 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2789 init_ewald_tab(&(fr->ewald_table), ir, fp);
2792 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2793 1/fr->ewaldcoeff_q);
2797 if (EVDW_PME(ir->vdwtype))
2801 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2803 please_cite(fp, "Essmann95a");
2804 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2807 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2808 1/fr->ewaldcoeff_lj);
2812 /* Electrostatics */
2813 fr->epsilon_r = ir->epsilon_r;
2814 fr->epsilon_rf = ir->epsilon_rf;
2815 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2817 /* Parameters for generalized RF */
2821 if (fr->eeltype == eelGRF)
2823 init_generalized_rf(fp, mtop, ir, fr);
2826 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2827 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2828 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2829 IR_ELEC_FIELD(*ir) ||
2830 (fr->adress_icor != eAdressICOff)
2833 if (fr->cutoff_scheme == ecutsGROUP &&
2834 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2836 /* Count the total number of charge groups */
2837 fr->cg_nalloc = ncg_mtop(mtop);
2838 srenew(fr->cg_cm, fr->cg_nalloc);
2840 if (fr->shift_vec == NULL)
2842 snew(fr->shift_vec, SHIFTS);
2845 if (fr->fshift == NULL)
2847 snew(fr->fshift, SHIFTS);
2850 if (fr->nbfp == NULL)
2852 fr->ntype = mtop->ffparams.atnr;
2853 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2854 if (EVDW_PME(fr->vdwtype))
2856 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2860 /* Copy the energy group exclusions */
2861 fr->egp_flags = ir->opts.egp_flags;
2863 /* Van der Waals stuff */
2864 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2866 if (fr->rvdw_switch >= fr->rvdw)
2868 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2869 fr->rvdw_switch, fr->rvdw);
2873 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2874 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2875 fr->rvdw_switch, fr->rvdw);
2879 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2881 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2884 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2886 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2889 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2891 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2896 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2897 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2900 fr->eDispCorr = ir->eDispCorr;
2901 if (ir->eDispCorr != edispcNO)
2903 set_avcsixtwelve(fp, fr, mtop);
2908 set_bham_b_max(fp, fr, mtop);
2911 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2913 /* Copy the GBSA data (radius, volume and surftens for each
2914 * atomtype) from the topology atomtype section to forcerec.
2916 snew(fr->atype_radius, fr->ntype);
2917 snew(fr->atype_vol, fr->ntype);
2918 snew(fr->atype_surftens, fr->ntype);
2919 snew(fr->atype_gb_radius, fr->ntype);
2920 snew(fr->atype_S_hct, fr->ntype);
2922 if (mtop->atomtypes.nr > 0)
2924 for (i = 0; i < fr->ntype; i++)
2926 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2928 for (i = 0; i < fr->ntype; i++)
2930 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2932 for (i = 0; i < fr->ntype; i++)
2934 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2936 for (i = 0; i < fr->ntype; i++)
2938 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2940 for (i = 0; i < fr->ntype; i++)
2942 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2946 /* Generate the GB table if needed */
2950 fr->gbtabscale = 2000;
2952 fr->gbtabscale = 500;
2956 fr->gbtab = make_gb_table(oenv, fr);
2958 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2960 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2961 if (!DOMAINDECOMP(cr))
2963 make_local_gb(cr, fr->born, ir->gb_algorithm);
2967 /* Set the charge scaling */
2968 if (fr->epsilon_r != 0)
2970 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2974 /* eps = 0 is infinite dieletric: no coulomb interactions */
2978 /* Reaction field constants */
2979 if (EEL_RF(fr->eeltype))
2981 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2982 fr->rcoulomb, fr->temp, fr->zsquare, box,
2983 &fr->kappa, &fr->k_rf, &fr->c_rf);
2986 /*This now calculates sum for q and c6*/
2987 set_chargesum(fp, fr, mtop);
2989 /* if we are using LR electrostatics, and they are tabulated,
2990 * the tables will contain modified coulomb interactions.
2991 * Since we want to use the non-shifted ones for 1-4
2992 * coulombic interactions, we must have an extra set of tables.
2995 /* Construct tables.
2996 * A little unnecessary to make both vdw and coul tables sometimes,
2997 * but what the heck... */
2999 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
3000 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
3002 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
3003 fr->coulomb_modifier != eintmodNONE ||
3004 fr->vdw_modifier != eintmodNONE ||
3005 fr->bBHAM || fr->bEwald) &&
3006 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
3007 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
3008 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
3010 negp_pp = ir->opts.ngener - ir->nwall;
3014 bSomeNormalNbListsAreInUse = TRUE;
3019 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
3020 for (egi = 0; egi < negp_pp; egi++)
3022 for (egj = egi; egj < negp_pp; egj++)
3024 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3025 if (!(egp_flags & EGP_EXCL))
3027 if (egp_flags & EGP_TABLE)
3033 bSomeNormalNbListsAreInUse = TRUE;
3038 if (bSomeNormalNbListsAreInUse)
3040 fr->nnblists = negptable + 1;
3044 fr->nnblists = negptable;
3046 if (fr->nnblists > 1)
3048 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3057 snew(fr->nblists, fr->nnblists);
3059 /* This code automatically gives table length tabext without cut-off's,
3060 * in that case grompp should already have checked that we do not need
3061 * normal tables and we only generate tables for 1-4 interactions.
3063 rtab = ir->rlistlong + ir->tabext;
3067 /* make tables for ordinary interactions */
3068 if (bSomeNormalNbListsAreInUse)
3070 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3073 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3075 if (!bMakeSeparate14Table)
3077 fr->tab14 = fr->nblists[0].table_elec_vdw;
3087 /* Read the special tables for certain energy group pairs */
3088 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3089 for (egi = 0; egi < negp_pp; egi++)
3091 for (egj = egi; egj < negp_pp; egj++)
3093 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3094 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3096 nbl = &(fr->nblists[m]);
3097 if (fr->nnblists > 1)
3099 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3101 /* Read the table file with the two energy groups names appended */
3102 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3103 *mtop->groups.grpname[nm_ind[egi]],
3104 *mtop->groups.grpname[nm_ind[egj]],
3108 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3109 *mtop->groups.grpname[nm_ind[egi]],
3110 *mtop->groups.grpname[nm_ind[egj]],
3111 &fr->nblists[fr->nnblists/2+m]);
3115 else if (fr->nnblists > 1)
3117 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3123 else if ((fr->eDispCorr != edispcNO) &&
3124 ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3125 (fr->vdw_modifier == eintmodFORCESWITCH) ||
3126 (fr->vdw_modifier == eintmodPOTSHIFT)))
3128 /* Tables might not be used for the potential modifier interactions per se, but
3129 * we still need them to evaluate switch/shift dispersion corrections in this case.
3131 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3134 if (bMakeSeparate14Table)
3136 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3137 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3138 GMX_MAKETABLES_14ONLY);
3141 /* Read AdResS Thermo Force table if needed */
3142 if (fr->adress_icor == eAdressICThermoForce)
3144 /* old todo replace */
3146 if (ir->adress->n_tf_grps > 0)
3148 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3153 /* load the default table */
3154 snew(fr->atf_tabs, 1);
3155 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3160 fr->nwall = ir->nwall;
3161 if (ir->nwall && ir->wall_type == ewtTABLE)
3163 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3168 fcd->bondtab = make_bonded_tables(fp,
3169 F_TABBONDS, F_TABBONDSNC,
3171 fcd->angletab = make_bonded_tables(fp,
3174 fcd->dihtab = make_bonded_tables(fp,
3182 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3186 /* QM/MM initialization if requested
3190 fprintf(stderr, "QM/MM calculation requested.\n");
3193 fr->bQMMM = ir->bQMMM;
3194 fr->qr = mk_QMMMrec();
3196 /* Set all the static charge group info */
3197 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3199 &fr->bExcl_IntraCGAll_InterCGNone);
3200 if (DOMAINDECOMP(cr))
3206 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3209 if (!DOMAINDECOMP(cr))
3211 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3212 mtop->natoms, mtop->natoms, mtop->natoms);
3215 fr->print_force = print_force;
3218 /* coarse load balancing vars */
3223 /* Initialize neighbor search */
3224 init_ns(fp, cr, &fr->ns, fr, mtop);
3226 if (cr->duty & DUTY_PP)
3228 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3232 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3237 /* Initialize the thread working data for bonded interactions */
3238 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3240 snew(fr->excl_load, fr->nthreads+1);
3242 if (fr->cutoff_scheme == ecutsVERLET)
3244 if (ir->rcoulomb != ir->rvdw)
3246 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3249 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3252 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3253 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3255 if (ir->eDispCorr != edispcNO)
3257 calc_enervirdiff(fp, ir->eDispCorr, fr);
3261 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3262 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3263 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3265 void pr_forcerec(FILE *fp, t_forcerec *fr)
3269 pr_real(fp, fr->rlist);
3270 pr_real(fp, fr->rcoulomb);
3271 pr_real(fp, fr->fudgeQQ);
3272 pr_bool(fp, fr->bGrid);
3273 pr_bool(fp, fr->bTwinRange);
3274 /*pr_int(fp,fr->cg0);
3275 pr_int(fp,fr->hcg);*/
3276 for (i = 0; i < fr->nnblists; i++)
3278 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3280 pr_real(fp, fr->rcoulomb_switch);
3281 pr_real(fp, fr->rcoulomb);
3286 void forcerec_set_excl_load(t_forcerec *fr,
3287 const gmx_localtop_t *top)
3290 int t, i, j, ntot, n, ntarget;
3292 ind = top->excls.index;
3296 for (i = 0; i < top->excls.nr; i++)
3298 for (j = ind[i]; j < ind[i+1]; j++)
3307 fr->excl_load[0] = 0;
3310 for (t = 1; t <= fr->nthreads; t++)
3312 ntarget = (ntot*t)/fr->nthreads;
3313 while (i < top->excls.nr && n < ntarget)
3315 for (j = ind[i]; j < ind[i+1]; j++)
3324 fr->excl_load[t] = i;
3328 /* Frees GPU memory and destroys the CUDA context.
3330 * Note that this function needs to be called even if GPUs are not used
3331 * in this run because the PME ranks have no knowledge of whether GPUs
3332 * are used or not, but all ranks need to enter the barrier below.
3334 void free_gpu_resources(const t_forcerec *fr,
3335 const t_commrec *cr)
3337 gmx_bool bIsPPrankUsingGPU;
3338 char gpu_err_str[STRLEN];
3340 bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
3342 if (bIsPPrankUsingGPU)
3344 /* free nbnxn data in GPU memory */
3345 nbnxn_cuda_free(fr->nbv->cu_nbv);
3347 /* With tMPI we need to wait for all ranks to finish deallocation before
3348 * destroying the context in free_gpu() as some ranks may be sharing
3350 * Note: as only PP ranks need to free GPU resources, so it is safe to
3351 * not call the barrier on PME ranks.
3353 #ifdef GMX_THREAD_MPI
3358 #endif /* GMX_THREAD_MPI */
3360 /* uninitialize GPU (by destroying the context) */
3361 if (!free_gpu(gpu_err_str))
3363 gmx_warning("On rank %d failed to free GPU #%d: %s",
3364 cr->nodeid, get_current_gpu_device_id(), gpu_err_str);