1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
50 #include "gmx_fatal.h"
51 #include "gmx_fatal_collective.h"
55 #include "nonbonded.h"
64 #include "md_support.h"
65 #include "md_logging.h"
70 #include "mtop_util.h"
71 #include "nbnxn_search.h"
72 #include "nbnxn_atomdata.h"
73 #include "nbnxn_consts.h"
75 #include "gmx_omp_nthreads.h"
76 #include "gmx_detect_hardware.h"
79 /* MSVC definition for __cpuid() */
83 #include "types/nbnxn_cuda_types_ext.h"
84 #include "gpu_utils.h"
85 #include "nbnxn_cuda_data_mgmt.h"
86 #include "pmalloc_cuda.h"
88 t_forcerec *mk_forcerec(void)
98 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
102 for (i = 0; (i < atnr); i++)
104 for (j = 0; (j < atnr); j++)
106 fprintf(fp, "%2d - %2d", i, j);
109 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
110 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
114 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
115 C12(nbfp, atnr, i, j)/12.0);
122 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
130 snew(nbfp, 3*atnr*atnr);
131 for (i = k = 0; (i < atnr); i++)
133 for (j = 0; (j < atnr); j++, k++)
135 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
136 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
137 /* nbfp now includes the 6.0 derivative prefactor */
138 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
144 snew(nbfp, 2*atnr*atnr);
145 for (i = k = 0; (i < atnr); i++)
147 for (j = 0; (j < atnr); j++, k++)
149 /* nbfp now includes the 6.0/12.0 derivative prefactors */
150 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
151 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
159 /* This routine sets fr->solvent_opt to the most common solvent in the
160 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
161 * the fr->solvent_type array with the correct type (or esolNO).
163 * Charge groups that fulfill the conditions but are not identical to the
164 * most common one will be marked as esolNO in the solvent_type array.
166 * TIP3p is identical to SPC for these purposes, so we call it
167 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
169 * NOTE: QM particle should not
170 * become an optimized solvent. Not even if there is only one charge
180 } solvent_parameters_t;
183 check_solvent_cg(const gmx_moltype_t *molt,
186 const unsigned char *qm_grpnr,
187 const t_grps *qm_grps,
189 int *n_solvent_parameters,
190 solvent_parameters_t **solvent_parameters_p,
194 const t_blocka * excl;
205 solvent_parameters_t *solvent_parameters;
207 /* We use a list with parameters for each solvent type.
208 * Every time we discover a new molecule that fulfills the basic
209 * conditions for a solvent we compare with the previous entries
210 * in these lists. If the parameters are the same we just increment
211 * the counter for that type, and otherwise we create a new type
212 * based on the current molecule.
214 * Once we've finished going through all molecules we check which
215 * solvent is most common, and mark all those molecules while we
216 * clear the flag on all others.
219 solvent_parameters = *solvent_parameters_p;
221 /* Mark the cg first as non optimized */
224 /* Check if this cg has no exclusions with atoms in other charge groups
225 * and all atoms inside the charge group excluded.
226 * We only have 3 or 4 atom solvent loops.
228 if (GET_CGINFO_EXCL_INTER(cginfo) ||
229 !GET_CGINFO_EXCL_INTRA(cginfo))
234 /* Get the indices of the first atom in this charge group */
235 j0 = molt->cgs.index[cg0];
236 j1 = molt->cgs.index[cg0+1];
238 /* Number of atoms in our molecule */
244 "Moltype '%s': there are %d atoms in this charge group\n",
248 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
251 if (nj < 3 || nj > 4)
256 /* Check if we are doing QM on this group */
258 if (qm_grpnr != NULL)
260 for (j = j0; j < j1 && !qm; j++)
262 qm = (qm_grpnr[j] < qm_grps->nr - 1);
265 /* Cannot use solvent optimization with QM */
271 atom = molt->atoms.atom;
273 /* Still looks like a solvent, time to check parameters */
275 /* If it is perturbed (free energy) we can't use the solvent loops,
276 * so then we just skip to the next molecule.
280 for (j = j0; j < j1 && !perturbed; j++)
282 perturbed = PERTURBED(atom[j]);
290 /* Now it's only a question if the VdW and charge parameters
291 * are OK. Before doing the check we compare and see if they are
292 * identical to a possible previous solvent type.
293 * First we assign the current types and charges.
295 for (j = 0; j < nj; j++)
297 tmp_vdwtype[j] = atom[j0+j].type;
298 tmp_charge[j] = atom[j0+j].q;
301 /* Does it match any previous solvent type? */
302 for (k = 0; k < *n_solvent_parameters; k++)
307 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
308 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
309 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
314 /* Check that types & charges match for all atoms in molecule */
315 for (j = 0; j < nj && match == TRUE; j++)
317 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
321 if (tmp_charge[j] != solvent_parameters[k].charge[j])
328 /* Congratulations! We have a matched solvent.
329 * Flag it with this type for later processing.
332 solvent_parameters[k].count += nmol;
334 /* We are done with this charge group */
339 /* If we get here, we have a tentative new solvent type.
340 * Before we add it we must check that it fulfills the requirements
341 * of the solvent optimized loops. First determine which atoms have
344 for (j = 0; j < nj; j++)
347 tjA = tmp_vdwtype[j];
349 /* Go through all other tpes and see if any have non-zero
350 * VdW parameters when combined with this one.
352 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
354 /* We already checked that the atoms weren't perturbed,
355 * so we only need to check state A now.
359 has_vdw[j] = (has_vdw[j] ||
360 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
361 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
362 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
367 has_vdw[j] = (has_vdw[j] ||
368 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
369 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
374 /* Now we know all we need to make the final check and assignment. */
378 * For this we require thatn all atoms have charge,
379 * the charges on atom 2 & 3 should be the same, and only
380 * atom 1 might have VdW.
382 if (has_vdw[1] == FALSE &&
383 has_vdw[2] == FALSE &&
384 tmp_charge[0] != 0 &&
385 tmp_charge[1] != 0 &&
386 tmp_charge[2] == tmp_charge[1])
388 srenew(solvent_parameters, *n_solvent_parameters+1);
389 solvent_parameters[*n_solvent_parameters].model = esolSPC;
390 solvent_parameters[*n_solvent_parameters].count = nmol;
391 for (k = 0; k < 3; k++)
393 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
394 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
397 *cg_sp = *n_solvent_parameters;
398 (*n_solvent_parameters)++;
403 /* Or could it be a TIP4P?
404 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
405 * Only atom 1 mght have VdW.
407 if (has_vdw[1] == FALSE &&
408 has_vdw[2] == FALSE &&
409 has_vdw[3] == FALSE &&
410 tmp_charge[0] == 0 &&
411 tmp_charge[1] != 0 &&
412 tmp_charge[2] == tmp_charge[1] &&
415 srenew(solvent_parameters, *n_solvent_parameters+1);
416 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
417 solvent_parameters[*n_solvent_parameters].count = nmol;
418 for (k = 0; k < 4; k++)
420 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
421 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
424 *cg_sp = *n_solvent_parameters;
425 (*n_solvent_parameters)++;
429 *solvent_parameters_p = solvent_parameters;
433 check_solvent(FILE * fp,
434 const gmx_mtop_t * mtop,
436 cginfo_mb_t *cginfo_mb)
439 const t_block * mols;
440 const gmx_moltype_t *molt;
441 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
442 int n_solvent_parameters;
443 solvent_parameters_t *solvent_parameters;
449 fprintf(debug, "Going to determine what solvent types we have.\n");
454 n_solvent_parameters = 0;
455 solvent_parameters = NULL;
456 /* Allocate temporary array for solvent type */
457 snew(cg_sp, mtop->nmolblock);
461 for (mb = 0; mb < mtop->nmolblock; mb++)
463 molt = &mtop->moltype[mtop->molblock[mb].type];
465 /* Here we have to loop over all individual molecules
466 * because we need to check for QMMM particles.
468 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
469 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
470 nmol = mtop->molblock[mb].nmol/nmol_ch;
471 for (mol = 0; mol < nmol_ch; mol++)
474 am = mol*cgs->index[cgs->nr];
475 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
477 check_solvent_cg(molt, cg_mol, nmol,
478 mtop->groups.grpnr[egcQMMM] ?
479 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
480 &mtop->groups.grps[egcQMMM],
482 &n_solvent_parameters, &solvent_parameters,
483 cginfo_mb[mb].cginfo[cgm+cg_mol],
484 &cg_sp[mb][cgm+cg_mol]);
487 cg_offset += cgs->nr;
488 at_offset += cgs->index[cgs->nr];
491 /* Puh! We finished going through all charge groups.
492 * Now find the most common solvent model.
495 /* Most common solvent this far */
497 for (i = 0; i < n_solvent_parameters; i++)
500 solvent_parameters[i].count > solvent_parameters[bestsp].count)
508 bestsol = solvent_parameters[bestsp].model;
515 #ifdef DISABLE_WATER_NLIST
520 for (mb = 0; mb < mtop->nmolblock; mb++)
522 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
523 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
524 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
526 if (cg_sp[mb][i] == bestsp)
528 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
533 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
540 if (bestsol != esolNO && fp != NULL)
542 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
544 solvent_parameters[bestsp].count);
547 sfree(solvent_parameters);
548 fr->solvent_opt = bestsol;
552 acNONE = 0, acCONSTRAINT, acSETTLE
555 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
556 t_forcerec *fr, gmx_bool bNoSolvOpt,
557 gmx_bool *bExcl_IntraCGAll_InterCGNone)
560 const t_blocka *excl;
561 const gmx_moltype_t *molt;
562 const gmx_molblock_t *molb;
563 cginfo_mb_t *cginfo_mb;
566 int cg_offset, a_offset, cgm, am;
567 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
571 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
573 ncg_tot = ncg_mtop(mtop);
574 snew(cginfo_mb, mtop->nmolblock);
576 snew(type_VDW, fr->ntype);
577 for (ai = 0; ai < fr->ntype; ai++)
579 type_VDW[ai] = FALSE;
580 for (j = 0; j < fr->ntype; j++)
582 type_VDW[ai] = type_VDW[ai] ||
584 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
585 C12(fr->nbfp, fr->ntype, ai, j) != 0;
589 *bExcl_IntraCGAll_InterCGNone = TRUE;
592 snew(bExcl, excl_nalloc);
595 for (mb = 0; mb < mtop->nmolblock; mb++)
597 molb = &mtop->molblock[mb];
598 molt = &mtop->moltype[molb->type];
602 /* Check if the cginfo is identical for all molecules in this block.
603 * If so, we only need an array of the size of one molecule.
604 * Otherwise we make an array of #mol times #cgs per molecule.
608 for (m = 0; m < molb->nmol; m++)
610 am = m*cgs->index[cgs->nr];
611 for (cg = 0; cg < cgs->nr; cg++)
614 a1 = cgs->index[cg+1];
615 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
616 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
620 if (mtop->groups.grpnr[egcQMMM] != NULL)
622 for (ai = a0; ai < a1; ai++)
624 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
625 mtop->groups.grpnr[egcQMMM][a_offset +ai])
634 cginfo_mb[mb].cg_start = cg_offset;
635 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
636 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
637 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
638 cginfo = cginfo_mb[mb].cginfo;
640 /* Set constraints flags for constrained atoms */
641 snew(a_con, molt->atoms.nr);
642 for (ftype = 0; ftype < F_NRE; ftype++)
644 if (interaction_function[ftype].flags & IF_CONSTRAINT)
649 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
653 for (a = 0; a < nral; a++)
655 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
656 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
662 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
665 am = m*cgs->index[cgs->nr];
666 for (cg = 0; cg < cgs->nr; cg++)
669 a1 = cgs->index[cg+1];
671 /* Store the energy group in cginfo */
672 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
673 SET_CGINFO_GID(cginfo[cgm+cg], gid);
675 /* Check the intra/inter charge group exclusions */
676 if (a1-a0 > excl_nalloc)
678 excl_nalloc = a1 - a0;
679 srenew(bExcl, excl_nalloc);
681 /* bExclIntraAll: all intra cg interactions excluded
682 * bExclInter: any inter cg interactions excluded
684 bExclIntraAll = TRUE;
688 for (ai = a0; ai < a1; ai++)
690 /* Check VDW and electrostatic interactions */
691 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
692 type_VDW[molt->atoms.atom[ai].typeB]);
693 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
694 molt->atoms.atom[ai].qB != 0);
696 /* Clear the exclusion list for atom ai */
697 for (aj = a0; aj < a1; aj++)
699 bExcl[aj-a0] = FALSE;
701 /* Loop over all the exclusions of atom ai */
702 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
705 if (aj < a0 || aj >= a1)
714 /* Check if ai excludes a0 to a1 */
715 for (aj = a0; aj < a1; aj++)
719 bExclIntraAll = FALSE;
726 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
729 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
737 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
741 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
743 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
745 /* The size in cginfo is currently only read with DD */
746 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
750 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
754 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
756 /* Store the charge group size */
757 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
759 if (!bExclIntraAll || bExclInter)
761 *bExcl_IntraCGAll_InterCGNone = FALSE;
768 cg_offset += molb->nmol*cgs->nr;
769 a_offset += molb->nmol*cgs->index[cgs->nr];
773 /* the solvent optimizer is called after the QM is initialized,
774 * because we don't want to have the QM subsystemto become an
778 check_solvent(fplog, mtop, fr, cginfo_mb);
780 if (getenv("GMX_NO_SOLV_OPT"))
784 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
785 "Disabling all solvent optimization\n");
787 fr->solvent_opt = esolNO;
791 fr->solvent_opt = esolNO;
793 if (!fr->solvent_opt)
795 for (mb = 0; mb < mtop->nmolblock; mb++)
797 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
799 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
807 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
812 ncg = cgi_mb[nmb-1].cg_end;
815 for (cg = 0; cg < ncg; cg++)
817 while (cg >= cgi_mb[mb].cg_end)
822 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
828 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
830 double qsum, q2sum, q;
832 const t_atoms *atoms;
836 for (mb = 0; mb < mtop->nmolblock; mb++)
838 nmol = mtop->molblock[mb].nmol;
839 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
840 for (i = 0; i < atoms->nr; i++)
842 q = atoms->atom[i].q;
848 fr->q2sum[0] = q2sum;
849 if (fr->efep != efepNO)
853 for (mb = 0; mb < mtop->nmolblock; mb++)
855 nmol = mtop->molblock[mb].nmol;
856 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
857 for (i = 0; i < atoms->nr; i++)
859 q = atoms->atom[i].qB;
864 fr->q2sum[1] = q2sum;
869 fr->qsum[1] = fr->qsum[0];
870 fr->q2sum[1] = fr->q2sum[0];
874 if (fr->efep == efepNO)
876 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
880 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
881 fr->qsum[0], fr->qsum[1]);
886 void update_forcerec(FILE *log, t_forcerec *fr, matrix box)
888 if (fr->eeltype == eelGRF)
890 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
891 fr->rcoulomb, fr->temp, fr->zsquare, box,
892 &fr->kappa, &fr->k_rf, &fr->c_rf);
896 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
898 const t_atoms *atoms, *atoms_tpi;
899 const t_blocka *excl;
900 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
901 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
902 long long int npair, npair_ij, tmpi, tmpj;
904 double npair, npair_ij, tmpi, tmpj;
906 double csix, ctwelve;
915 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
923 /* Count the types so we avoid natoms^2 operations */
924 snew(typecount, ntp);
925 for (mb = 0; mb < mtop->nmolblock; mb++)
927 nmol = mtop->molblock[mb].nmol;
928 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
929 for (i = 0; i < atoms->nr; i++)
933 tpi = atoms->atom[i].type;
937 tpi = atoms->atom[i].typeB;
939 typecount[tpi] += nmol;
942 for (tpi = 0; tpi < ntp; tpi++)
944 for (tpj = tpi; tpj < ntp; tpj++)
946 tmpi = typecount[tpi];
947 tmpj = typecount[tpj];
950 npair_ij = tmpi*tmpj;
954 npair_ij = tmpi*(tmpi - 1)/2;
958 /* nbfp now includes the 6.0 derivative prefactor */
959 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
963 /* nbfp now includes the 6.0/12.0 derivative prefactors */
964 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
965 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
971 /* Subtract the excluded pairs.
972 * The main reason for substracting exclusions is that in some cases
973 * some combinations might never occur and the parameters could have
974 * any value. These unused values should not influence the dispersion
977 for (mb = 0; mb < mtop->nmolblock; mb++)
979 nmol = mtop->molblock[mb].nmol;
980 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
981 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
982 for (i = 0; (i < atoms->nr); i++)
986 tpi = atoms->atom[i].type;
990 tpi = atoms->atom[i].typeB;
993 j2 = excl->index[i+1];
994 for (j = j1; j < j2; j++)
1001 tpj = atoms->atom[k].type;
1005 tpj = atoms->atom[k].typeB;
1009 /* nbfp now includes the 6.0 derivative prefactor */
1010 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1014 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1015 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1016 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1026 /* Only correct for the interaction of the test particle
1027 * with the rest of the system.
1030 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1033 for (mb = 0; mb < mtop->nmolblock; mb++)
1035 nmol = mtop->molblock[mb].nmol;
1036 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1037 for (j = 0; j < atoms->nr; j++)
1040 /* Remove the interaction of the test charge group
1043 if (mb == mtop->nmolblock-1)
1047 if (mb == 0 && nmol == 1)
1049 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1054 tpj = atoms->atom[j].type;
1058 tpj = atoms->atom[j].typeB;
1060 for (i = 0; i < fr->n_tpi; i++)
1064 tpi = atoms_tpi->atom[i].type;
1068 tpi = atoms_tpi->atom[i].typeB;
1072 /* nbfp now includes the 6.0 derivative prefactor */
1073 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1077 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1078 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1079 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1086 if (npair - nexcl <= 0 && fplog)
1088 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1094 csix /= npair - nexcl;
1095 ctwelve /= npair - nexcl;
1099 fprintf(debug, "Counted %d exclusions\n", nexcl);
1100 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1101 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1103 fr->avcsix[q] = csix;
1104 fr->avctwelve[q] = ctwelve;
1108 if (fr->eDispCorr == edispcAllEner ||
1109 fr->eDispCorr == edispcAllEnerPres)
1111 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1112 fr->avcsix[0], fr->avctwelve[0]);
1116 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1122 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1123 const gmx_mtop_t *mtop)
1125 const t_atoms *at1, *at2;
1126 int mt1, mt2, i, j, tpi, tpj, ntypes;
1132 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1139 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1141 at1 = &mtop->moltype[mt1].atoms;
1142 for (i = 0; (i < at1->nr); i++)
1144 tpi = at1->atom[i].type;
1147 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1150 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1152 at2 = &mtop->moltype[mt2].atoms;
1153 for (j = 0; (j < at2->nr); j++)
1155 tpj = at2->atom[j].type;
1158 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1160 b = BHAMB(nbfp, ntypes, tpi, tpj);
1161 if (b > fr->bham_b_max)
1165 if ((b < bmin) || (bmin == -1))
1175 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1176 bmin, fr->bham_b_max);
1180 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1181 t_forcerec *fr, real rtab,
1182 const t_commrec *cr,
1183 const char *tabfn, char *eg1, char *eg2,
1193 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1198 sprintf(buf, "%s", tabfn);
1201 /* Append the two energy group names */
1202 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1203 eg1, eg2, ftp2ext(efXVG));
1205 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1206 /* Copy the contents of the table to separate coulomb and LJ tables too,
1207 * to improve cache performance.
1209 /* For performance reasons we want
1210 * the table data to be aligned to 16-byte. The pointers could be freed
1211 * but currently aren't.
1213 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1214 nbl->table_elec.format = nbl->table_elec_vdw.format;
1215 nbl->table_elec.r = nbl->table_elec_vdw.r;
1216 nbl->table_elec.n = nbl->table_elec_vdw.n;
1217 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1218 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1219 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1220 nbl->table_elec.ninteractions = 1;
1221 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1222 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1224 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1225 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1226 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1227 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1228 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1229 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1230 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1231 nbl->table_vdw.ninteractions = 2;
1232 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1233 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1235 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1237 for (j = 0; j < 4; j++)
1239 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1241 for (j = 0; j < 8; j++)
1243 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1248 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1249 int *ncount, int **count)
1251 const gmx_moltype_t *molt;
1253 int mt, ftype, stride, i, j, tabnr;
1255 for (mt = 0; mt < mtop->nmoltype; mt++)
1257 molt = &mtop->moltype[mt];
1258 for (ftype = 0; ftype < F_NRE; ftype++)
1260 if (ftype == ftype1 || ftype == ftype2)
1262 il = &molt->ilist[ftype];
1263 stride = 1 + NRAL(ftype);
1264 for (i = 0; i < il->nr; i += stride)
1266 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1269 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1271 if (tabnr >= *ncount)
1273 srenew(*count, tabnr+1);
1274 for (j = *ncount; j < tabnr+1; j++)
1287 static bondedtable_t *make_bonded_tables(FILE *fplog,
1288 int ftype1, int ftype2,
1289 const gmx_mtop_t *mtop,
1290 const char *basefn, const char *tabext)
1292 int i, ncount, *count;
1300 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1305 for (i = 0; i < ncount; i++)
1309 sprintf(tabfn, "%s", basefn);
1310 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1311 tabext, i, ftp2ext(efXVG));
1312 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1321 void forcerec_set_ranges(t_forcerec *fr,
1322 int ncg_home, int ncg_force,
1324 int natoms_force_constr, int natoms_f_novirsum)
1329 /* fr->ncg_force is unused in the standard code,
1330 * but it can be useful for modified code dealing with charge groups.
1332 fr->ncg_force = ncg_force;
1333 fr->natoms_force = natoms_force;
1334 fr->natoms_force_constr = natoms_force_constr;
1336 if (fr->natoms_force_constr > fr->nalloc_force)
1338 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1342 srenew(fr->f_twin, fr->nalloc_force);
1346 if (fr->bF_NoVirSum)
1348 fr->f_novirsum_n = natoms_f_novirsum;
1349 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1351 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1352 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1357 fr->f_novirsum_n = 0;
1361 static real cutoff_inf(real cutoff)
1365 cutoff = GMX_CUTOFF_INF;
1371 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1372 t_forcerec *fr, const t_inputrec *ir,
1373 const char *tabfn, const gmx_mtop_t *mtop,
1381 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1385 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1387 sprintf(buf, "%s", tabfn);
1388 for (i = 0; i < ir->adress->n_tf_grps; i++)
1390 j = ir->adress->tf_table_index[i]; /* get energy group index */
1391 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1392 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1395 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1397 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1402 gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
1403 gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1410 ir->rcoulomb == 0 &&
1412 ir->ePBC == epbcNONE &&
1413 ir->vdwtype == evdwCUT &&
1414 ir->coulombtype == eelCUT &&
1415 ir->efep == efepNO &&
1416 (ir->implicit_solvent == eisNO ||
1417 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1418 ir->gb_algorithm == egbHCT ||
1419 ir->gb_algorithm == egbOBC))) &&
1420 getenv("GMX_NO_ALLVSALL") == NULL
1423 if (bAllvsAll && ir->opts.ngener > 1)
1425 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1431 fprintf(stderr, "\n%s\n", note);
1435 fprintf(fp, "\n%s\n", note);
1441 if (bAllvsAll && fp && MASTER(cr))
1443 fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
1450 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1454 /* These thread local data structures are used for bondeds only */
1455 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1457 if (fr->nthreads > 1)
1459 snew(fr->f_t, fr->nthreads);
1460 /* Thread 0 uses the global force and energy arrays */
1461 for (t = 1; t < fr->nthreads; t++)
1463 fr->f_t[t].f = NULL;
1464 fr->f_t[t].f_nalloc = 0;
1465 snew(fr->f_t[t].fshift, SHIFTS);
1466 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1467 for (i = 0; i < egNR; i++)
1469 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1476 static void pick_nbnxn_kernel_cpu(FILE *fp,
1477 const t_commrec *cr,
1478 const gmx_cpuid_t cpuid_info,
1479 const t_inputrec *ir,
1483 *kernel_type = nbnxnk4x4_PlainC;
1484 *ewald_excl = ewaldexclTable;
1486 #ifdef GMX_NBNXN_SIMD
1488 #ifdef GMX_NBNXN_SIMD_4XN
1489 *kernel_type = nbnxnk4xN_SIMD_4xN;
1491 #ifdef GMX_NBNXN_SIMD_2XNN
1492 /* We expect the 2xNN kernels to be faster in most cases */
1493 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1496 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
1497 if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1499 /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1500 * 10% with HT, 50% without HT, but extra zeros interactions
1501 * can compensate. As we currently don't detect the actual use
1502 * of HT, switch to 4x8 to avoid a potential performance hit.
1504 *kernel_type = nbnxnk4xN_SIMD_4xN;
1507 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1509 #ifdef GMX_NBNXN_SIMD_4XN
1510 *kernel_type = nbnxnk4xN_SIMD_4xN;
1512 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1515 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1517 #ifdef GMX_NBNXN_SIMD_2XNN
1518 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1520 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1524 /* Analytical Ewald exclusion correction is only an option in the
1525 * x86 SIMD kernel. This is faster in single precision
1526 * on Bulldozer and slightly faster on Sandy Bridge.
1528 #if (defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE
1529 *ewald_excl = ewaldexclAnalytical;
1531 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1533 *ewald_excl = ewaldexclTable;
1535 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1537 *ewald_excl = ewaldexclAnalytical;
1541 #endif /* GMX_X86_SSE2 */
1545 const char *lookup_nbnxn_kernel_name(int kernel_type)
1547 const char *returnvalue = NULL;
1548 switch (kernel_type)
1550 case nbnxnkNotSet: returnvalue = "not set"; break;
1551 case nbnxnk4x4_PlainC: returnvalue = "plain C"; break;
1552 #ifndef GMX_NBNXN_SIMD
1553 case nbnxnk4xN_SIMD_4xN: returnvalue = "not available"; break;
1554 case nbnxnk4xN_SIMD_2xNN: returnvalue = "not available"; break;
1557 #if GMX_NBNXN_SIMD_BITWIDTH == 128
1558 /* x86 SIMD intrinsics can be converted to either SSE or AVX depending
1559 * on compiler flags. As we use nearly identical intrinsics, using an AVX
1560 * compiler flag without an AVX macro effectively results in AVX kernels.
1561 * For gcc we check for __AVX__
1562 * At least a check for icc should be added (if there is a macro)
1564 #if !(defined GMX_X86_AVX_128_FMA || defined __AVX__)
1565 #ifndef GMX_X86_SSE4_1
1566 case nbnxnk4xN_SIMD_4xN: returnvalue = "SSE2"; break;
1567 case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE2"; break;
1569 case nbnxnk4xN_SIMD_4xN: returnvalue = "SSE4.1"; break;
1570 case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE4.1"; break;
1573 case nbnxnk4xN_SIMD_4xN: returnvalue = "AVX-128"; break;
1574 case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-128"; break;
1577 #if GMX_NBNXN_SIMD_BITWIDTH == 256
1578 case nbnxnk4xN_SIMD_4xN: returnvalue = "AVX-256"; break;
1579 case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-256"; break;
1581 #else /* not GMX_X86_SSE2 */
1582 case nbnxnk4xN_SIMD_4xN: returnvalue = "SIMD"; break;
1583 case nbnxnk4xN_SIMD_2xNN: returnvalue = "SIMD"; break;
1586 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1587 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1591 gmx_fatal(FARGS, "Illegal kernel type selected");
1598 static void pick_nbnxn_kernel(FILE *fp,
1599 const t_commrec *cr,
1600 const gmx_hw_info_t *hwinfo,
1601 gmx_bool use_cpu_acceleration,
1603 gmx_bool bEmulateGPU,
1604 const t_inputrec *ir,
1607 gmx_bool bDoNonbonded)
1609 assert(kernel_type);
1611 *kernel_type = nbnxnkNotSet;
1612 *ewald_excl = ewaldexclTable;
1616 *kernel_type = nbnxnk8x8x8_PlainC;
1620 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1625 *kernel_type = nbnxnk8x8x8_CUDA;
1628 if (*kernel_type == nbnxnkNotSet)
1630 if (use_cpu_acceleration)
1632 pick_nbnxn_kernel_cpu(fp, cr, hwinfo->cpuid_info, ir,
1633 kernel_type, ewald_excl);
1637 *kernel_type = nbnxnk4x4_PlainC;
1641 if (bDoNonbonded && fp != NULL)
1643 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1644 lookup_nbnxn_kernel_name(*kernel_type),
1645 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1646 nbnxn_kernel_to_cj_size(*kernel_type));
1650 static void pick_nbnxn_resources(FILE *fp,
1651 const t_commrec *cr,
1652 const gmx_hw_info_t *hwinfo,
1653 gmx_bool bDoNonbonded,
1655 gmx_bool *bEmulateGPU)
1657 gmx_bool bEmulateGPUEnvVarSet;
1658 char gpu_err_str[STRLEN];
1662 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1664 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1665 * GPUs (currently) only handle non-bonded calculations, we will
1666 * automatically switch to emulation if non-bonded calculations are
1667 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1668 * way to turn off GPU initialization, data movement, and cleanup.
1670 * GPU emulation can be useful to assess the performance one can expect by
1671 * adding GPU(s) to the machine. The conditional below allows this even
1672 * if mdrun is compiled without GPU acceleration support.
1673 * Note that you should freezing the system as otherwise it will explode.
1675 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1676 (!bDoNonbonded && hwinfo->bCanUseGPU));
1678 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1680 if (hwinfo->bCanUseGPU && !(*bEmulateGPU))
1682 /* Each PP node will use the intra-node id-th device from the
1683 * list of detected/selected GPUs. */
1684 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str, &hwinfo->gpu_info))
1686 /* At this point the init should never fail as we made sure that
1687 * we have all the GPUs we need. If it still does, we'll bail. */
1688 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1690 get_gpu_device_id(&hwinfo->gpu_info, cr->rank_pp_intranode),
1694 /* Here we actually turn on hardware GPU acceleration */
1699 gmx_bool uses_simple_tables(int cutoff_scheme,
1700 nonbonded_verlet_t *nbv,
1703 gmx_bool bUsesSimpleTables = TRUE;
1706 switch (cutoff_scheme)
1709 bUsesSimpleTables = TRUE;
1712 assert(NULL != nbv && NULL != nbv->grp);
1713 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1714 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1717 gmx_incons("unimplemented");
1719 return bUsesSimpleTables;
1722 static void init_ewald_f_table(interaction_const_t *ic,
1723 gmx_bool bUsesSimpleTables,
1728 if (bUsesSimpleTables)
1730 /* With a spacing of 0.0005 we are at the force summation accuracy
1731 * for the SSE kernels for "normal" atomistic simulations.
1733 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
1736 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1737 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1741 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1742 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1743 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1746 sfree_aligned(ic->tabq_coul_FDV0);
1747 sfree_aligned(ic->tabq_coul_F);
1748 sfree_aligned(ic->tabq_coul_V);
1750 /* Create the original table data in FDV0 */
1751 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1752 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1753 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1754 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1755 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff);
1758 void init_interaction_const_tables(FILE *fp,
1759 interaction_const_t *ic,
1760 gmx_bool bUsesSimpleTables,
1765 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1767 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1771 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1772 1/ic->tabq_scale, ic->tabq_size);
1777 void init_interaction_const(FILE *fp,
1778 interaction_const_t **interaction_const,
1779 const t_forcerec *fr,
1782 interaction_const_t *ic;
1783 gmx_bool bUsesSimpleTables = TRUE;
1787 /* Just allocate something so we can free it */
1788 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1789 snew_aligned(ic->tabq_coul_F, 16, 32);
1790 snew_aligned(ic->tabq_coul_V, 16, 32);
1792 ic->rlist = fr->rlist;
1793 ic->rlistlong = fr->rlistlong;
1796 ic->rvdw = fr->rvdw;
1797 if (fr->vdw_modifier == eintmodPOTSHIFT)
1799 ic->sh_invrc6 = pow(ic->rvdw, -6.0);
1806 /* Electrostatics */
1807 ic->eeltype = fr->eeltype;
1808 ic->rcoulomb = fr->rcoulomb;
1809 ic->epsilon_r = fr->epsilon_r;
1810 ic->epsfac = fr->epsfac;
1813 ic->ewaldcoeff = fr->ewaldcoeff;
1814 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1816 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
1823 /* Reaction-field */
1824 if (EEL_RF(ic->eeltype))
1826 ic->epsilon_rf = fr->epsilon_rf;
1827 ic->k_rf = fr->k_rf;
1828 ic->c_rf = fr->c_rf;
1832 /* For plain cut-off we might use the reaction-field kernels */
1833 ic->epsilon_rf = ic->epsilon_r;
1835 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1837 ic->c_rf = 1/ic->rcoulomb;
1847 fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1848 sqr(ic->sh_invrc6), ic->sh_invrc6);
1849 if (ic->eeltype == eelCUT)
1851 fprintf(fp, ", Coulomb %.3f", ic->c_rf);
1853 else if (EEL_PME(ic->eeltype))
1855 fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
1860 *interaction_const = ic;
1862 if (fr->nbv != NULL && fr->nbv->bUseGPU)
1864 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
1867 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
1868 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
1871 static void init_nb_verlet(FILE *fp,
1872 nonbonded_verlet_t **nb_verlet,
1873 const t_inputrec *ir,
1874 const t_forcerec *fr,
1875 const t_commrec *cr,
1876 const char *nbpu_opt)
1878 nonbonded_verlet_t *nbv;
1881 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
1883 nbnxn_alloc_t *nb_alloc;
1884 nbnxn_free_t *nb_free;
1888 pick_nbnxn_resources(fp, cr, fr->hwinfo,
1895 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
1896 for (i = 0; i < nbv->ngrp; i++)
1898 nbv->grp[i].nbl_lists.nnbl = 0;
1899 nbv->grp[i].nbat = NULL;
1900 nbv->grp[i].kernel_type = nbnxnkNotSet;
1902 if (i == 0) /* local */
1904 pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
1905 nbv->bUseGPU, bEmulateGPU,
1907 &nbv->grp[i].kernel_type,
1908 &nbv->grp[i].ewald_excl,
1911 else /* non-local */
1913 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
1915 /* Use GPU for local, select a CPU kernel for non-local */
1916 pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
1919 &nbv->grp[i].kernel_type,
1920 &nbv->grp[i].ewald_excl,
1923 bHybridGPURun = TRUE;
1927 /* Use the same kernel for local and non-local interactions */
1928 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
1929 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
1936 /* init the NxN GPU data; the last argument tells whether we'll have
1937 * both local and non-local NB calculation on GPU */
1938 nbnxn_cuda_init(fp, &nbv->cu_nbv,
1939 &fr->hwinfo->gpu_info, cr->rank_pp_intranode,
1940 (nbv->ngrp > 1) && !bHybridGPURun);
1942 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
1946 nbv->min_ci_balanced = strtol(env, &end, 10);
1947 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
1949 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
1954 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
1955 nbv->min_ci_balanced);
1960 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
1963 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
1964 nbv->min_ci_balanced);
1970 nbv->min_ci_balanced = 0;
1975 nbnxn_init_search(&nbv->nbs,
1976 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
1977 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
1978 gmx_omp_nthreads_get(emntNonbonded));
1980 for (i = 0; i < nbv->ngrp; i++)
1982 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
1984 nb_alloc = &pmalloc;
1993 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
1994 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
1995 /* 8x8x8 "non-simple" lists are ATM always combined */
1996 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2000 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2002 snew(nbv->grp[i].nbat, 1);
2003 nbnxn_atomdata_init(fp,
2005 nbv->grp[i].kernel_type,
2006 fr->ntype, fr->nbfp,
2008 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2013 nbv->grp[i].nbat = nbv->grp[0].nbat;
2018 void init_forcerec(FILE *fp,
2019 const output_env_t oenv,
2022 const t_inputrec *ir,
2023 const gmx_mtop_t *mtop,
2024 const t_commrec *cr,
2031 const char *nbpu_opt,
2032 gmx_bool bNoSolvOpt,
2035 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2041 gmx_bool bGenericKernelOnly;
2042 gmx_bool bTab, bSep14tab, bNormalnblists;
2044 int *nm_ind, egp_flags;
2046 if (fr->hwinfo == NULL)
2048 /* Detect hardware, gather information.
2049 * In mdrun, hwinfo has already been set before calling init_forcerec.
2050 * Here we ignore GPUs, as tools will not use them anyhow.
2052 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE, FALSE, NULL);
2055 /* By default we turn acceleration on, but it might be turned off further down... */
2056 fr->use_cpu_acceleration = TRUE;
2058 fr->bDomDec = DOMAINDECOMP(cr);
2060 natoms = mtop->natoms;
2062 if (check_box(ir->ePBC, box))
2064 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2067 /* Test particle insertion ? */
2070 /* Set to the size of the molecule to be inserted (the last one) */
2071 /* Because of old style topologies, we have to use the last cg
2072 * instead of the last molecule type.
2074 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2075 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2076 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2078 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2086 /* Copy AdResS parameters */
2089 fr->adress_type = ir->adress->type;
2090 fr->adress_const_wf = ir->adress->const_wf;
2091 fr->adress_ex_width = ir->adress->ex_width;
2092 fr->adress_hy_width = ir->adress->hy_width;
2093 fr->adress_icor = ir->adress->icor;
2094 fr->adress_site = ir->adress->site;
2095 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2096 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2099 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2100 for (i = 0; i < ir->adress->n_energy_grps; i++)
2102 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2105 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2106 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2107 for (i = 0; i < fr->n_adress_tf_grps; i++)
2109 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2111 copy_rvec(ir->adress->refs, fr->adress_refs);
2115 fr->adress_type = eAdressOff;
2116 fr->adress_do_hybridpairs = FALSE;
2119 /* Copy the user determined parameters */
2120 fr->userint1 = ir->userint1;
2121 fr->userint2 = ir->userint2;
2122 fr->userint3 = ir->userint3;
2123 fr->userint4 = ir->userint4;
2124 fr->userreal1 = ir->userreal1;
2125 fr->userreal2 = ir->userreal2;
2126 fr->userreal3 = ir->userreal3;
2127 fr->userreal4 = ir->userreal4;
2130 fr->fc_stepsize = ir->fc_stepsize;
2133 fr->efep = ir->efep;
2134 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2135 if (ir->fepvals->bScCoul)
2137 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2138 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2142 fr->sc_alphacoul = 0;
2143 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2145 fr->sc_power = ir->fepvals->sc_power;
2146 fr->sc_r_power = ir->fepvals->sc_r_power;
2147 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2149 env = getenv("GMX_SCSIGMA_MIN");
2153 sscanf(env, "%lf", &dbl);
2154 fr->sc_sigma6_min = pow(dbl, 6);
2157 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2161 fr->bNonbonded = TRUE;
2162 if (getenv("GMX_NO_NONBONDED") != NULL)
2164 /* turn off non-bonded calculations */
2165 fr->bNonbonded = FALSE;
2166 md_print_warn(cr, fp,
2167 "Found environment variable GMX_NO_NONBONDED.\n"
2168 "Disabling nonbonded calculations.\n");
2171 bGenericKernelOnly = FALSE;
2173 /* We now check in the NS code whether a particular combination of interactions
2174 * can be used with water optimization, and disable it if that is not the case.
2177 if (getenv("GMX_NB_GENERIC") != NULL)
2182 "Found environment variable GMX_NB_GENERIC.\n"
2183 "Disabling all interaction-specific nonbonded kernels, will only\n"
2184 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2186 bGenericKernelOnly = TRUE;
2189 if (bGenericKernelOnly == TRUE)
2194 if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2196 fr->use_cpu_acceleration = FALSE;
2200 "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
2201 "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
2205 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2207 /* Check if we can/should do all-vs-all kernels */
2208 fr->bAllvsAll = can_use_allvsall(ir, mtop, FALSE, NULL, NULL);
2209 fr->AllvsAll_work = NULL;
2210 fr->AllvsAll_workgb = NULL;
2212 /* All-vs-all kernels have not been implemented in 4.6, and
2213 * the SIMD group kernels are also buggy in this case. Non-accelerated
2214 * group kernels are OK. See Redmine #1249. */
2217 fr->bAllvsAll = FALSE;
2218 fr->use_cpu_acceleration = FALSE;
2222 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2223 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2224 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2225 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2226 "we are proceeding by disabling all CPU architecture-specific\n"
2227 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2228 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2232 /* Neighbour searching stuff */
2233 fr->cutoff_scheme = ir->cutoff_scheme;
2234 fr->bGrid = (ir->ns_type == ensGRID);
2235 fr->ePBC = ir->ePBC;
2237 /* Determine if we will do PBC for distances in bonded interactions */
2238 if (fr->ePBC == epbcNONE)
2240 fr->bMolPBC = FALSE;
2244 if (!DOMAINDECOMP(cr))
2246 /* The group cut-off scheme and SHAKE assume charge groups
2247 * are whole, but not using molpbc is faster in most cases.
2249 if (fr->cutoff_scheme == ecutsGROUP ||
2250 (ir->eConstrAlg == econtSHAKE &&
2251 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2252 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2254 fr->bMolPBC = ir->bPeriodicMols;
2259 if (getenv("GMX_USE_GRAPH") != NULL)
2261 fr->bMolPBC = FALSE;
2264 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2271 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2274 fr->bGB = (ir->implicit_solvent == eisGBSA);
2276 fr->rc_scaling = ir->refcoord_scaling;
2277 copy_rvec(ir->posres_com, fr->posres_com);
2278 copy_rvec(ir->posres_comB, fr->posres_comB);
2279 fr->rlist = cutoff_inf(ir->rlist);
2280 fr->rlistlong = cutoff_inf(ir->rlistlong);
2281 fr->eeltype = ir->coulombtype;
2282 fr->vdwtype = ir->vdwtype;
2284 fr->coulomb_modifier = ir->coulomb_modifier;
2285 fr->vdw_modifier = ir->vdw_modifier;
2287 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2288 switch (fr->eeltype)
2291 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2297 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2301 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2302 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2311 case eelPMEUSERSWITCH:
2312 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2317 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2321 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2325 /* Vdw: Translate from mdp settings to kernel format */
2326 switch (fr->vdwtype)
2331 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2335 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2342 case evdwENCADSHIFT:
2343 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2347 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2351 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2352 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2353 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2355 fr->bTwinRange = fr->rlistlong > fr->rlist;
2356 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2358 fr->reppow = mtop->ffparams.reppow;
2360 if (ir->cutoff_scheme == ecutsGROUP)
2362 fr->bvdwtab = (fr->vdwtype != evdwCUT ||
2363 !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2364 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2365 fr->bcoultab = !(fr->eeltype == eelCUT ||
2366 fr->eeltype == eelEWALD ||
2367 fr->eeltype == eelPME ||
2368 fr->eeltype == eelRF ||
2369 fr->eeltype == eelRF_ZERO);
2371 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2372 * going to be faster to tabulate the interaction than calling the generic kernel.
2374 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2376 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2378 fr->bcoultab = TRUE;
2381 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2382 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2383 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2384 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2386 if (fr->rcoulomb != fr->rvdw)
2388 fr->bcoultab = TRUE;
2392 if (getenv("GMX_REQUIRE_TABLES"))
2395 fr->bcoultab = TRUE;
2400 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2401 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2404 if (fr->bvdwtab == TRUE)
2406 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2407 fr->nbkernel_vdw_modifier = eintmodNONE;
2409 if (fr->bcoultab == TRUE)
2411 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2412 fr->nbkernel_elec_modifier = eintmodNONE;
2416 if (ir->cutoff_scheme == ecutsVERLET)
2418 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2420 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2422 fr->bvdwtab = FALSE;
2423 fr->bcoultab = FALSE;
2426 /* Tables are used for direct ewald sum */
2429 if (EEL_PME(ir->coulombtype))
2433 fprintf(fp, "Will do PME sum in reciprocal space.\n");
2435 if (ir->coulombtype == eelP3M_AD)
2437 please_cite(fp, "Hockney1988");
2438 please_cite(fp, "Ballenegger2012");
2442 please_cite(fp, "Essmann95a");
2445 if (ir->ewald_geometry == eewg3DC)
2449 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2451 please_cite(fp, "In-Chul99a");
2454 fr->ewaldcoeff = calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
2455 init_ewald_tab(&(fr->ewald_table), ir, fp);
2458 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2463 /* Electrostatics */
2464 fr->epsilon_r = ir->epsilon_r;
2465 fr->epsilon_rf = ir->epsilon_rf;
2466 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2467 fr->rcoulomb_switch = ir->rcoulomb_switch;
2468 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2470 /* Parameters for generalized RF */
2474 if (fr->eeltype == eelGRF)
2476 init_generalized_rf(fp, mtop, ir, fr);
2478 else if (fr->eeltype == eelSHIFT)
2480 for (m = 0; (m < DIM); m++)
2482 box_size[m] = box[m][m];
2485 if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
2487 set_shift_consts(fr->rcoulomb_switch, fr->rcoulomb, box_size);
2491 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
2492 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2493 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2494 IR_ELEC_FIELD(*ir) ||
2495 (fr->adress_icor != eAdressICOff)
2498 if (fr->cutoff_scheme == ecutsGROUP &&
2499 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2501 /* Count the total number of charge groups */
2502 fr->cg_nalloc = ncg_mtop(mtop);
2503 srenew(fr->cg_cm, fr->cg_nalloc);
2505 if (fr->shift_vec == NULL)
2507 snew(fr->shift_vec, SHIFTS);
2510 if (fr->fshift == NULL)
2512 snew(fr->fshift, SHIFTS);
2515 if (fr->nbfp == NULL)
2517 fr->ntype = mtop->ffparams.atnr;
2518 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2521 /* Copy the energy group exclusions */
2522 fr->egp_flags = ir->opts.egp_flags;
2524 /* Van der Waals stuff */
2525 fr->rvdw = cutoff_inf(ir->rvdw);
2526 fr->rvdw_switch = ir->rvdw_switch;
2527 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2529 if (fr->rvdw_switch >= fr->rvdw)
2531 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2532 fr->rvdw_switch, fr->rvdw);
2536 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2537 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2538 fr->rvdw_switch, fr->rvdw);
2542 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2544 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2549 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2550 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2553 fr->eDispCorr = ir->eDispCorr;
2554 if (ir->eDispCorr != edispcNO)
2556 set_avcsixtwelve(fp, fr, mtop);
2561 set_bham_b_max(fp, fr, mtop);
2564 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2566 /* Copy the GBSA data (radius, volume and surftens for each
2567 * atomtype) from the topology atomtype section to forcerec.
2569 snew(fr->atype_radius, fr->ntype);
2570 snew(fr->atype_vol, fr->ntype);
2571 snew(fr->atype_surftens, fr->ntype);
2572 snew(fr->atype_gb_radius, fr->ntype);
2573 snew(fr->atype_S_hct, fr->ntype);
2575 if (mtop->atomtypes.nr > 0)
2577 for (i = 0; i < fr->ntype; i++)
2579 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2581 for (i = 0; i < fr->ntype; i++)
2583 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2585 for (i = 0; i < fr->ntype; i++)
2587 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2589 for (i = 0; i < fr->ntype; i++)
2591 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2593 for (i = 0; i < fr->ntype; i++)
2595 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2599 /* Generate the GB table if needed */
2603 fr->gbtabscale = 2000;
2605 fr->gbtabscale = 500;
2609 fr->gbtab = make_gb_table(fp, oenv, fr, tabpfn, fr->gbtabscale);
2611 init_gb(&fr->born, cr, fr, ir, mtop, ir->rgbradii, ir->gb_algorithm);
2613 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2614 if (!DOMAINDECOMP(cr))
2616 make_local_gb(cr, fr->born, ir->gb_algorithm);
2620 /* Set the charge scaling */
2621 if (fr->epsilon_r != 0)
2623 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2627 /* eps = 0 is infinite dieletric: no coulomb interactions */
2631 /* Reaction field constants */
2632 if (EEL_RF(fr->eeltype))
2634 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2635 fr->rcoulomb, fr->temp, fr->zsquare, box,
2636 &fr->kappa, &fr->k_rf, &fr->c_rf);
2639 set_chargesum(fp, fr, mtop);
2641 /* if we are using LR electrostatics, and they are tabulated,
2642 * the tables will contain modified coulomb interactions.
2643 * Since we want to use the non-shifted ones for 1-4
2644 * coulombic interactions, we must have an extra set of tables.
2647 /* Construct tables.
2648 * A little unnecessary to make both vdw and coul tables sometimes,
2649 * but what the heck... */
2651 bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
2653 bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2654 fr->bBHAM || fr->bEwald) &&
2655 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2656 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2657 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2659 negp_pp = ir->opts.ngener - ir->nwall;
2663 bNormalnblists = TRUE;
2668 bNormalnblists = (ir->eDispCorr != edispcNO);
2669 for (egi = 0; egi < negp_pp; egi++)
2671 for (egj = egi; egj < negp_pp; egj++)
2673 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2674 if (!(egp_flags & EGP_EXCL))
2676 if (egp_flags & EGP_TABLE)
2682 bNormalnblists = TRUE;
2689 fr->nnblists = negptable + 1;
2693 fr->nnblists = negptable;
2695 if (fr->nnblists > 1)
2697 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2706 snew(fr->nblists, fr->nnblists);
2708 /* This code automatically gives table length tabext without cut-off's,
2709 * in that case grompp should already have checked that we do not need
2710 * normal tables and we only generate tables for 1-4 interactions.
2712 rtab = ir->rlistlong + ir->tabext;
2716 /* make tables for ordinary interactions */
2719 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2722 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2726 fr->tab14 = fr->nblists[0].table_elec_vdw;
2736 /* Read the special tables for certain energy group pairs */
2737 nm_ind = mtop->groups.grps[egcENER].nm_ind;
2738 for (egi = 0; egi < negp_pp; egi++)
2740 for (egj = egi; egj < negp_pp; egj++)
2742 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2743 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2745 nbl = &(fr->nblists[m]);
2746 if (fr->nnblists > 1)
2748 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2750 /* Read the table file with the two energy groups names appended */
2751 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2752 *mtop->groups.grpname[nm_ind[egi]],
2753 *mtop->groups.grpname[nm_ind[egj]],
2757 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2758 *mtop->groups.grpname[nm_ind[egi]],
2759 *mtop->groups.grpname[nm_ind[egj]],
2760 &fr->nblists[fr->nnblists/2+m]);
2764 else if (fr->nnblists > 1)
2766 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2774 /* generate extra tables with plain Coulomb for 1-4 interactions only */
2775 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2776 GMX_MAKETABLES_14ONLY);
2779 /* Read AdResS Thermo Force table if needed */
2780 if (fr->adress_icor == eAdressICThermoForce)
2782 /* old todo replace */
2784 if (ir->adress->n_tf_grps > 0)
2786 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2791 /* load the default table */
2792 snew(fr->atf_tabs, 1);
2793 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2798 fr->nwall = ir->nwall;
2799 if (ir->nwall && ir->wall_type == ewtTABLE)
2801 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2806 fcd->bondtab = make_bonded_tables(fp,
2807 F_TABBONDS, F_TABBONDSNC,
2809 fcd->angletab = make_bonded_tables(fp,
2812 fcd->dihtab = make_bonded_tables(fp,
2820 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2824 /* QM/MM initialization if requested
2828 fprintf(stderr, "QM/MM calculation requested.\n");
2831 fr->bQMMM = ir->bQMMM;
2832 fr->qr = mk_QMMMrec();
2834 /* Set all the static charge group info */
2835 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
2836 &fr->bExcl_IntraCGAll_InterCGNone);
2837 if (DOMAINDECOMP(cr))
2843 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
2846 if (!DOMAINDECOMP(cr))
2848 /* When using particle decomposition, the effect of the second argument,
2849 * which sets fr->hcg, is corrected later in do_md and init_em.
2851 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
2852 mtop->natoms, mtop->natoms, mtop->natoms);
2855 fr->print_force = print_force;
2858 /* coarse load balancing vars */
2863 /* Initialize neighbor search */
2864 init_ns(fp, cr, &fr->ns, fr, mtop, box);
2866 if (cr->duty & DUTY_PP)
2868 gmx_nonbonded_setup(fp, fr, bGenericKernelOnly);
2872 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
2877 /* Initialize the thread working data for bonded interactions */
2878 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
2880 snew(fr->excl_load, fr->nthreads+1);
2882 if (fr->cutoff_scheme == ecutsVERLET)
2884 if (ir->rcoulomb != ir->rvdw)
2886 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
2889 init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
2892 /* fr->ic is used both by verlet and group kernels (to some extent) now */
2893 init_interaction_const(fp, &fr->ic, fr, rtab);
2894 if (ir->eDispCorr != edispcNO)
2896 calc_enervirdiff(fp, ir->eDispCorr, fr);
2900 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
2901 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
2902 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
2904 void pr_forcerec(FILE *fp, t_forcerec *fr, t_commrec *cr)
2908 pr_real(fp, fr->rlist);
2909 pr_real(fp, fr->rcoulomb);
2910 pr_real(fp, fr->fudgeQQ);
2911 pr_bool(fp, fr->bGrid);
2912 pr_bool(fp, fr->bTwinRange);
2913 /*pr_int(fp,fr->cg0);
2914 pr_int(fp,fr->hcg);*/
2915 for (i = 0; i < fr->nnblists; i++)
2917 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
2919 pr_real(fp, fr->rcoulomb_switch);
2920 pr_real(fp, fr->rcoulomb);
2925 void forcerec_set_excl_load(t_forcerec *fr,
2926 const gmx_localtop_t *top, const t_commrec *cr)
2929 int t, i, j, ntot, n, ntarget;
2931 if (cr != NULL && PARTDECOMP(cr))
2933 /* No OpenMP with particle decomposition */
2941 ind = top->excls.index;
2945 for (i = 0; i < top->excls.nr; i++)
2947 for (j = ind[i]; j < ind[i+1]; j++)
2956 fr->excl_load[0] = 0;
2959 for (t = 1; t <= fr->nthreads; t++)
2961 ntarget = (ntot*t)/fr->nthreads;
2962 while (i < top->excls.nr && n < ntarget)
2964 for (j = ind[i]; j < ind[i+1]; j++)
2973 fr->excl_load[t] = i;