1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROningen Mixture of Alchemy and Childrens' Stories
51 #include "gmx_fatal.h"
57 #include "nonbonded.h"
62 #include "gmx_simd_macros.h"
65 /* Find a better place for this? */
66 const int cmap_coeff_matrix[] = {
67 1, 0, -3, 2, 0, 0, 0, 0, -3, 0, 9, -6, 2, 0, -6, 4,
68 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, -9, 6, -2, 0, 6, -4,
69 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -6, 0, 0, -6, 4,
70 0, 0, 3, -2, 0, 0, 0, 0, 0, 0, -9, 6, 0, 0, 6, -4,
71 0, 0, 0, 0, 1, 0, -3, 2, -2, 0, 6, -4, 1, 0, -3, 2,
72 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 3, -2, 1, 0, -3, 2,
73 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 2, 0, 0, 3, -2,
74 0, 0, 0, 0, 0, 0, 3, -2, 0, 0, -6, 4, 0, 0, 3, -2,
75 0, 1, -2, 1, 0, 0, 0, 0, 0, -3, 6, -3, 0, 2, -4, 2,
76 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -6, 3, 0, -2, 4, -2,
77 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 2, -2,
78 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 3, -3, 0, 0, -2, 2,
79 0, 0, 0, 0, 0, 1, -2, 1, 0, -2, 4, -2, 0, 1, -2, 1,
80 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 2, -1, 0, 1, -2, 1,
81 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 0, 0, -1, 1,
82 0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 2, -2, 0, 0, -1, 1
87 int glatnr(int *global_atom_index, int i)
91 if (global_atom_index == NULL)
97 atnr = global_atom_index[i] + 1;
103 static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
107 return pbc_dx_aiuc(pbc, xi, xj, dx);
111 rvec_sub(xi, xj, dx);
118 /* Below are 3 SIMD vector operations.
119 * Currently these are only used here, but they should be moved to
120 * a general SIMD include file when used elsewhere.
123 /* SIMD inner-product of multiple vectors */
124 static gmx_inline gmx_mm_pr
125 gmx_iprod_pr(gmx_mm_pr ax, gmx_mm_pr ay, gmx_mm_pr az,
126 gmx_mm_pr bx, gmx_mm_pr by, gmx_mm_pr bz)
130 ret = gmx_mul_pr(ax, bx);
131 ret = gmx_madd_pr(ay, by, ret);
132 ret = gmx_madd_pr(az, bz, ret);
137 /* SIMD norm squared of multiple vectors */
138 static gmx_inline gmx_mm_pr
139 gmx_norm2_pr(gmx_mm_pr ax, gmx_mm_pr ay, gmx_mm_pr az)
143 ret = gmx_mul_pr(ax, ax);
144 ret = gmx_madd_pr(ay, ay, ret);
145 ret = gmx_madd_pr(az, az, ret);
150 /* SIMD cross-product of multiple vectors */
151 static gmx_inline void
152 gmx_cprod_pr(gmx_mm_pr ax, gmx_mm_pr ay, gmx_mm_pr az,
153 gmx_mm_pr bx, gmx_mm_pr by, gmx_mm_pr bz,
154 gmx_mm_pr *cx, gmx_mm_pr *cy, gmx_mm_pr *cz)
156 *cx = gmx_mul_pr(ay, bz);
157 *cx = gmx_nmsub_pr(az, by, *cx);
159 *cy = gmx_mul_pr(az, bx);
160 *cy = gmx_nmsub_pr(ax, bz, *cy);
162 *cz = gmx_mul_pr(ax, by);
163 *cz = gmx_nmsub_pr(ay, bx, *cz);
166 /* SIMD PBC data structure, containing 1/boxdiag and the box vectors */
179 /* Set the SIMD pbc data from a normal t_pbc struct */
180 static void set_pbc_simd(const t_pbc *pbc, pbc_simd_t *pbc_simd)
185 /* Setting inv_bdiag to 0 effectively turns off PBC */
186 clear_rvec(inv_bdiag);
189 for (d = 0; d < pbc->ndim_ePBC; d++)
191 inv_bdiag[d] = 1.0/pbc->box[d][d];
195 pbc_simd->inv_bzz = gmx_set1_pr(inv_bdiag[ZZ]);
196 pbc_simd->inv_byy = gmx_set1_pr(inv_bdiag[YY]);
197 pbc_simd->inv_bxx = gmx_set1_pr(inv_bdiag[XX]);
201 pbc_simd->bzx = gmx_set1_pr(pbc->box[ZZ][XX]);
202 pbc_simd->bzy = gmx_set1_pr(pbc->box[ZZ][YY]);
203 pbc_simd->bzz = gmx_set1_pr(pbc->box[ZZ][ZZ]);
204 pbc_simd->byx = gmx_set1_pr(pbc->box[YY][XX]);
205 pbc_simd->byy = gmx_set1_pr(pbc->box[YY][YY]);
206 pbc_simd->bxx = gmx_set1_pr(pbc->box[XX][XX]);
210 pbc_simd->bzx = gmx_setzero_pr();
211 pbc_simd->bzy = gmx_setzero_pr();
212 pbc_simd->bzz = gmx_setzero_pr();
213 pbc_simd->byx = gmx_setzero_pr();
214 pbc_simd->byy = gmx_setzero_pr();
215 pbc_simd->bxx = gmx_setzero_pr();
219 /* Correct distance vector *dx,*dy,*dz for PBC using SIMD */
220 static gmx_inline void
221 pbc_dx_simd(gmx_mm_pr *dx, gmx_mm_pr *dy, gmx_mm_pr *dz,
222 const pbc_simd_t *pbc)
226 sh = gmx_round_pr(gmx_mul_pr(*dz, pbc->inv_bzz));
227 *dx = gmx_nmsub_pr(sh, pbc->bzx, *dx);
228 *dy = gmx_nmsub_pr(sh, pbc->bzy, *dy);
229 *dz = gmx_nmsub_pr(sh, pbc->bzz, *dz);
231 sh = gmx_round_pr(gmx_mul_pr(*dy, pbc->inv_byy));
232 *dx = gmx_nmsub_pr(sh, pbc->byx, *dx);
233 *dy = gmx_nmsub_pr(sh, pbc->byy, *dy);
235 sh = gmx_round_pr(gmx_mul_pr(*dx, pbc->inv_bxx));
236 *dx = gmx_nmsub_pr(sh, pbc->bxx, *dx);
239 #endif /* SIMD_BONDEDS */
242 * Morse potential bond by Frank Everdij
244 * Three parameters needed:
246 * b0 = equilibrium distance in nm
247 * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
248 * cb = well depth in kJ/mol
250 * Note: the potential is referenced to be +cb at infinite separation
251 * and zero at the equilibrium distance!
254 real morse_bonds(int nbonds,
255 const t_iatom forceatoms[], const t_iparams forceparams[],
256 const rvec x[], rvec f[], rvec fshift[],
257 const t_pbc *pbc, const t_graph *g,
258 real lambda, real *dvdlambda,
259 const t_mdatoms *md, t_fcdata *fcd,
260 int *global_atom_index)
262 const real one = 1.0;
263 const real two = 2.0;
264 real dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, fij, vtot;
265 real b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
267 int i, m, ki, type, ai, aj;
271 for (i = 0; (i < nbonds); )
273 type = forceatoms[i++];
274 ai = forceatoms[i++];
275 aj = forceatoms[i++];
277 b0A = forceparams[type].morse.b0A;
278 beA = forceparams[type].morse.betaA;
279 cbA = forceparams[type].morse.cbA;
281 b0B = forceparams[type].morse.b0B;
282 beB = forceparams[type].morse.betaB;
283 cbB = forceparams[type].morse.cbB;
285 L1 = one-lambda; /* 1 */
286 b0 = L1*b0A + lambda*b0B; /* 3 */
287 be = L1*beA + lambda*beB; /* 3 */
288 cb = L1*cbA + lambda*cbB; /* 3 */
290 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
291 dr2 = iprod(dx, dx); /* 5 */
292 dr = dr2*gmx_invsqrt(dr2); /* 10 */
293 temp = exp(-be*(dr-b0)); /* 12 */
297 /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
298 *dvdlambda += cbB-cbA;
302 omtemp = one-temp; /* 1 */
303 cbomtemp = cb*omtemp; /* 1 */
304 vbond = cbomtemp*omtemp; /* 1 */
305 fbond = -two*be*temp*cbomtemp*gmx_invsqrt(dr2); /* 9 */
306 vtot += vbond; /* 1 */
308 *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
312 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
316 for (m = 0; (m < DIM); m++) /* 15 */
321 fshift[ki][m] += fij;
322 fshift[CENTRAL][m] -= fij;
328 real cubic_bonds(int nbonds,
329 const t_iatom forceatoms[], const t_iparams forceparams[],
330 const rvec x[], rvec f[], rvec fshift[],
331 const t_pbc *pbc, const t_graph *g,
332 real lambda, real *dvdlambda,
333 const t_mdatoms *md, t_fcdata *fcd,
334 int *global_atom_index)
336 const real three = 3.0;
337 const real two = 2.0;
339 real dr, dr2, dist, kdist, kdist2, fbond, vbond, fij, vtot;
341 int i, m, ki, type, ai, aj;
345 for (i = 0; (i < nbonds); )
347 type = forceatoms[i++];
348 ai = forceatoms[i++];
349 aj = forceatoms[i++];
351 b0 = forceparams[type].cubic.b0;
352 kb = forceparams[type].cubic.kb;
353 kcub = forceparams[type].cubic.kcub;
355 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
356 dr2 = iprod(dx, dx); /* 5 */
363 dr = dr2*gmx_invsqrt(dr2); /* 10 */
368 vbond = kdist2 + kcub*kdist2*dist;
369 fbond = -(two*kdist + three*kdist2*kcub)/dr;
371 vtot += vbond; /* 21 */
375 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
378 for (m = 0; (m < DIM); m++) /* 15 */
383 fshift[ki][m] += fij;
384 fshift[CENTRAL][m] -= fij;
390 real FENE_bonds(int nbonds,
391 const t_iatom forceatoms[], const t_iparams forceparams[],
392 const rvec x[], rvec f[], rvec fshift[],
393 const t_pbc *pbc, const t_graph *g,
394 real lambda, real *dvdlambda,
395 const t_mdatoms *md, t_fcdata *fcd,
396 int *global_atom_index)
398 const real half = 0.5;
399 const real one = 1.0;
401 real dr, dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot;
403 int i, m, ki, type, ai, aj;
407 for (i = 0; (i < nbonds); )
409 type = forceatoms[i++];
410 ai = forceatoms[i++];
411 aj = forceatoms[i++];
413 bm = forceparams[type].fene.bm;
414 kb = forceparams[type].fene.kb;
416 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
417 dr2 = iprod(dx, dx); /* 5 */
429 "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
431 glatnr(global_atom_index, ai),
432 glatnr(global_atom_index, aj));
435 omdr2obm2 = one - dr2/bm2;
437 vbond = -half*kb*bm2*log(omdr2obm2);
438 fbond = -kb/omdr2obm2;
440 vtot += vbond; /* 35 */
444 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
447 for (m = 0; (m < DIM); m++) /* 15 */
452 fshift[ki][m] += fij;
453 fshift[CENTRAL][m] -= fij;
459 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
462 const real half = 0.5;
463 real L1, kk, x0, dx, dx2;
464 real v, f, dvdlambda;
467 kk = L1*kA+lambda*kB;
468 x0 = L1*xA+lambda*xB;
475 dvdlambda = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
482 /* That was 19 flops */
486 real bonds(int nbonds,
487 const t_iatom forceatoms[], const t_iparams forceparams[],
488 const rvec x[], rvec f[], rvec fshift[],
489 const t_pbc *pbc, const t_graph *g,
490 real lambda, real *dvdlambda,
491 const t_mdatoms *md, t_fcdata *fcd,
492 int *global_atom_index)
494 int i, m, ki, ai, aj, type;
495 real dr, dr2, fbond, vbond, fij, vtot;
500 for (i = 0; (i < nbonds); )
502 type = forceatoms[i++];
503 ai = forceatoms[i++];
504 aj = forceatoms[i++];
506 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
507 dr2 = iprod(dx, dx); /* 5 */
508 dr = dr2*gmx_invsqrt(dr2); /* 10 */
510 *dvdlambda += harmonic(forceparams[type].harmonic.krA,
511 forceparams[type].harmonic.krB,
512 forceparams[type].harmonic.rA,
513 forceparams[type].harmonic.rB,
514 dr, lambda, &vbond, &fbond); /* 19 */
522 vtot += vbond; /* 1*/
523 fbond *= gmx_invsqrt(dr2); /* 6 */
527 fprintf(debug, "BONDS: dr = %10g vbond = %10g fbond = %10g\n",
533 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
536 for (m = 0; (m < DIM); m++) /* 15 */
541 fshift[ki][m] += fij;
542 fshift[CENTRAL][m] -= fij;
548 real restraint_bonds(int nbonds,
549 const t_iatom forceatoms[], const t_iparams forceparams[],
550 const rvec x[], rvec f[], rvec fshift[],
551 const t_pbc *pbc, const t_graph *g,
552 real lambda, real *dvdlambda,
553 const t_mdatoms *md, t_fcdata *fcd,
554 int *global_atom_index)
556 int i, m, ki, ai, aj, type;
557 real dr, dr2, fbond, vbond, fij, vtot;
559 real low, dlow, up1, dup1, up2, dup2, k, dk;
567 for (i = 0; (i < nbonds); )
569 type = forceatoms[i++];
570 ai = forceatoms[i++];
571 aj = forceatoms[i++];
573 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
574 dr2 = iprod(dx, dx); /* 5 */
575 dr = dr2*gmx_invsqrt(dr2); /* 10 */
577 low = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
578 dlow = -forceparams[type].restraint.lowA + forceparams[type].restraint.lowB;
579 up1 = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
580 dup1 = -forceparams[type].restraint.up1A + forceparams[type].restraint.up1B;
581 up2 = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
582 dup2 = -forceparams[type].restraint.up2A + forceparams[type].restraint.up2B;
583 k = L1*forceparams[type].restraint.kA + lambda*forceparams[type].restraint.kB;
584 dk = -forceparams[type].restraint.kA + forceparams[type].restraint.kB;
593 *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
606 *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
611 vbond = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
612 fbond = -k*(up2 - up1);
613 *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
614 + k*(dup2 - dup1)*(up2 - up1 + drh)
615 - k*(up2 - up1)*dup2;
623 vtot += vbond; /* 1*/
624 fbond *= gmx_invsqrt(dr2); /* 6 */
628 fprintf(debug, "BONDS: dr = %10g vbond = %10g fbond = %10g\n",
634 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
637 for (m = 0; (m < DIM); m++) /* 15 */
642 fshift[ki][m] += fij;
643 fshift[CENTRAL][m] -= fij;
650 real polarize(int nbonds,
651 const t_iatom forceatoms[], const t_iparams forceparams[],
652 const rvec x[], rvec f[], rvec fshift[],
653 const t_pbc *pbc, const t_graph *g,
654 real lambda, real *dvdlambda,
655 const t_mdatoms *md, t_fcdata *fcd,
656 int *global_atom_index)
658 int i, m, ki, ai, aj, type;
659 real dr, dr2, fbond, vbond, fij, vtot, ksh;
664 for (i = 0; (i < nbonds); )
666 type = forceatoms[i++];
667 ai = forceatoms[i++];
668 aj = forceatoms[i++];
669 ksh = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
672 fprintf(debug, "POL: local ai = %d aj = %d ksh = %.3f\n", ai, aj, ksh);
675 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
676 dr2 = iprod(dx, dx); /* 5 */
677 dr = dr2*gmx_invsqrt(dr2); /* 10 */
679 *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /* 19 */
686 vtot += vbond; /* 1*/
687 fbond *= gmx_invsqrt(dr2); /* 6 */
691 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
694 for (m = 0; (m < DIM); m++) /* 15 */
699 fshift[ki][m] += fij;
700 fshift[CENTRAL][m] -= fij;
706 real anharm_polarize(int nbonds,
707 const t_iatom forceatoms[], const t_iparams forceparams[],
708 const rvec x[], rvec f[], rvec fshift[],
709 const t_pbc *pbc, const t_graph *g,
710 real lambda, real *dvdlambda,
711 const t_mdatoms *md, t_fcdata *fcd,
712 int *global_atom_index)
714 int i, m, ki, ai, aj, type;
715 real dr, dr2, fbond, vbond, fij, vtot, ksh, khyp, drcut, ddr, ddr3;
720 for (i = 0; (i < nbonds); )
722 type = forceatoms[i++];
723 ai = forceatoms[i++];
724 aj = forceatoms[i++];
725 ksh = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
726 khyp = forceparams[type].anharm_polarize.khyp;
727 drcut = forceparams[type].anharm_polarize.drcut;
730 fprintf(debug, "POL: local ai = %d aj = %d ksh = %.3f\n", ai, aj, ksh);
733 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
734 dr2 = iprod(dx, dx); /* 5 */
735 dr = dr2*gmx_invsqrt(dr2); /* 10 */
737 *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /* 19 */
748 vbond += khyp*ddr*ddr3;
749 fbond -= 4*khyp*ddr3;
751 fbond *= gmx_invsqrt(dr2); /* 6 */
752 vtot += vbond; /* 1*/
756 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
759 for (m = 0; (m < DIM); m++) /* 15 */
764 fshift[ki][m] += fij;
765 fshift[CENTRAL][m] -= fij;
771 real water_pol(int nbonds,
772 const t_iatom forceatoms[], const t_iparams forceparams[],
773 const rvec x[], rvec f[], rvec fshift[],
774 const t_pbc *pbc, const t_graph *g,
775 real lambda, real *dvdlambda,
776 const t_mdatoms *md, t_fcdata *fcd,
777 int *global_atom_index)
779 /* This routine implements anisotropic polarizibility for water, through
780 * a shell connected to a dummy with spring constant that differ in the
781 * three spatial dimensions in the molecular frame.
783 int i, m, aO, aH1, aH2, aD, aS, type, type0;
784 rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
788 real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
793 type0 = forceatoms[0];
795 qS = md->chargeA[aS];
796 kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
797 kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
798 kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
799 r_HH = 1.0/forceparams[type0].wpol.rHH;
800 r_OD = 1.0/forceparams[type0].wpol.rOD;
803 fprintf(debug, "WPOL: qS = %10.5f aS = %5d\n", qS, aS);
804 fprintf(debug, "WPOL: kk = %10.3f %10.3f %10.3f\n",
805 kk[XX], kk[YY], kk[ZZ]);
806 fprintf(debug, "WPOL: rOH = %10.3f rHH = %10.3f rOD = %10.3f\n",
807 forceparams[type0].wpol.rOH,
808 forceparams[type0].wpol.rHH,
809 forceparams[type0].wpol.rOD);
811 for (i = 0; (i < nbonds); i += 6)
813 type = forceatoms[i];
816 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d",
817 type, type0, __FILE__, __LINE__);
819 aO = forceatoms[i+1];
820 aH1 = forceatoms[i+2];
821 aH2 = forceatoms[i+3];
822 aD = forceatoms[i+4];
823 aS = forceatoms[i+5];
825 /* Compute vectors describing the water frame */
826 rvec_sub(x[aH1], x[aO], dOH1);
827 rvec_sub(x[aH2], x[aO], dOH2);
828 rvec_sub(x[aH2], x[aH1], dHH);
829 rvec_sub(x[aD], x[aO], dOD);
830 rvec_sub(x[aS], x[aD], dDS);
831 cprod(dOH1, dOH2, nW);
833 /* Compute inverse length of normal vector
834 * (this one could be precomputed, but I'm too lazy now)
836 r_nW = gmx_invsqrt(iprod(nW, nW));
837 /* This is for precision, but does not make a big difference,
840 r_OD = gmx_invsqrt(iprod(dOD, dOD));
842 /* Normalize the vectors in the water frame */
844 svmul(r_HH, dHH, dHH);
845 svmul(r_OD, dOD, dOD);
847 /* Compute displacement of shell along components of the vector */
848 dx[ZZ] = iprod(dDS, dOD);
849 /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
850 for (m = 0; (m < DIM); m++)
852 proj[m] = dDS[m]-dx[ZZ]*dOD[m];
855 /*dx[XX] = iprod(dDS,nW);
856 dx[YY] = iprod(dDS,dHH);*/
857 dx[XX] = iprod(proj, nW);
858 for (m = 0; (m < DIM); m++)
860 proj[m] -= dx[XX]*nW[m];
862 dx[YY] = iprod(proj, dHH);
867 fprintf(debug, "WPOL: dx2=%10g dy2=%10g dz2=%10g sum=%10g dDS^2=%10g\n",
868 sqr(dx[XX]), sqr(dx[YY]), sqr(dx[ZZ]), iprod(dx, dx), iprod(dDS, dDS));
869 fprintf(debug, "WPOL: dHH=(%10g,%10g,%10g)\n", dHH[XX], dHH[YY], dHH[ZZ]);
870 fprintf(debug, "WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
871 dOD[XX], dOD[YY], dOD[ZZ], 1/r_OD);
872 fprintf(debug, "WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
873 nW[XX], nW[YY], nW[ZZ], 1/r_nW);
874 fprintf(debug, "WPOL: dx =%10g, dy =%10g, dz =%10g\n",
875 dx[XX], dx[YY], dx[ZZ]);
876 fprintf(debug, "WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
877 dDS[XX], dDS[YY], dDS[ZZ]);
880 /* Now compute the forces and energy */
881 kdx[XX] = kk[XX]*dx[XX];
882 kdx[YY] = kk[YY]*dx[YY];
883 kdx[ZZ] = kk[ZZ]*dx[ZZ];
884 vtot += iprod(dx, kdx);
885 for (m = 0; (m < DIM); m++)
887 /* This is a tensor operation but written out for speed */
901 fprintf(debug, "WPOL: vwpol=%g\n", 0.5*iprod(dx, kdx));
902 fprintf(debug, "WPOL: df = (%10g, %10g, %10g)\n", df[XX], df[YY], df[ZZ]);
910 static real do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj,
911 const t_pbc *pbc, real qq,
912 rvec fshift[], real afac)
915 real r12sq, r12_1, r12n, r12bar, v0, v1, fscal, ebar, fff;
918 t = pbc_rvec_sub(pbc, xi, xj, r12); /* 3 */
920 r12sq = iprod(r12, r12); /* 5 */
921 r12_1 = gmx_invsqrt(r12sq); /* 5 */
922 r12bar = afac/r12_1; /* 5 */
923 v0 = qq*ONE_4PI_EPS0*r12_1; /* 2 */
924 ebar = exp(-r12bar); /* 5 */
925 v1 = (1-(1+0.5*r12bar)*ebar); /* 4 */
926 fscal = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
929 fprintf(debug, "THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f ebar = %.3f\n", v0, v1, 1/r12_1, r12bar, fscal, ebar);
932 for (m = 0; (m < DIM); m++)
938 fshift[CENTRAL][m] -= fff;
941 return v0*v1; /* 1 */
945 real thole_pol(int nbonds,
946 const t_iatom forceatoms[], const t_iparams forceparams[],
947 const rvec x[], rvec f[], rvec fshift[],
948 const t_pbc *pbc, const t_graph *g,
949 real lambda, real *dvdlambda,
950 const t_mdatoms *md, t_fcdata *fcd,
951 int *global_atom_index)
953 /* Interaction between two pairs of particles with opposite charge */
954 int i, type, a1, da1, a2, da2;
955 real q1, q2, qq, a, al1, al2, afac;
958 for (i = 0; (i < nbonds); )
960 type = forceatoms[i++];
961 a1 = forceatoms[i++];
962 da1 = forceatoms[i++];
963 a2 = forceatoms[i++];
964 da2 = forceatoms[i++];
965 q1 = md->chargeA[da1];
966 q2 = md->chargeA[da2];
967 a = forceparams[type].thole.a;
968 al1 = forceparams[type].thole.alpha1;
969 al2 = forceparams[type].thole.alpha2;
971 afac = a*pow(al1*al2, -1.0/6.0);
972 V += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
973 V += do_1_thole(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
974 V += do_1_thole(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
975 V += do_1_thole(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
981 real bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
982 rvec r_ij, rvec r_kj, real *costh,
984 /* Return value is the angle between the bonds i-j and j-k */
989 *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /* 3 */
990 *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /* 3 */
992 *costh = cos_angle(r_ij, r_kj); /* 25 */
993 th = acos(*costh); /* 10 */
998 real angles(int nbonds,
999 const t_iatom forceatoms[], const t_iparams forceparams[],
1000 const rvec x[], rvec f[], rvec fshift[],
1001 const t_pbc *pbc, const t_graph *g,
1002 real lambda, real *dvdlambda,
1003 const t_mdatoms *md, t_fcdata *fcd,
1004 int *global_atom_index)
1006 int i, ai, aj, ak, t1, t2, type;
1008 real cos_theta, cos_theta2, theta, dVdt, va, vtot;
1009 ivec jt, dt_ij, dt_kj;
1012 for (i = 0; i < nbonds; )
1014 type = forceatoms[i++];
1015 ai = forceatoms[i++];
1016 aj = forceatoms[i++];
1017 ak = forceatoms[i++];
1019 theta = bond_angle(x[ai], x[aj], x[ak], pbc,
1020 r_ij, r_kj, &cos_theta, &t1, &t2); /* 41 */
1022 *dvdlambda += harmonic(forceparams[type].harmonic.krA,
1023 forceparams[type].harmonic.krB,
1024 forceparams[type].harmonic.rA*DEG2RAD,
1025 forceparams[type].harmonic.rB*DEG2RAD,
1026 theta, lambda, &va, &dVdt); /* 21 */
1029 cos_theta2 = sqr(cos_theta);
1036 real nrkj_1, nrij_1;
1039 st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
1040 sth = st*cos_theta; /* 1 */
1044 fprintf(debug, "ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
1045 theta*RAD2DEG, va, dVdt);
1048 nrij2 = iprod(r_ij, r_ij); /* 5 */
1049 nrkj2 = iprod(r_kj, r_kj); /* 5 */
1051 nrij_1 = gmx_invsqrt(nrij2); /* 10 */
1052 nrkj_1 = gmx_invsqrt(nrkj2); /* 10 */
1054 cik = st*nrij_1*nrkj_1; /* 2 */
1055 cii = sth*nrij_1*nrij_1; /* 2 */
1056 ckk = sth*nrkj_1*nrkj_1; /* 2 */
1058 for (m = 0; m < DIM; m++)
1060 f_i[m] = -(cik*r_kj[m] - cii*r_ij[m]);
1061 f_k[m] = -(cik*r_ij[m] - ckk*r_kj[m]);
1062 f_j[m] = -f_i[m] - f_k[m];
1069 copy_ivec(SHIFT_IVEC(g, aj), jt);
1071 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1072 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1073 t1 = IVEC2IS(dt_ij);
1074 t2 = IVEC2IS(dt_kj);
1076 rvec_inc(fshift[t1], f_i);
1077 rvec_inc(fshift[CENTRAL], f_j);
1078 rvec_inc(fshift[t2], f_k);
1087 /* As angles, but using SIMD to calculate many dihedrals at once.
1088 * This routines does not calculate energies and shift forces.
1090 static gmx_inline void
1091 angles_noener_simd(int nbonds,
1092 const t_iatom forceatoms[], const t_iparams forceparams[],
1093 const rvec x[], rvec f[],
1094 const t_pbc *pbc, const t_graph *g,
1096 const t_mdatoms *md, t_fcdata *fcd,
1097 int *global_atom_index)
1099 #define UNROLL GMX_SIMD_WIDTH_HERE
1102 int type, ai[UNROLL], aj[UNROLL], ak[UNROLL];
1103 real coeff_array[2*UNROLL+UNROLL], *coeff;
1104 real dr_array[2*DIM*UNROLL+UNROLL], *dr;
1105 real f_buf_array[6*UNROLL+UNROLL], *f_buf;
1106 gmx_mm_pr k_S, theta0_S;
1107 gmx_mm_pr rijx_S, rijy_S, rijz_S;
1108 gmx_mm_pr rkjx_S, rkjy_S, rkjz_S;
1110 gmx_mm_pr rij_rkj_S;
1111 gmx_mm_pr nrij2_S, nrij_1_S;
1112 gmx_mm_pr nrkj2_S, nrkj_1_S;
1113 gmx_mm_pr cos_S, sin_S;
1115 gmx_mm_pr st_S, sth_S;
1116 gmx_mm_pr cik_S, cii_S, ckk_S;
1117 gmx_mm_pr f_ix_S, f_iy_S, f_iz_S;
1118 gmx_mm_pr f_kx_S, f_ky_S, f_kz_S;
1119 pbc_simd_t pbc_simd;
1121 /* Ensure register memory alignment */
1122 coeff = gmx_simd_align_real(coeff_array);
1123 dr = gmx_simd_align_real(dr_array);
1124 f_buf = gmx_simd_align_real(f_buf_array);
1126 set_pbc_simd(pbc,&pbc_simd);
1128 one_S = gmx_set1_pr(1.0);
1130 /* nbonds is the number of angles times nfa1, here we step UNROLL angles */
1131 for (i = 0; (i < nbonds); i += UNROLL*nfa1)
1133 /* Collect atoms for UNROLL angles.
1134 * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1137 for (s = 0; s < UNROLL; s++)
1139 type = forceatoms[iu];
1140 ai[s] = forceatoms[iu+1];
1141 aj[s] = forceatoms[iu+2];
1142 ak[s] = forceatoms[iu+3];
1144 coeff[s] = forceparams[type].harmonic.krA;
1145 coeff[UNROLL+s] = forceparams[type].harmonic.rA*DEG2RAD;
1147 /* If you can't use pbc_dx_simd below for PBC, e.g. because
1148 * you can't round in SIMD, use pbc_rvec_sub here.
1150 /* Store the non PBC corrected distances packed and aligned */
1151 for (m = 0; m < DIM; m++)
1153 dr[s + m *UNROLL] = x[ai[s]][m] - x[aj[s]][m];
1154 dr[s + (DIM+m)*UNROLL] = x[ak[s]][m] - x[aj[s]][m];
1157 /* At the end fill the arrays with identical entries */
1158 if (iu + nfa1 < nbonds)
1164 k_S = gmx_load_pr(coeff);
1165 theta0_S = gmx_load_pr(coeff+UNROLL);
1167 rijx_S = gmx_load_pr(dr + 0*UNROLL);
1168 rijy_S = gmx_load_pr(dr + 1*UNROLL);
1169 rijz_S = gmx_load_pr(dr + 2*UNROLL);
1170 rkjx_S = gmx_load_pr(dr + 3*UNROLL);
1171 rkjy_S = gmx_load_pr(dr + 4*UNROLL);
1172 rkjz_S = gmx_load_pr(dr + 5*UNROLL);
1174 pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, &pbc_simd);
1175 pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, &pbc_simd);
1177 rij_rkj_S = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
1178 rkjx_S, rkjy_S, rkjz_S);
1180 nrij2_S = gmx_norm2_pr(rijx_S, rijy_S, rijz_S);
1181 nrkj2_S = gmx_norm2_pr(rkjx_S, rkjy_S, rkjz_S);
1183 nrij_1_S = gmx_invsqrt_pr(nrij2_S);
1184 nrkj_1_S = gmx_invsqrt_pr(nrkj2_S);
1186 cos_S = gmx_mul_pr(rij_rkj_S, gmx_mul_pr(nrij_1_S, nrkj_1_S));
1188 theta_S = gmx_acos_pr(cos_S);
1190 sin_S = gmx_invsqrt_pr(gmx_max_pr(gmx_sub_pr(one_S, gmx_mul_pr(cos_S, cos_S)),
1192 st_S = gmx_mul_pr(gmx_mul_pr(k_S, gmx_sub_pr(theta0_S, theta_S)),
1194 sth_S = gmx_mul_pr(st_S, cos_S);
1196 cik_S = gmx_mul_pr(st_S, gmx_mul_pr(nrij_1_S, nrkj_1_S));
1197 cii_S = gmx_mul_pr(sth_S, gmx_mul_pr(nrij_1_S, nrij_1_S));
1198 ckk_S = gmx_mul_pr(sth_S, gmx_mul_pr(nrkj_1_S, nrkj_1_S));
1200 f_ix_S = gmx_mul_pr(cii_S, rijx_S);
1201 f_ix_S = gmx_nmsub_pr(cik_S, rkjx_S, f_ix_S);
1202 f_iy_S = gmx_mul_pr(cii_S, rijy_S);
1203 f_iy_S = gmx_nmsub_pr(cik_S, rkjy_S, f_iy_S);
1204 f_iz_S = gmx_mul_pr(cii_S, rijz_S);
1205 f_iz_S = gmx_nmsub_pr(cik_S, rkjz_S, f_iz_S);
1206 f_kx_S = gmx_mul_pr(ckk_S, rkjx_S);
1207 f_kx_S = gmx_nmsub_pr(cik_S, rijx_S, f_kx_S);
1208 f_ky_S = gmx_mul_pr(ckk_S, rkjy_S);
1209 f_ky_S = gmx_nmsub_pr(cik_S, rijy_S, f_ky_S);
1210 f_kz_S = gmx_mul_pr(ckk_S, rkjz_S);
1211 f_kz_S = gmx_nmsub_pr(cik_S, rijz_S, f_kz_S);
1213 gmx_store_pr(f_buf + 0*UNROLL, f_ix_S);
1214 gmx_store_pr(f_buf + 1*UNROLL, f_iy_S);
1215 gmx_store_pr(f_buf + 2*UNROLL, f_iz_S);
1216 gmx_store_pr(f_buf + 3*UNROLL, f_kx_S);
1217 gmx_store_pr(f_buf + 4*UNROLL, f_ky_S);
1218 gmx_store_pr(f_buf + 5*UNROLL, f_kz_S);
1224 for (m = 0; m < DIM; m++)
1226 f[ai[s]][m] += f_buf[s + m*UNROLL];
1227 f[aj[s]][m] -= f_buf[s + m*UNROLL] + f_buf[s + (DIM+m)*UNROLL];
1228 f[ak[s]][m] += f_buf[s + (DIM+m)*UNROLL];
1233 while (s < UNROLL && iu < nbonds);
1238 #endif /* SIMD_BONDEDS */
1240 real linear_angles(int nbonds,
1241 const t_iatom forceatoms[], const t_iparams forceparams[],
1242 const rvec x[], rvec f[], rvec fshift[],
1243 const t_pbc *pbc, const t_graph *g,
1244 real lambda, real *dvdlambda,
1245 const t_mdatoms *md, t_fcdata *fcd,
1246 int *global_atom_index)
1248 int i, m, ai, aj, ak, t1, t2, type;
1250 real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1251 ivec jt, dt_ij, dt_kj;
1252 rvec r_ij, r_kj, r_ik, dx;
1256 for (i = 0; (i < nbonds); )
1258 type = forceatoms[i++];
1259 ai = forceatoms[i++];
1260 aj = forceatoms[i++];
1261 ak = forceatoms[i++];
1263 kA = forceparams[type].linangle.klinA;
1264 kB = forceparams[type].linangle.klinB;
1265 klin = L1*kA + lambda*kB;
1267 aA = forceparams[type].linangle.aA;
1268 aB = forceparams[type].linangle.aB;
1269 a = L1*aA+lambda*aB;
1272 t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1273 t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1274 rvec_sub(r_ij, r_kj, r_ik);
1277 for (m = 0; (m < DIM); m++)
1279 dr = -a * r_ij[m] - b * r_kj[m];
1284 f_j[m] = -(f_i[m]+f_k[m]);
1290 *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx, r_ik);
1296 copy_ivec(SHIFT_IVEC(g, aj), jt);
1298 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1299 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1300 t1 = IVEC2IS(dt_ij);
1301 t2 = IVEC2IS(dt_kj);
1303 rvec_inc(fshift[t1], f_i);
1304 rvec_inc(fshift[CENTRAL], f_j);
1305 rvec_inc(fshift[t2], f_k);
1310 real urey_bradley(int nbonds,
1311 const t_iatom forceatoms[], const t_iparams forceparams[],
1312 const rvec x[], rvec f[], rvec fshift[],
1313 const t_pbc *pbc, const t_graph *g,
1314 real lambda, real *dvdlambda,
1315 const t_mdatoms *md, t_fcdata *fcd,
1316 int *global_atom_index)
1318 int i, m, ai, aj, ak, t1, t2, type, ki;
1319 rvec r_ij, r_kj, r_ik;
1320 real cos_theta, cos_theta2, theta;
1321 real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1322 real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1323 ivec jt, dt_ij, dt_kj, dt_ik;
1326 for (i = 0; (i < nbonds); )
1328 type = forceatoms[i++];
1329 ai = forceatoms[i++];
1330 aj = forceatoms[i++];
1331 ak = forceatoms[i++];
1332 th0A = forceparams[type].u_b.thetaA*DEG2RAD;
1333 kthA = forceparams[type].u_b.kthetaA;
1334 r13A = forceparams[type].u_b.r13A;
1335 kUBA = forceparams[type].u_b.kUBA;
1336 th0B = forceparams[type].u_b.thetaB*DEG2RAD;
1337 kthB = forceparams[type].u_b.kthetaB;
1338 r13B = forceparams[type].u_b.r13B;
1339 kUBB = forceparams[type].u_b.kUBB;
1341 theta = bond_angle(x[ai], x[aj], x[ak], pbc,
1342 r_ij, r_kj, &cos_theta, &t1, &t2); /* 41 */
1344 *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /* 21 */
1347 ki = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik); /* 3 */
1348 dr2 = iprod(r_ik, r_ik); /* 5 */
1349 dr = dr2*gmx_invsqrt(dr2); /* 10 */
1351 *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /* 19 */
1353 cos_theta2 = sqr(cos_theta); /* 1 */
1361 st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
1362 sth = st*cos_theta; /* 1 */
1366 fprintf(debug, "ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
1367 theta*RAD2DEG, va, dVdt);
1370 nrkj2 = iprod(r_kj, r_kj); /* 5 */
1371 nrij2 = iprod(r_ij, r_ij);
1373 cik = st*gmx_invsqrt(nrkj2*nrij2); /* 12 */
1374 cii = sth/nrij2; /* 10 */
1375 ckk = sth/nrkj2; /* 10 */
1377 for (m = 0; (m < DIM); m++) /* 39 */
1379 f_i[m] = -(cik*r_kj[m]-cii*r_ij[m]);
1380 f_k[m] = -(cik*r_ij[m]-ckk*r_kj[m]);
1381 f_j[m] = -f_i[m]-f_k[m];
1388 copy_ivec(SHIFT_IVEC(g, aj), jt);
1390 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1391 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1392 t1 = IVEC2IS(dt_ij);
1393 t2 = IVEC2IS(dt_kj);
1395 rvec_inc(fshift[t1], f_i);
1396 rvec_inc(fshift[CENTRAL], f_j);
1397 rvec_inc(fshift[t2], f_k);
1399 /* Time for the bond calculations */
1405 vtot += vbond; /* 1*/
1406 fbond *= gmx_invsqrt(dr2); /* 6 */
1410 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
1411 ki = IVEC2IS(dt_ik);
1413 for (m = 0; (m < DIM); m++) /* 15 */
1415 fik = fbond*r_ik[m];
1418 fshift[ki][m] += fik;
1419 fshift[CENTRAL][m] -= fik;
1425 real quartic_angles(int nbonds,
1426 const t_iatom forceatoms[], const t_iparams forceparams[],
1427 const rvec x[], rvec f[], rvec fshift[],
1428 const t_pbc *pbc, const t_graph *g,
1429 real lambda, real *dvdlambda,
1430 const t_mdatoms *md, t_fcdata *fcd,
1431 int *global_atom_index)
1433 int i, j, ai, aj, ak, t1, t2, type;
1435 real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1436 ivec jt, dt_ij, dt_kj;
1439 for (i = 0; (i < nbonds); )
1441 type = forceatoms[i++];
1442 ai = forceatoms[i++];
1443 aj = forceatoms[i++];
1444 ak = forceatoms[i++];
1446 theta = bond_angle(x[ai], x[aj], x[ak], pbc,
1447 r_ij, r_kj, &cos_theta, &t1, &t2); /* 41 */
1449 dt = theta - forceparams[type].qangle.theta*DEG2RAD; /* 2 */
1452 va = forceparams[type].qangle.c[0];
1454 for (j = 1; j <= 4; j++)
1456 c = forceparams[type].qangle.c[j];
1465 cos_theta2 = sqr(cos_theta); /* 1 */
1474 st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
1475 sth = st*cos_theta; /* 1 */
1479 fprintf(debug, "ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
1480 theta*RAD2DEG, va, dVdt);
1483 nrkj2 = iprod(r_kj, r_kj); /* 5 */
1484 nrij2 = iprod(r_ij, r_ij);
1486 cik = st*gmx_invsqrt(nrkj2*nrij2); /* 12 */
1487 cii = sth/nrij2; /* 10 */
1488 ckk = sth/nrkj2; /* 10 */
1490 for (m = 0; (m < DIM); m++) /* 39 */
1492 f_i[m] = -(cik*r_kj[m]-cii*r_ij[m]);
1493 f_k[m] = -(cik*r_ij[m]-ckk*r_kj[m]);
1494 f_j[m] = -f_i[m]-f_k[m];
1501 copy_ivec(SHIFT_IVEC(g, aj), jt);
1503 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1504 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1505 t1 = IVEC2IS(dt_ij);
1506 t2 = IVEC2IS(dt_kj);
1508 rvec_inc(fshift[t1], f_i);
1509 rvec_inc(fshift[CENTRAL], f_j);
1510 rvec_inc(fshift[t2], f_k);
1516 real dih_angle(const rvec xi, const rvec xj, const rvec xk, const rvec xl,
1518 rvec r_ij, rvec r_kj, rvec r_kl, rvec m, rvec n,
1519 real *sign, int *t1, int *t2, int *t3)
1523 *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /* 3 */
1524 *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /* 3 */
1525 *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /* 3 */
1527 cprod(r_ij, r_kj, m); /* 9 */
1528 cprod(r_kj, r_kl, n); /* 9 */
1529 phi = gmx_angle(m, n); /* 49 (assuming 25 for atan2) */
1530 ipr = iprod(r_ij, n); /* 5 */
1531 (*sign) = (ipr < 0.0) ? -1.0 : 1.0;
1532 phi = (*sign)*phi; /* 1 */
1540 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1541 * also calculates the pre-factor required for the dihedral force update.
1542 * Note that bv and buf should be register aligned.
1544 static gmx_inline void
1545 dih_angle_simd(const rvec *x,
1546 const int *ai, const int *aj, const int *ak, const int *al,
1547 const pbc_simd_t *pbc,
1550 gmx_mm_pr *mx_S, gmx_mm_pr *my_S, gmx_mm_pr *mz_S,
1551 gmx_mm_pr *nx_S, gmx_mm_pr *ny_S, gmx_mm_pr *nz_S,
1552 gmx_mm_pr *nrkj_m2_S,
1553 gmx_mm_pr *nrkj_n2_S,
1557 #define UNROLL GMX_SIMD_WIDTH_HERE
1559 gmx_mm_pr rijx_S, rijy_S, rijz_S;
1560 gmx_mm_pr rkjx_S, rkjy_S, rkjz_S;
1561 gmx_mm_pr rklx_S, rkly_S, rklz_S;
1562 gmx_mm_pr cx_S, cy_S, cz_S;
1566 gmx_mm_pr iprm_S, iprn_S;
1567 gmx_mm_pr nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1569 gmx_mm_pr fmin_S = gmx_set1_pr(GMX_FLOAT_MIN);
1570 /* Using -0.0 should lead to only the sign bit being set */
1571 gmx_mm_pr sign_mask_S = gmx_set1_pr(-0.0);
1573 for (s = 0; s < UNROLL; s++)
1575 /* If you can't use pbc_dx_simd below for PBC, e.g. because
1576 * you can't round in SIMD, use pbc_rvec_sub here.
1578 for (m = 0; m < DIM; m++)
1580 dr[s + (0*DIM + m)*UNROLL] = x[ai[s]][m] - x[aj[s]][m];
1581 dr[s + (1*DIM + m)*UNROLL] = x[ak[s]][m] - x[aj[s]][m];
1582 dr[s + (2*DIM + m)*UNROLL] = x[ak[s]][m] - x[al[s]][m];
1586 rijx_S = gmx_load_pr(dr + 0*UNROLL);
1587 rijy_S = gmx_load_pr(dr + 1*UNROLL);
1588 rijz_S = gmx_load_pr(dr + 2*UNROLL);
1589 rkjx_S = gmx_load_pr(dr + 3*UNROLL);
1590 rkjy_S = gmx_load_pr(dr + 4*UNROLL);
1591 rkjz_S = gmx_load_pr(dr + 5*UNROLL);
1592 rklx_S = gmx_load_pr(dr + 6*UNROLL);
1593 rkly_S = gmx_load_pr(dr + 7*UNROLL);
1594 rklz_S = gmx_load_pr(dr + 8*UNROLL);
1596 pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc);
1597 pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc);
1598 pbc_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc);
1600 gmx_cprod_pr(rijx_S, rijy_S, rijz_S,
1601 rkjx_S, rkjy_S, rkjz_S,
1604 gmx_cprod_pr(rkjx_S, rkjy_S, rkjz_S,
1605 rklx_S, rkly_S, rklz_S,
1608 gmx_cprod_pr(*mx_S, *my_S, *mz_S,
1609 *nx_S, *ny_S, *nz_S,
1610 &cx_S, &cy_S, &cz_S);
1612 cn_S = gmx_sqrt_pr(gmx_norm2_pr(cx_S, cy_S, cz_S));
1614 s_S = gmx_iprod_pr(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1616 /* Determine the dihedral angle, the sign might need correction */
1617 *phi_S = gmx_atan2_pr(cn_S, s_S);
1619 ipr_S = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
1620 *nx_S, *ny_S, *nz_S);
1622 iprm_S = gmx_norm2_pr(*mx_S, *my_S, *mz_S);
1623 iprn_S = gmx_norm2_pr(*nx_S, *ny_S, *nz_S);
1625 nrkj2_S = gmx_norm2_pr(rkjx_S, rkjy_S, rkjz_S);
1627 /* Avoid division by zero. When zero, the result is multiplied by 0
1628 * anyhow, so the 3 max below do not affect the final result.
1630 nrkj2_S = gmx_max_pr(nrkj2_S, fmin_S);
1631 nrkj_1_S = gmx_invsqrt_pr(nrkj2_S);
1632 nrkj_2_S = gmx_mul_pr(nrkj_1_S, nrkj_1_S);
1633 nrkj_S = gmx_mul_pr(nrkj2_S, nrkj_1_S);
1635 iprm_S = gmx_max_pr(iprm_S, fmin_S);
1636 iprn_S = gmx_max_pr(iprn_S, fmin_S);
1637 *nrkj_m2_S = gmx_mul_pr(nrkj_S, gmx_inv_pr(iprm_S));
1638 *nrkj_n2_S = gmx_mul_pr(nrkj_S, gmx_inv_pr(iprn_S));
1640 /* Set sign of the angle with the sign of ipr_S.
1641 * Since phi is currently positive, we can use OR instead of XOR.
1643 *phi_S = gmx_or_pr(*phi_S, gmx_and_pr(ipr_S, sign_mask_S));
1645 p_S = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
1646 rkjx_S, rkjy_S, rkjz_S);
1647 p_S = gmx_mul_pr(p_S, nrkj_2_S);
1649 q_S = gmx_iprod_pr(rklx_S, rkly_S, rklz_S,
1650 rkjx_S, rkjy_S, rkjz_S);
1651 q_S = gmx_mul_pr(q_S, nrkj_2_S);
1653 gmx_store_pr(p, p_S);
1654 gmx_store_pr(q, q_S);
1658 #endif /* SIMD_BONDEDS */
1661 void do_dih_fup(int i, int j, int k, int l, real ddphi,
1662 rvec r_ij, rvec r_kj, rvec r_kl,
1663 rvec m, rvec n, rvec f[], rvec fshift[],
1664 const t_pbc *pbc, const t_graph *g,
1665 const rvec x[], int t1, int t2, int t3)
1668 rvec f_i, f_j, f_k, f_l;
1669 rvec uvec, vvec, svec, dx_jl;
1670 real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1671 real a, b, p, q, toler;
1672 ivec jt, dt_ij, dt_kj, dt_lj;
1674 iprm = iprod(m, m); /* 5 */
1675 iprn = iprod(n, n); /* 5 */
1676 nrkj2 = iprod(r_kj, r_kj); /* 5 */
1677 toler = nrkj2*GMX_REAL_EPS;
1678 if ((iprm > toler) && (iprn > toler))
1680 nrkj_1 = gmx_invsqrt(nrkj2); /* 10 */
1681 nrkj_2 = nrkj_1*nrkj_1; /* 1 */
1682 nrkj = nrkj2*nrkj_1; /* 1 */
1683 a = -ddphi*nrkj/iprm; /* 11 */
1684 svmul(a, m, f_i); /* 3 */
1685 b = ddphi*nrkj/iprn; /* 11 */
1686 svmul(b, n, f_l); /* 3 */
1687 p = iprod(r_ij, r_kj); /* 5 */
1688 p *= nrkj_2; /* 1 */
1689 q = iprod(r_kl, r_kj); /* 5 */
1690 q *= nrkj_2; /* 1 */
1691 svmul(p, f_i, uvec); /* 3 */
1692 svmul(q, f_l, vvec); /* 3 */
1693 rvec_sub(uvec, vvec, svec); /* 3 */
1694 rvec_sub(f_i, svec, f_j); /* 3 */
1695 rvec_add(f_l, svec, f_k); /* 3 */
1696 rvec_inc(f[i], f_i); /* 3 */
1697 rvec_dec(f[j], f_j); /* 3 */
1698 rvec_dec(f[k], f_k); /* 3 */
1699 rvec_inc(f[l], f_l); /* 3 */
1703 copy_ivec(SHIFT_IVEC(g, j), jt);
1704 ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
1705 ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
1706 ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
1707 t1 = IVEC2IS(dt_ij);
1708 t2 = IVEC2IS(dt_kj);
1709 t3 = IVEC2IS(dt_lj);
1713 t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1720 rvec_inc(fshift[t1], f_i);
1721 rvec_dec(fshift[CENTRAL], f_j);
1722 rvec_dec(fshift[t2], f_k);
1723 rvec_inc(fshift[t3], f_l);
1728 /* As do_dih_fup above, but without shift forces */
1730 do_dih_fup_noshiftf(int i, int j, int k, int l, real ddphi,
1731 rvec r_ij, rvec r_kj, rvec r_kl,
1732 rvec m, rvec n, rvec f[])
1734 rvec f_i, f_j, f_k, f_l;
1735 rvec uvec, vvec, svec, dx_jl;
1736 real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1737 real a, b, p, q, toler;
1738 ivec jt, dt_ij, dt_kj, dt_lj;
1740 iprm = iprod(m, m); /* 5 */
1741 iprn = iprod(n, n); /* 5 */
1742 nrkj2 = iprod(r_kj, r_kj); /* 5 */
1743 toler = nrkj2*GMX_REAL_EPS;
1744 if ((iprm > toler) && (iprn > toler))
1746 nrkj_1 = gmx_invsqrt(nrkj2); /* 10 */
1747 nrkj_2 = nrkj_1*nrkj_1; /* 1 */
1748 nrkj = nrkj2*nrkj_1; /* 1 */
1749 a = -ddphi*nrkj/iprm; /* 11 */
1750 svmul(a, m, f_i); /* 3 */
1751 b = ddphi*nrkj/iprn; /* 11 */
1752 svmul(b, n, f_l); /* 3 */
1753 p = iprod(r_ij, r_kj); /* 5 */
1754 p *= nrkj_2; /* 1 */
1755 q = iprod(r_kl, r_kj); /* 5 */
1756 q *= nrkj_2; /* 1 */
1757 svmul(p, f_i, uvec); /* 3 */
1758 svmul(q, f_l, vvec); /* 3 */
1759 rvec_sub(uvec, vvec, svec); /* 3 */
1760 rvec_sub(f_i, svec, f_j); /* 3 */
1761 rvec_add(f_l, svec, f_k); /* 3 */
1762 rvec_inc(f[i], f_i); /* 3 */
1763 rvec_dec(f[j], f_j); /* 3 */
1764 rvec_dec(f[k], f_k); /* 3 */
1765 rvec_inc(f[l], f_l); /* 3 */
1769 /* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
1770 static gmx_inline void
1771 do_dih_fup_noshiftf_precalc(int i, int j, int k, int l,
1773 real f_i_x, real f_i_y, real f_i_z,
1774 real mf_l_x, real mf_l_y, real mf_l_z,
1777 rvec f_i, f_j, f_k, f_l;
1778 rvec uvec, vvec, svec;
1786 svmul(p, f_i, uvec);
1787 svmul(q, f_l, vvec);
1788 rvec_sub(uvec, vvec, svec);
1789 rvec_sub(f_i, svec, f_j);
1790 rvec_add(f_l, svec, f_k);
1791 rvec_inc(f[i], f_i);
1792 rvec_dec(f[j], f_j);
1793 rvec_dec(f[k], f_k);
1794 rvec_inc(f[l], f_l);
1798 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult,
1799 real phi, real lambda, real *V, real *F)
1801 real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1802 real L1 = 1.0 - lambda;
1803 real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
1804 real dph0 = (phiB - phiA)*DEG2RAD;
1805 real cp = L1*cpA + lambda*cpB;
1807 mdphi = mult*phi - ph0;
1809 ddphi = -cp*mult*sdphi;
1810 v1 = 1.0 + cos(mdphi);
1813 dvdlambda = (cpB - cpA)*v1 + cp*dph0*sdphi;
1820 /* That was 40 flops */
1824 dopdihs_noener(real cpA, real cpB, real phiA, real phiB, int mult,
1825 real phi, real lambda, real *F)
1827 real mdphi, sdphi, ddphi;
1828 real L1 = 1.0 - lambda;
1829 real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
1830 real cp = L1*cpA + lambda*cpB;
1832 mdphi = mult*phi - ph0;
1834 ddphi = -cp*mult*sdphi;
1838 /* That was 20 flops */
1842 dopdihs_mdphi(real cpA, real cpB, real phiA, real phiB, int mult,
1843 real phi, real lambda, real *cp, real *mdphi)
1845 real L1 = 1.0 - lambda;
1846 real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
1848 *cp = L1*cpA + lambda*cpB;
1850 *mdphi = mult*phi - ph0;
1853 static real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult,
1854 real phi, real lambda, real *V, real *F)
1855 /* similar to dopdihs, except for a minus sign *
1856 * and a different treatment of mult/phi0 */
1858 real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1859 real L1 = 1.0 - lambda;
1860 real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
1861 real dph0 = (phiB - phiA)*DEG2RAD;
1862 real cp = L1*cpA + lambda*cpB;
1864 mdphi = mult*(phi-ph0);
1866 ddphi = cp*mult*sdphi;
1867 v1 = 1.0-cos(mdphi);
1870 dvdlambda = (cpB-cpA)*v1 + cp*dph0*sdphi;
1877 /* That was 40 flops */
1880 real pdihs(int nbonds,
1881 const t_iatom forceatoms[], const t_iparams forceparams[],
1882 const rvec x[], rvec f[], rvec fshift[],
1883 const t_pbc *pbc, const t_graph *g,
1884 real lambda, real *dvdlambda,
1885 const t_mdatoms *md, t_fcdata *fcd,
1886 int *global_atom_index)
1888 int i, type, ai, aj, ak, al;
1890 rvec r_ij, r_kj, r_kl, m, n;
1891 real phi, sign, ddphi, vpd, vtot;
1895 for (i = 0; (i < nbonds); )
1897 type = forceatoms[i++];
1898 ai = forceatoms[i++];
1899 aj = forceatoms[i++];
1900 ak = forceatoms[i++];
1901 al = forceatoms[i++];
1903 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1904 &sign, &t1, &t2, &t3); /* 84 */
1905 *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
1906 forceparams[type].pdihs.cpB,
1907 forceparams[type].pdihs.phiA,
1908 forceparams[type].pdihs.phiB,
1909 forceparams[type].pdihs.mult,
1910 phi, lambda, &vpd, &ddphi);
1913 do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
1914 f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
1917 fprintf(debug, "pdih: (%d,%d,%d,%d) phi=%g\n",
1918 ai, aj, ak, al, phi);
1925 void make_dp_periodic(real *dp) /* 1 flop? */
1927 /* dp cannot be outside (-pi,pi) */
1932 else if (*dp < -M_PI)
1939 /* As pdihs above, but without calculating energies and shift forces */
1941 pdihs_noener(int nbonds,
1942 const t_iatom forceatoms[], const t_iparams forceparams[],
1943 const rvec x[], rvec f[],
1944 const t_pbc *pbc, const t_graph *g,
1946 const t_mdatoms *md, t_fcdata *fcd,
1947 int *global_atom_index)
1949 int i, type, ai, aj, ak, al;
1951 rvec r_ij, r_kj, r_kl, m, n;
1952 real phi, sign, ddphi_tot, ddphi;
1954 for (i = 0; (i < nbonds); )
1956 ai = forceatoms[i+1];
1957 aj = forceatoms[i+2];
1958 ak = forceatoms[i+3];
1959 al = forceatoms[i+4];
1961 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1962 &sign, &t1, &t2, &t3);
1966 /* Loop over dihedrals working on the same atoms,
1967 * so we avoid recalculating angles and force distributions.
1971 type = forceatoms[i];
1972 dopdihs_noener(forceparams[type].pdihs.cpA,
1973 forceparams[type].pdihs.cpB,
1974 forceparams[type].pdihs.phiA,
1975 forceparams[type].pdihs.phiB,
1976 forceparams[type].pdihs.mult,
1977 phi, lambda, &ddphi);
1982 while (i < nbonds &&
1983 forceatoms[i+1] == ai &&
1984 forceatoms[i+2] == aj &&
1985 forceatoms[i+3] == ak &&
1986 forceatoms[i+4] == al);
1988 do_dih_fup_noshiftf(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f);
1995 /* As pdihs_noner above, but using SIMD to calculate many dihedrals at once */
1997 pdihs_noener_simd(int nbonds,
1998 const t_iatom forceatoms[], const t_iparams forceparams[],
1999 const rvec x[], rvec f[],
2000 const t_pbc *pbc, const t_graph *g,
2002 const t_mdatoms *md, t_fcdata *fcd,
2003 int *global_atom_index)
2005 #define UNROLL GMX_SIMD_WIDTH_HERE
2008 int type, ai[UNROLL], aj[UNROLL], ak[UNROLL], al[UNROLL];
2009 int t1[UNROLL], t2[UNROLL], t3[UNROLL];
2011 real dr_array[3*DIM*UNROLL+UNROLL], *dr;
2012 real buf_array[7*UNROLL+UNROLL], *buf;
2013 real *cp, *phi0, *mult, *phi, *p, *q, *sf_i, *msf_l;
2014 gmx_mm_pr phi0_S, phi_S;
2015 gmx_mm_pr mx_S, my_S, mz_S;
2016 gmx_mm_pr nx_S, ny_S, nz_S;
2017 gmx_mm_pr nrkj_m2_S, nrkj_n2_S;
2018 gmx_mm_pr cp_S, mdphi_S, mult_S;
2019 gmx_mm_pr sin_S, cos_S;
2021 gmx_mm_pr sf_i_S, msf_l_S;
2022 pbc_simd_t pbc_simd;
2024 /* Ensure SIMD register alignment */
2025 dr = gmx_simd_align_real(dr_array);
2026 buf = gmx_simd_align_real(buf_array);
2028 /* Extract aligned pointer for parameters and variables */
2029 cp = buf + 0*UNROLL;
2030 phi0 = buf + 1*UNROLL;
2031 mult = buf + 2*UNROLL;
2034 sf_i = buf + 5*UNROLL;
2035 msf_l = buf + 6*UNROLL;
2037 set_pbc_simd(pbc, &pbc_simd);
2039 /* nbonds is the number of dihedrals times nfa1, here we step UNROLL dihs */
2040 for (i = 0; (i < nbonds); i += UNROLL*nfa1)
2042 /* Collect atoms quadruplets for UNROLL dihedrals.
2043 * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2046 for (s = 0; s < UNROLL; s++)
2048 type = forceatoms[iu];
2049 ai[s] = forceatoms[iu+1];
2050 aj[s] = forceatoms[iu+2];
2051 ak[s] = forceatoms[iu+3];
2052 al[s] = forceatoms[iu+4];
2054 cp[s] = forceparams[type].pdihs.cpA;
2055 phi0[s] = forceparams[type].pdihs.phiA*DEG2RAD;
2056 mult[s] = forceparams[type].pdihs.mult;
2058 /* At the end fill the arrays with identical entries */
2059 if (iu + nfa1 < nbonds)
2065 /* Caclulate UNROLL dihedral angles at once */
2066 dih_angle_simd(x, ai, aj, ak, al, &pbc_simd,
2069 &mx_S, &my_S, &mz_S,
2070 &nx_S, &ny_S, &nz_S,
2075 cp_S = gmx_load_pr(cp);
2076 phi0_S = gmx_load_pr(phi0);
2077 mult_S = gmx_load_pr(mult);
2079 mdphi_S = gmx_sub_pr(gmx_mul_pr(mult_S, phi_S), phi0_S);
2081 /* Calculate UNROLL sines at once */
2082 gmx_sincos_pr(mdphi_S, &sin_S, &cos_S);
2083 mddphi_S = gmx_mul_pr(gmx_mul_pr(cp_S, mult_S), sin_S);
2084 sf_i_S = gmx_mul_pr(mddphi_S, nrkj_m2_S);
2085 msf_l_S = gmx_mul_pr(mddphi_S, nrkj_n2_S);
2087 /* After this m?_S will contain f[i] */
2088 mx_S = gmx_mul_pr(sf_i_S, mx_S);
2089 my_S = gmx_mul_pr(sf_i_S, my_S);
2090 mz_S = gmx_mul_pr(sf_i_S, mz_S);
2092 /* After this m?_S will contain -f[l] */
2093 nx_S = gmx_mul_pr(msf_l_S, nx_S);
2094 ny_S = gmx_mul_pr(msf_l_S, ny_S);
2095 nz_S = gmx_mul_pr(msf_l_S, nz_S);
2097 gmx_store_pr(dr + 0*UNROLL, mx_S);
2098 gmx_store_pr(dr + 1*UNROLL, my_S);
2099 gmx_store_pr(dr + 2*UNROLL, mz_S);
2100 gmx_store_pr(dr + 3*UNROLL, nx_S);
2101 gmx_store_pr(dr + 4*UNROLL, ny_S);
2102 gmx_store_pr(dr + 5*UNROLL, nz_S);
2108 do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s],
2113 dr[(DIM+XX)*UNROLL+s],
2114 dr[(DIM+YY)*UNROLL+s],
2115 dr[(DIM+ZZ)*UNROLL+s],
2120 while (s < UNROLL && iu < nbonds);
2125 #endif /* SIMD_BONDEDS */
2128 real idihs(int nbonds,
2129 const t_iatom forceatoms[], const t_iparams forceparams[],
2130 const rvec x[], rvec f[], rvec fshift[],
2131 const t_pbc *pbc, const t_graph *g,
2132 real lambda, real *dvdlambda,
2133 const t_mdatoms *md, t_fcdata *fcd,
2134 int *global_atom_index)
2136 int i, type, ai, aj, ak, al;
2138 real phi, phi0, dphi0, ddphi, sign, vtot;
2139 rvec r_ij, r_kj, r_kl, m, n;
2140 real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2145 for (i = 0; (i < nbonds); )
2147 type = forceatoms[i++];
2148 ai = forceatoms[i++];
2149 aj = forceatoms[i++];
2150 ak = forceatoms[i++];
2151 al = forceatoms[i++];
2153 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2154 &sign, &t1, &t2, &t3); /* 84 */
2156 /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2157 * force changes if we just apply a normal harmonic.
2158 * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2159 * This means we will never have the periodicity problem, unless
2160 * the dihedral is Pi away from phiO, which is very unlikely due to
2163 kA = forceparams[type].harmonic.krA;
2164 kB = forceparams[type].harmonic.krB;
2165 pA = forceparams[type].harmonic.rA;
2166 pB = forceparams[type].harmonic.rB;
2168 kk = L1*kA + lambda*kB;
2169 phi0 = (L1*pA + lambda*pB)*DEG2RAD;
2170 dphi0 = (pB - pA)*DEG2RAD;
2174 make_dp_periodic(&dp);
2181 dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
2183 do_dih_fup(ai, aj, ak, al, (real)(-ddphi), r_ij, r_kj, r_kl, m, n,
2184 f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
2189 fprintf(debug, "idih: (%d,%d,%d,%d) phi=%g\n",
2190 ai, aj, ak, al, phi);
2195 *dvdlambda += dvdl_term;
2200 /*! \brief returns dx, rdist, and dpdl for functions posres() and fbposres()
2202 static void posres_dx(const rvec x, const rvec pos0A, const rvec pos0B,
2203 const rvec comA_sc, const rvec comB_sc,
2205 t_pbc *pbc, int refcoord_scaling, int npbcdim,
2206 rvec dx, rvec rdist, rvec dpdl)
2209 real posA, posB, L1, ref = 0.;
2214 for (m = 0; m < DIM; m++)
2220 switch (refcoord_scaling)
2224 rdist[m] = L1*posA + lambda*posB;
2225 dpdl[m] = posB - posA;
2228 /* Box relative coordinates are stored for dimensions with pbc */
2229 posA *= pbc->box[m][m];
2230 posB *= pbc->box[m][m];
2231 for (d = m+1; d < npbcdim; d++)
2233 posA += pos0A[d]*pbc->box[d][m];
2234 posB += pos0B[d]*pbc->box[d][m];
2236 ref = L1*posA + lambda*posB;
2238 dpdl[m] = posB - posA;
2241 ref = L1*comA_sc[m] + lambda*comB_sc[m];
2242 rdist[m] = L1*posA + lambda*posB;
2243 dpdl[m] = comB_sc[m] - comA_sc[m] + posB - posA;
2246 gmx_fatal(FARGS, "No such scaling method implemented");
2251 ref = L1*posA + lambda*posB;
2253 dpdl[m] = posB - posA;
2256 /* We do pbc_dx with ref+rdist,
2257 * since with only ref we can be up to half a box vector wrong.
2259 pos[m] = ref + rdist[m];
2264 pbc_dx(pbc, x, pos, dx);
2268 rvec_sub(x, pos, dx);
2272 /*! \brief Adds forces of flat-bottomed positions restraints to f[]
2273 * and fixes vir_diag. Returns the flat-bottomed potential. */
2274 real fbposres(int nbonds,
2275 const t_iatom forceatoms[], const t_iparams forceparams[],
2276 const rvec x[], rvec f[], rvec vir_diag,
2278 int refcoord_scaling, int ePBC, rvec com)
2279 /* compute flat-bottomed positions restraints */
2281 int i, ai, m, d, type, npbcdim = 0, fbdim;
2282 const t_iparams *pr;
2284 real ref = 0, dr, dr2, rpot, rfb, rfb2, fact, invdr;
2285 rvec com_sc, rdist, pos, dx, dpdl, fm;
2288 npbcdim = ePBC2npbcdim(ePBC);
2290 if (refcoord_scaling == erscCOM)
2293 for (m = 0; m < npbcdim; m++)
2295 for (d = m; d < npbcdim; d++)
2297 com_sc[m] += com[d]*pbc->box[d][m];
2303 for (i = 0; (i < nbonds); )
2305 type = forceatoms[i++];
2306 ai = forceatoms[i++];
2307 pr = &forceparams[type];
2309 /* same calculation as for normal posres, but with identical A and B states, and lambda==0 */
2310 posres_dx(x[ai], forceparams[type].fbposres.pos0, forceparams[type].fbposres.pos0,
2311 com_sc, com_sc, 0.0,
2312 pbc, refcoord_scaling, npbcdim,
2318 kk = pr->fbposres.k;
2319 rfb = pr->fbposres.r;
2322 /* with rfb<0, push particle out of the sphere/cylinder/layer */
2330 switch (pr->fbposres.geom)
2332 case efbposresSPHERE:
2333 /* spherical flat-bottom posres */
2336 ( (dr2 > rfb2 && bInvert == FALSE ) || (dr2 < rfb2 && bInvert == TRUE ) )
2340 v = 0.5*kk*sqr(dr - rfb);
2341 fact = -kk*(dr-rfb)/dr; /* Force pointing to the center pos0 */
2342 svmul(fact, dx, fm);
2345 case efbposresCYLINDER:
2346 /* cylidrical flat-bottom posres in x-y plane. fm[ZZ] = 0. */
2347 dr2 = sqr(dx[XX])+sqr(dx[YY]);
2349 ( (dr2 > rfb2 && bInvert == FALSE ) || (dr2 < rfb2 && bInvert == TRUE ) )
2354 v = 0.5*kk*sqr(dr - rfb);
2355 fm[XX] = -kk*(dr-rfb)*dx[XX]*invdr; /* Force pointing to the center */
2356 fm[YY] = -kk*(dr-rfb)*dx[YY]*invdr;
2359 case efbposresX: /* fbdim=XX */
2360 case efbposresY: /* fbdim=YY */
2361 case efbposresZ: /* fbdim=ZZ */
2362 /* 1D flat-bottom potential */
2363 fbdim = pr->fbposres.geom - efbposresX;
2365 if ( ( dr > rfb && bInvert == FALSE ) || ( 0 < dr && dr < rfb && bInvert == TRUE ) )
2367 v = 0.5*kk*sqr(dr - rfb);
2368 fm[fbdim] = -kk*(dr - rfb);
2370 else if ( (dr < (-rfb) && bInvert == FALSE ) || ( (-rfb) < dr && dr < 0 && bInvert == TRUE ))
2372 v = 0.5*kk*sqr(dr + rfb);
2373 fm[fbdim] = -kk*(dr + rfb);
2380 for (m = 0; (m < DIM); m++)
2383 /* Here we correct for the pbc_dx which included rdist */
2384 vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm[m];
2392 real posres(int nbonds,
2393 const t_iatom forceatoms[], const t_iparams forceparams[],
2394 const rvec x[], rvec f[], rvec vir_diag,
2396 real lambda, real *dvdlambda,
2397 int refcoord_scaling, int ePBC, rvec comA, rvec comB)
2399 int i, ai, m, d, type, ki, npbcdim = 0;
2400 const t_iparams *pr;
2403 real posA, posB, ref = 0;
2404 rvec comA_sc, comB_sc, rdist, dpdl, pos, dx;
2405 gmx_bool bForceValid = TRUE;
2407 if ((f == NULL) || (vir_diag == NULL)) /* should both be null together! */
2409 bForceValid = FALSE;
2412 npbcdim = ePBC2npbcdim(ePBC);
2414 if (refcoord_scaling == erscCOM)
2416 clear_rvec(comA_sc);
2417 clear_rvec(comB_sc);
2418 for (m = 0; m < npbcdim; m++)
2420 for (d = m; d < npbcdim; d++)
2422 comA_sc[m] += comA[d]*pbc->box[d][m];
2423 comB_sc[m] += comB[d]*pbc->box[d][m];
2431 for (i = 0; (i < nbonds); )
2433 type = forceatoms[i++];
2434 ai = forceatoms[i++];
2435 pr = &forceparams[type];
2437 /* return dx, rdist, and dpdl */
2438 posres_dx(x[ai], forceparams[type].posres.pos0A, forceparams[type].posres.pos0B,
2439 comA_sc, comB_sc, lambda,
2440 pbc, refcoord_scaling, npbcdim,
2443 for (m = 0; (m < DIM); m++)
2445 kk = L1*pr->posres.fcA[m] + lambda*pr->posres.fcB[m];
2447 vtot += 0.5*kk*dx[m]*dx[m];
2449 0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
2452 /* Here we correct for the pbc_dx which included rdist */
2456 vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm;
2464 static real low_angres(int nbonds,
2465 const t_iatom forceatoms[], const t_iparams forceparams[],
2466 const rvec x[], rvec f[], rvec fshift[],
2467 const t_pbc *pbc, const t_graph *g,
2468 real lambda, real *dvdlambda,
2471 int i, m, type, ai, aj, ak, al;
2473 real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2474 rvec r_ij, r_kl, f_i, f_k = {0, 0, 0};
2475 real st, sth, nrij2, nrkl2, c, cij, ckl;
2478 t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2481 ak = al = 0; /* to avoid warnings */
2482 for (i = 0; i < nbonds; )
2484 type = forceatoms[i++];
2485 ai = forceatoms[i++];
2486 aj = forceatoms[i++];
2487 t1 = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij); /* 3 */
2490 ak = forceatoms[i++];
2491 al = forceatoms[i++];
2492 t2 = pbc_rvec_sub(pbc, x[al], x[ak], r_kl); /* 3 */
2501 cos_phi = cos_angle(r_ij, r_kl); /* 25 */
2502 phi = acos(cos_phi); /* 10 */
2504 *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2505 forceparams[type].pdihs.cpB,
2506 forceparams[type].pdihs.phiA,
2507 forceparams[type].pdihs.phiB,
2508 forceparams[type].pdihs.mult,
2509 phi, lambda, &vid, &dVdphi); /* 40 */
2513 cos_phi2 = sqr(cos_phi); /* 1 */
2516 st = -dVdphi*gmx_invsqrt(1 - cos_phi2); /* 12 */
2517 sth = st*cos_phi; /* 1 */
2518 nrij2 = iprod(r_ij, r_ij); /* 5 */
2519 nrkl2 = iprod(r_kl, r_kl); /* 5 */
2521 c = st*gmx_invsqrt(nrij2*nrkl2); /* 11 */
2522 cij = sth/nrij2; /* 10 */
2523 ckl = sth/nrkl2; /* 10 */
2525 for (m = 0; m < DIM; m++) /* 18+18 */
2527 f_i[m] = (c*r_kl[m]-cij*r_ij[m]);
2532 f_k[m] = (c*r_ij[m]-ckl*r_kl[m]);
2540 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
2543 rvec_inc(fshift[t1], f_i);
2544 rvec_dec(fshift[CENTRAL], f_i);
2549 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
2552 rvec_inc(fshift[t2], f_k);
2553 rvec_dec(fshift[CENTRAL], f_k);
2558 return vtot; /* 184 / 157 (bZAxis) total */
2561 real angres(int nbonds,
2562 const t_iatom forceatoms[], const t_iparams forceparams[],
2563 const rvec x[], rvec f[], rvec fshift[],
2564 const t_pbc *pbc, const t_graph *g,
2565 real lambda, real *dvdlambda,
2566 const t_mdatoms *md, t_fcdata *fcd,
2567 int *global_atom_index)
2569 return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2570 lambda, dvdlambda, FALSE);
2573 real angresz(int nbonds,
2574 const t_iatom forceatoms[], const t_iparams forceparams[],
2575 const rvec x[], rvec f[], rvec fshift[],
2576 const t_pbc *pbc, const t_graph *g,
2577 real lambda, real *dvdlambda,
2578 const t_mdatoms *md, t_fcdata *fcd,
2579 int *global_atom_index)
2581 return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2582 lambda, dvdlambda, TRUE);
2585 real dihres(int nbonds,
2586 const t_iatom forceatoms[], const t_iparams forceparams[],
2587 const rvec x[], rvec f[], rvec fshift[],
2588 const t_pbc *pbc, const t_graph *g,
2589 real lambda, real *dvdlambda,
2590 const t_mdatoms *md, t_fcdata *fcd,
2591 int *global_atom_index)
2594 int ai, aj, ak, al, i, k, type, t1, t2, t3;
2595 real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2596 real phi, ddphi, ddp, ddp2, dp, sign, d2r, fc, L1;
2597 rvec r_ij, r_kj, r_kl, m, n;
2604 for (i = 0; (i < nbonds); )
2606 type = forceatoms[i++];
2607 ai = forceatoms[i++];
2608 aj = forceatoms[i++];
2609 ak = forceatoms[i++];
2610 al = forceatoms[i++];
2612 phi0A = forceparams[type].dihres.phiA*d2r;
2613 dphiA = forceparams[type].dihres.dphiA*d2r;
2614 kfacA = forceparams[type].dihres.kfacA;
2616 phi0B = forceparams[type].dihres.phiB*d2r;
2617 dphiB = forceparams[type].dihres.dphiB*d2r;
2618 kfacB = forceparams[type].dihres.kfacB;
2620 phi0 = L1*phi0A + lambda*phi0B;
2621 dphi = L1*dphiA + lambda*dphiB;
2622 kfac = L1*kfacA + lambda*kfacB;
2624 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2625 &sign, &t1, &t2, &t3);
2630 fprintf(debug, "dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
2631 k++, ai, aj, ak, al, phi0, dphi, kfac);
2633 /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2634 * force changes if we just apply a normal harmonic.
2635 * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2636 * This means we will never have the periodicity problem, unless
2637 * the dihedral is Pi away from phiO, which is very unlikely due to
2641 make_dp_periodic(&dp);
2647 else if (dp < -dphi)
2659 vtot += 0.5*kfac*ddp2;
2662 *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2663 /* lambda dependence from changing restraint distances */
2666 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2670 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2672 do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2673 f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
2680 real unimplemented(int nbonds,
2681 const t_iatom forceatoms[], const t_iparams forceparams[],
2682 const rvec x[], rvec f[], rvec fshift[],
2683 const t_pbc *pbc, const t_graph *g,
2684 real lambda, real *dvdlambda,
2685 const t_mdatoms *md, t_fcdata *fcd,
2686 int *global_atom_index)
2688 gmx_impl("*** you are using a not implemented function");
2690 return 0.0; /* To make the compiler happy */
2693 real rbdihs(int nbonds,
2694 const t_iatom forceatoms[], const t_iparams forceparams[],
2695 const rvec x[], rvec f[], rvec fshift[],
2696 const t_pbc *pbc, const t_graph *g,
2697 real lambda, real *dvdlambda,
2698 const t_mdatoms *md, t_fcdata *fcd,
2699 int *global_atom_index)
2701 const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2702 int type, ai, aj, ak, al, i, j;
2704 rvec r_ij, r_kj, r_kl, m, n;
2705 real parmA[NR_RBDIHS];
2706 real parmB[NR_RBDIHS];
2707 real parm[NR_RBDIHS];
2708 real cos_phi, phi, rbp, rbpBA;
2709 real v, sign, ddphi, sin_phi;
2711 real L1 = 1.0-lambda;
2715 for (i = 0; (i < nbonds); )
2717 type = forceatoms[i++];
2718 ai = forceatoms[i++];
2719 aj = forceatoms[i++];
2720 ak = forceatoms[i++];
2721 al = forceatoms[i++];
2723 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2724 &sign, &t1, &t2, &t3); /* 84 */
2726 /* Change to polymer convention */
2733 phi -= M_PI; /* 1 */
2737 /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2740 for (j = 0; (j < NR_RBDIHS); j++)
2742 parmA[j] = forceparams[type].rbdihs.rbcA[j];
2743 parmB[j] = forceparams[type].rbdihs.rbcB[j];
2744 parm[j] = L1*parmA[j]+lambda*parmB[j];
2746 /* Calculate cosine powers */
2747 /* Calculate the energy */
2748 /* Calculate the derivative */
2751 dvdl_term += (parmB[0]-parmA[0]);
2756 rbpBA = parmB[1]-parmA[1];
2757 ddphi += rbp*cosfac;
2760 dvdl_term += cosfac*rbpBA;
2762 rbpBA = parmB[2]-parmA[2];
2763 ddphi += c2*rbp*cosfac;
2766 dvdl_term += cosfac*rbpBA;
2768 rbpBA = parmB[3]-parmA[3];
2769 ddphi += c3*rbp*cosfac;
2772 dvdl_term += cosfac*rbpBA;
2774 rbpBA = parmB[4]-parmA[4];
2775 ddphi += c4*rbp*cosfac;
2778 dvdl_term += cosfac*rbpBA;
2780 rbpBA = parmB[5]-parmA[5];
2781 ddphi += c5*rbp*cosfac;
2784 dvdl_term += cosfac*rbpBA;
2786 ddphi = -ddphi*sin_phi; /* 11 */
2788 do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2789 f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
2792 *dvdlambda += dvdl_term;
2797 int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
2803 ip = ip + grid_spacing - 1;
2805 else if (ip > grid_spacing)
2807 ip = ip - grid_spacing - 1;
2816 im1 = grid_spacing - 1;
2818 else if (ip == grid_spacing-2)
2822 else if (ip == grid_spacing-1)
2836 real cmap_dihs(int nbonds,
2837 const t_iatom forceatoms[], const t_iparams forceparams[],
2838 const gmx_cmap_t *cmap_grid,
2839 const rvec x[], rvec f[], rvec fshift[],
2840 const t_pbc *pbc, const t_graph *g,
2841 real lambda, real *dvdlambda,
2842 const t_mdatoms *md, t_fcdata *fcd,
2843 int *global_atom_index)
2845 int i, j, k, n, idx;
2846 int ai, aj, ak, al, am;
2847 int a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
2849 int t11, t21, t31, t12, t22, t32;
2850 int iphi1, ip1m1, ip1p1, ip1p2;
2851 int iphi2, ip2m1, ip2p1, ip2p2;
2853 int pos1, pos2, pos3, pos4, tmp;
2855 real ty[4], ty1[4], ty2[4], ty12[4], tc[16], tx[16];
2856 real phi1, psi1, cos_phi1, sin_phi1, sign1, xphi1;
2857 real phi2, psi2, cos_phi2, sin_phi2, sign2, xphi2;
2858 real dx, xx, tt, tu, e, df1, df2, ddf1, ddf2, ddf12, vtot;
2859 real ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
2860 real ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
2861 real fg1, hg1, fga1, hgb1, gaa1, gbb1;
2862 real fg2, hg2, fga2, hgb2, gaa2, gbb2;
2865 rvec r1_ij, r1_kj, r1_kl, m1, n1;
2866 rvec r2_ij, r2_kj, r2_kl, m2, n2;
2867 rvec f1_i, f1_j, f1_k, f1_l;
2868 rvec f2_i, f2_j, f2_k, f2_l;
2869 rvec a1, b1, a2, b2;
2870 rvec f1, g1, h1, f2, g2, h2;
2871 rvec dtf1, dtg1, dth1, dtf2, dtg2, dth2;
2872 ivec jt1, dt1_ij, dt1_kj, dt1_lj;
2873 ivec jt2, dt2_ij, dt2_kj, dt2_lj;
2877 int loop_index[4][4] = {
2884 /* Total CMAP energy */
2887 for (n = 0; n < nbonds; )
2889 /* Five atoms are involved in the two torsions */
2890 type = forceatoms[n++];
2891 ai = forceatoms[n++];
2892 aj = forceatoms[n++];
2893 ak = forceatoms[n++];
2894 al = forceatoms[n++];
2895 am = forceatoms[n++];
2897 /* Which CMAP type is this */
2898 cmapA = forceparams[type].cmap.cmapA;
2899 cmapd = cmap_grid->cmapdata[cmapA].cmap;
2907 phi1 = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
2908 &sign1, &t11, &t21, &t31); /* 84 */
2910 cos_phi1 = cos(phi1);
2912 a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
2913 a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
2914 a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
2916 b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
2917 b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
2918 b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
2920 tmp = pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
2922 ra21 = iprod(a1, a1); /* 5 */
2923 rb21 = iprod(b1, b1); /* 5 */
2924 rg21 = iprod(r1_kj, r1_kj); /* 5 */
2930 rabr1 = sqrt(ra2r1*rb2r1);
2932 sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
2934 if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
2936 phi1 = asin(sin_phi1);
2946 phi1 = -M_PI - phi1;
2952 phi1 = acos(cos_phi1);
2960 xphi1 = phi1 + M_PI; /* 1 */
2962 /* Second torsion */
2968 phi2 = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
2969 &sign2, &t12, &t22, &t32); /* 84 */
2971 cos_phi2 = cos(phi2);
2973 a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
2974 a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
2975 a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
2977 b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
2978 b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
2979 b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
2981 tmp = pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
2983 ra22 = iprod(a2, a2); /* 5 */
2984 rb22 = iprod(b2, b2); /* 5 */
2985 rg22 = iprod(r2_kj, r2_kj); /* 5 */
2991 rabr2 = sqrt(ra2r2*rb2r2);
2993 sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
2995 if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
2997 phi2 = asin(sin_phi2);
3007 phi2 = -M_PI - phi2;
3013 phi2 = acos(cos_phi2);
3021 xphi2 = phi2 + M_PI; /* 1 */
3023 /* Range mangling */
3026 xphi1 = xphi1 + 2*M_PI;
3028 else if (xphi1 >= 2*M_PI)
3030 xphi1 = xphi1 - 2*M_PI;
3035 xphi2 = xphi2 + 2*M_PI;
3037 else if (xphi2 >= 2*M_PI)
3039 xphi2 = xphi2 - 2*M_PI;
3042 /* Number of grid points */
3043 dx = 2*M_PI / cmap_grid->grid_spacing;
3045 /* Where on the grid are we */
3046 iphi1 = (int)(xphi1/dx);
3047 iphi2 = (int)(xphi2/dx);
3049 iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3050 iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3052 pos1 = iphi1*cmap_grid->grid_spacing+iphi2;
3053 pos2 = ip1p1*cmap_grid->grid_spacing+iphi2;
3054 pos3 = ip1p1*cmap_grid->grid_spacing+ip2p1;
3055 pos4 = iphi1*cmap_grid->grid_spacing+ip2p1;
3057 ty[0] = cmapd[pos1*4];
3058 ty[1] = cmapd[pos2*4];
3059 ty[2] = cmapd[pos3*4];
3060 ty[3] = cmapd[pos4*4];
3062 ty1[0] = cmapd[pos1*4+1];
3063 ty1[1] = cmapd[pos2*4+1];
3064 ty1[2] = cmapd[pos3*4+1];
3065 ty1[3] = cmapd[pos4*4+1];
3067 ty2[0] = cmapd[pos1*4+2];
3068 ty2[1] = cmapd[pos2*4+2];
3069 ty2[2] = cmapd[pos3*4+2];
3070 ty2[3] = cmapd[pos4*4+2];
3072 ty12[0] = cmapd[pos1*4+3];
3073 ty12[1] = cmapd[pos2*4+3];
3074 ty12[2] = cmapd[pos3*4+3];
3075 ty12[3] = cmapd[pos4*4+3];
3077 /* Switch to degrees */
3078 dx = 360.0 / cmap_grid->grid_spacing;
3079 xphi1 = xphi1 * RAD2DEG;
3080 xphi2 = xphi2 * RAD2DEG;
3082 for (i = 0; i < 4; i++) /* 16 */
3085 tx[i+4] = ty1[i]*dx;
3086 tx[i+8] = ty2[i]*dx;
3087 tx[i+12] = ty12[i]*dx*dx;
3091 for (i = 0; i < 4; i++) /* 1056 */
3093 for (j = 0; j < 4; j++)
3096 for (k = 0; k < 16; k++)
3098 xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
3106 tt = (xphi1-iphi1*dx)/dx;
3107 tu = (xphi2-iphi2*dx)/dx;
3116 for (i = 3; i >= 0; i--)
3118 l1 = loop_index[i][3];
3119 l2 = loop_index[i][2];
3120 l3 = loop_index[i][1];
3122 e = tt * e + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
3123 df1 = tu * df1 + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
3124 df2 = tt * df2 + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
3125 ddf1 = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
3126 ddf2 = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
3129 ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
3130 3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
3135 ddf1 = ddf1 * fac * fac;
3136 ddf2 = ddf2 * fac * fac;
3137 ddf12 = ddf12 * fac * fac;
3142 /* Do forces - first torsion */
3143 fg1 = iprod(r1_ij, r1_kj);
3144 hg1 = iprod(r1_kl, r1_kj);
3145 fga1 = fg1*ra2r1*rgr1;
3146 hgb1 = hg1*rb2r1*rgr1;
3150 for (i = 0; i < DIM; i++)
3152 dtf1[i] = gaa1 * a1[i];
3153 dtg1[i] = fga1 * a1[i] - hgb1 * b1[i];
3154 dth1[i] = gbb1 * b1[i];
3156 f1[i] = df1 * dtf1[i];
3157 g1[i] = df1 * dtg1[i];
3158 h1[i] = df1 * dth1[i];
3161 f1_j[i] = -f1[i] - g1[i];
3162 f1_k[i] = h1[i] + g1[i];
3165 f[a1i][i] = f[a1i][i] + f1_i[i];
3166 f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3167 f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3168 f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3171 /* Do forces - second torsion */
3172 fg2 = iprod(r2_ij, r2_kj);
3173 hg2 = iprod(r2_kl, r2_kj);
3174 fga2 = fg2*ra2r2*rgr2;
3175 hgb2 = hg2*rb2r2*rgr2;
3179 for (i = 0; i < DIM; i++)
3181 dtf2[i] = gaa2 * a2[i];
3182 dtg2[i] = fga2 * a2[i] - hgb2 * b2[i];
3183 dth2[i] = gbb2 * b2[i];
3185 f2[i] = df2 * dtf2[i];
3186 g2[i] = df2 * dtg2[i];
3187 h2[i] = df2 * dth2[i];
3190 f2_j[i] = -f2[i] - g2[i];
3191 f2_k[i] = h2[i] + g2[i];
3194 f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3195 f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3196 f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3197 f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3203 copy_ivec(SHIFT_IVEC(g, a1j), jt1);
3204 ivec_sub(SHIFT_IVEC(g, a1i), jt1, dt1_ij);
3205 ivec_sub(SHIFT_IVEC(g, a1k), jt1, dt1_kj);
3206 ivec_sub(SHIFT_IVEC(g, a1l), jt1, dt1_lj);
3207 t11 = IVEC2IS(dt1_ij);
3208 t21 = IVEC2IS(dt1_kj);
3209 t31 = IVEC2IS(dt1_lj);
3211 copy_ivec(SHIFT_IVEC(g, a2j), jt2);
3212 ivec_sub(SHIFT_IVEC(g, a2i), jt2, dt2_ij);
3213 ivec_sub(SHIFT_IVEC(g, a2k), jt2, dt2_kj);
3214 ivec_sub(SHIFT_IVEC(g, a2l), jt2, dt2_lj);
3215 t12 = IVEC2IS(dt2_ij);
3216 t22 = IVEC2IS(dt2_kj);
3217 t32 = IVEC2IS(dt2_lj);
3221 t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3222 t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3230 rvec_inc(fshift[t11], f1_i);
3231 rvec_inc(fshift[CENTRAL], f1_j);
3232 rvec_inc(fshift[t21], f1_k);
3233 rvec_inc(fshift[t31], f1_l);
3235 rvec_inc(fshift[t21], f2_i);
3236 rvec_inc(fshift[CENTRAL], f2_j);
3237 rvec_inc(fshift[t22], f2_k);
3238 rvec_inc(fshift[t32], f2_l);
3245 /***********************************************************
3247 * G R O M O S 9 6 F U N C T I O N S
3249 ***********************************************************/
3250 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
3253 const real half = 0.5;
3254 real L1, kk, x0, dx, dx2;
3255 real v, f, dvdlambda;
3258 kk = L1*kA+lambda*kB;
3259 x0 = L1*xA+lambda*xB;
3266 dvdlambda = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
3273 /* That was 21 flops */
3276 real g96bonds(int nbonds,
3277 const t_iatom forceatoms[], const t_iparams forceparams[],
3278 const rvec x[], rvec f[], rvec fshift[],
3279 const t_pbc *pbc, const t_graph *g,
3280 real lambda, real *dvdlambda,
3281 const t_mdatoms *md, t_fcdata *fcd,
3282 int *global_atom_index)
3284 int i, m, ki, ai, aj, type;
3285 real dr2, fbond, vbond, fij, vtot;
3290 for (i = 0; (i < nbonds); )
3292 type = forceatoms[i++];
3293 ai = forceatoms[i++];
3294 aj = forceatoms[i++];
3296 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
3297 dr2 = iprod(dx, dx); /* 5 */
3299 *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3300 forceparams[type].harmonic.krB,
3301 forceparams[type].harmonic.rA,
3302 forceparams[type].harmonic.rB,
3303 dr2, lambda, &vbond, &fbond);
3305 vtot += 0.5*vbond; /* 1*/
3309 fprintf(debug, "G96-BONDS: dr = %10g vbond = %10g fbond = %10g\n",
3310 sqrt(dr2), vbond, fbond);
3316 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3319 for (m = 0; (m < DIM); m++) /* 15 */
3324 fshift[ki][m] += fij;
3325 fshift[CENTRAL][m] -= fij;
3331 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
3332 rvec r_ij, rvec r_kj,
3334 /* Return value is the angle between the bonds i-j and j-k */
3338 *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /* 3 */
3339 *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /* 3 */
3341 costh = cos_angle(r_ij, r_kj); /* 25 */
3346 real g96angles(int nbonds,
3347 const t_iatom forceatoms[], const t_iparams forceparams[],
3348 const rvec x[], rvec f[], rvec fshift[],
3349 const t_pbc *pbc, const t_graph *g,
3350 real lambda, real *dvdlambda,
3351 const t_mdatoms *md, t_fcdata *fcd,
3352 int *global_atom_index)
3354 int i, ai, aj, ak, type, m, t1, t2;
3356 real cos_theta, dVdt, va, vtot;
3357 real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3359 ivec jt, dt_ij, dt_kj;
3362 for (i = 0; (i < nbonds); )
3364 type = forceatoms[i++];
3365 ai = forceatoms[i++];
3366 aj = forceatoms[i++];
3367 ak = forceatoms[i++];
3369 cos_theta = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3371 *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3372 forceparams[type].harmonic.krB,
3373 forceparams[type].harmonic.rA,
3374 forceparams[type].harmonic.rB,
3375 cos_theta, lambda, &va, &dVdt);
3378 rij_1 = gmx_invsqrt(iprod(r_ij, r_ij));
3379 rkj_1 = gmx_invsqrt(iprod(r_kj, r_kj));
3380 rij_2 = rij_1*rij_1;
3381 rkj_2 = rkj_1*rkj_1;
3382 rijrkj_1 = rij_1*rkj_1; /* 23 */
3387 fprintf(debug, "G96ANGLES: costheta = %10g vth = %10g dV/dct = %10g\n",
3388 cos_theta, va, dVdt);
3391 for (m = 0; (m < DIM); m++) /* 42 */
3393 f_i[m] = dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
3394 f_k[m] = dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
3395 f_j[m] = -f_i[m]-f_k[m];
3403 copy_ivec(SHIFT_IVEC(g, aj), jt);
3405 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3406 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3407 t1 = IVEC2IS(dt_ij);
3408 t2 = IVEC2IS(dt_kj);
3410 rvec_inc(fshift[t1], f_i);
3411 rvec_inc(fshift[CENTRAL], f_j);
3412 rvec_inc(fshift[t2], f_k); /* 9 */
3418 real cross_bond_bond(int nbonds,
3419 const t_iatom forceatoms[], const t_iparams forceparams[],
3420 const rvec x[], rvec f[], rvec fshift[],
3421 const t_pbc *pbc, const t_graph *g,
3422 real lambda, real *dvdlambda,
3423 const t_mdatoms *md, t_fcdata *fcd,
3424 int *global_atom_index)
3426 /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3429 int i, ai, aj, ak, type, m, t1, t2;
3431 real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3433 ivec jt, dt_ij, dt_kj;
3436 for (i = 0; (i < nbonds); )
3438 type = forceatoms[i++];
3439 ai = forceatoms[i++];
3440 aj = forceatoms[i++];
3441 ak = forceatoms[i++];
3442 r1e = forceparams[type].cross_bb.r1e;
3443 r2e = forceparams[type].cross_bb.r2e;
3444 krr = forceparams[type].cross_bb.krr;
3446 /* Compute distance vectors ... */
3447 t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3448 t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3450 /* ... and their lengths */
3454 /* Deviations from ideality */
3458 /* Energy (can be negative!) */
3463 svmul(-krr*s2/r1, r_ij, f_i);
3464 svmul(-krr*s1/r2, r_kj, f_k);
3466 for (m = 0; (m < DIM); m++) /* 12 */
3468 f_j[m] = -f_i[m] - f_k[m];
3477 copy_ivec(SHIFT_IVEC(g, aj), jt);
3479 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3480 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3481 t1 = IVEC2IS(dt_ij);
3482 t2 = IVEC2IS(dt_kj);
3484 rvec_inc(fshift[t1], f_i);
3485 rvec_inc(fshift[CENTRAL], f_j);
3486 rvec_inc(fshift[t2], f_k); /* 9 */
3492 real cross_bond_angle(int nbonds,
3493 const t_iatom forceatoms[], const t_iparams forceparams[],
3494 const rvec x[], rvec f[], rvec fshift[],
3495 const t_pbc *pbc, const t_graph *g,
3496 real lambda, real *dvdlambda,
3497 const t_mdatoms *md, t_fcdata *fcd,
3498 int *global_atom_index)
3500 /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3503 int i, ai, aj, ak, type, m, t1, t2, t3;
3504 rvec r_ij, r_kj, r_ik;
3505 real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3507 ivec jt, dt_ij, dt_kj;
3510 for (i = 0; (i < nbonds); )
3512 type = forceatoms[i++];
3513 ai = forceatoms[i++];
3514 aj = forceatoms[i++];
3515 ak = forceatoms[i++];
3516 r1e = forceparams[type].cross_ba.r1e;
3517 r2e = forceparams[type].cross_ba.r2e;
3518 r3e = forceparams[type].cross_ba.r3e;
3519 krt = forceparams[type].cross_ba.krt;
3521 /* Compute distance vectors ... */
3522 t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3523 t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3524 t3 = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3526 /* ... and their lengths */
3531 /* Deviations from ideality */
3536 /* Energy (can be negative!) */
3537 vrt = krt*s3*(s1+s2);
3543 k3 = -krt*(s1+s2)/r3;
3544 for (m = 0; (m < DIM); m++)
3546 f_i[m] = k1*r_ij[m] + k3*r_ik[m];
3547 f_k[m] = k2*r_kj[m] - k3*r_ik[m];
3548 f_j[m] = -f_i[m] - f_k[m];
3551 for (m = 0; (m < DIM); m++) /* 12 */
3561 copy_ivec(SHIFT_IVEC(g, aj), jt);
3563 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3564 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3565 t1 = IVEC2IS(dt_ij);
3566 t2 = IVEC2IS(dt_kj);
3568 rvec_inc(fshift[t1], f_i);
3569 rvec_inc(fshift[CENTRAL], f_j);
3570 rvec_inc(fshift[t2], f_k); /* 9 */
3576 static real bonded_tab(const char *type, int table_nr,
3577 const bondedtable_t *table, real kA, real kB, real r,
3578 real lambda, real *V, real *F)
3580 real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3582 real v, f, dvdlambda;
3584 k = (1.0 - lambda)*kA + lambda*kB;
3586 tabscale = table->scale;
3587 VFtab = table->data;
3593 gmx_fatal(FARGS, "A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3594 type, table_nr, r, n0, n0+1, table->n);
3601 Geps = VFtab[nnn+2]*eps;
3602 Heps2 = VFtab[nnn+3]*eps2;
3603 Fp = Ft + Geps + Heps2;
3605 FF = Fp + Geps + 2.0*Heps2;
3607 *F = -k*FF*tabscale;
3609 dvdlambda = (kB - kA)*VV;
3613 /* That was 22 flops */
3616 real tab_bonds(int nbonds,
3617 const t_iatom forceatoms[], const t_iparams forceparams[],
3618 const rvec x[], rvec f[], rvec fshift[],
3619 const t_pbc *pbc, const t_graph *g,
3620 real lambda, real *dvdlambda,
3621 const t_mdatoms *md, t_fcdata *fcd,
3622 int *global_atom_index)
3624 int i, m, ki, ai, aj, type, table;
3625 real dr, dr2, fbond, vbond, fij, vtot;
3630 for (i = 0; (i < nbonds); )
3632 type = forceatoms[i++];
3633 ai = forceatoms[i++];
3634 aj = forceatoms[i++];
3636 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
3637 dr2 = iprod(dx, dx); /* 5 */
3638 dr = dr2*gmx_invsqrt(dr2); /* 10 */
3640 table = forceparams[type].tab.table;
3642 *dvdlambda += bonded_tab("bond", table,
3643 &fcd->bondtab[table],
3644 forceparams[type].tab.kA,
3645 forceparams[type].tab.kB,
3646 dr, lambda, &vbond, &fbond); /* 22 */
3654 vtot += vbond; /* 1*/
3655 fbond *= gmx_invsqrt(dr2); /* 6 */
3659 fprintf(debug, "TABBONDS: dr = %10g vbond = %10g fbond = %10g\n",
3665 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3668 for (m = 0; (m < DIM); m++) /* 15 */
3673 fshift[ki][m] += fij;
3674 fshift[CENTRAL][m] -= fij;
3680 real tab_angles(int nbonds,
3681 const t_iatom forceatoms[], const t_iparams forceparams[],
3682 const rvec x[], rvec f[], rvec fshift[],
3683 const t_pbc *pbc, const t_graph *g,
3684 real lambda, real *dvdlambda,
3685 const t_mdatoms *md, t_fcdata *fcd,
3686 int *global_atom_index)
3688 int i, ai, aj, ak, t1, t2, type, table;
3690 real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3691 ivec jt, dt_ij, dt_kj;
3694 for (i = 0; (i < nbonds); )
3696 type = forceatoms[i++];
3697 ai = forceatoms[i++];
3698 aj = forceatoms[i++];
3699 ak = forceatoms[i++];
3701 theta = bond_angle(x[ai], x[aj], x[ak], pbc,
3702 r_ij, r_kj, &cos_theta, &t1, &t2); /* 41 */
3704 table = forceparams[type].tab.table;
3706 *dvdlambda += bonded_tab("angle", table,
3707 &fcd->angletab[table],
3708 forceparams[type].tab.kA,
3709 forceparams[type].tab.kB,
3710 theta, lambda, &va, &dVdt); /* 22 */
3713 cos_theta2 = sqr(cos_theta); /* 1 */
3722 st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
3723 sth = st*cos_theta; /* 1 */
3727 fprintf(debug, "ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
3728 theta*RAD2DEG, va, dVdt);
3731 nrkj2 = iprod(r_kj, r_kj); /* 5 */
3732 nrij2 = iprod(r_ij, r_ij);
3734 cik = st*gmx_invsqrt(nrkj2*nrij2); /* 12 */
3735 cii = sth/nrij2; /* 10 */
3736 ckk = sth/nrkj2; /* 10 */
3738 for (m = 0; (m < DIM); m++) /* 39 */
3740 f_i[m] = -(cik*r_kj[m]-cii*r_ij[m]);
3741 f_k[m] = -(cik*r_ij[m]-ckk*r_kj[m]);
3742 f_j[m] = -f_i[m]-f_k[m];
3749 copy_ivec(SHIFT_IVEC(g, aj), jt);
3751 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3752 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3753 t1 = IVEC2IS(dt_ij);
3754 t2 = IVEC2IS(dt_kj);
3756 rvec_inc(fshift[t1], f_i);
3757 rvec_inc(fshift[CENTRAL], f_j);
3758 rvec_inc(fshift[t2], f_k);
3764 real tab_dihs(int nbonds,
3765 const t_iatom forceatoms[], const t_iparams forceparams[],
3766 const rvec x[], rvec f[], rvec fshift[],
3767 const t_pbc *pbc, const t_graph *g,
3768 real lambda, real *dvdlambda,
3769 const t_mdatoms *md, t_fcdata *fcd,
3770 int *global_atom_index)
3772 int i, type, ai, aj, ak, al, table;
3774 rvec r_ij, r_kj, r_kl, m, n;
3775 real phi, sign, ddphi, vpd, vtot;
3778 for (i = 0; (i < nbonds); )
3780 type = forceatoms[i++];
3781 ai = forceatoms[i++];
3782 aj = forceatoms[i++];
3783 ak = forceatoms[i++];
3784 al = forceatoms[i++];
3786 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
3787 &sign, &t1, &t2, &t3); /* 84 */
3789 table = forceparams[type].tab.table;
3791 /* Hopefully phi+M_PI never results in values < 0 */
3792 *dvdlambda += bonded_tab("dihedral", table,
3793 &fcd->dihtab[table],
3794 forceparams[type].tab.kA,
3795 forceparams[type].tab.kB,
3796 phi+M_PI, lambda, &vpd, &ddphi);
3799 do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
3800 f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
3803 fprintf(debug, "pdih: (%d,%d,%d,%d) phi=%g\n",
3804 ai, aj, ak, al, phi);
3812 calc_bonded_reduction_mask(const t_idef *idef,
3817 int ftype, nb, nat1, nb0, nb1, i, a;
3821 for (ftype = 0; ftype < F_NRE; ftype++)
3823 if (interaction_function[ftype].flags & IF_BOND &&
3824 !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
3825 (ftype<F_GB12 || ftype>F_GB14))
3827 nb = idef->il[ftype].nr;
3830 nat1 = interaction_function[ftype].nratoms + 1;
3832 /* Divide this interaction equally over the threads.
3833 * This is not stored: should match division in calc_bonds.
3835 nb0 = (((nb/nat1)* t )/nt)*nat1;
3836 nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
3838 for (i = nb0; i < nb1; i += nat1)
3840 for (a = 1; a < nat1; a++)
3842 mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
3852 void init_bonded_thread_force_reduction(t_forcerec *fr,
3855 #define MAX_BLOCK_BITS 32
3859 if (fr->nthreads <= 1)
3866 /* We divide the force array in a maximum of 32 blocks.
3867 * Minimum force block reduction size is 2^6=64.
3870 while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
3876 fprintf(debug, "bonded force buffer block atom shift %d bits\n",
3880 /* Determine to which blocks each thread's bonded force calculation
3881 * contributes. Store this is a mask for each thread.
3883 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
3884 for (t = 1; t < fr->nthreads; t++)
3886 fr->f_t[t].red_mask =
3887 calc_bonded_reduction_mask(idef, fr->red_ashift, t, fr->nthreads);
3890 /* Determine the maximum number of blocks we need to reduce over */
3893 for (t = 0; t < fr->nthreads; t++)
3896 for (b = 0; b < MAX_BLOCK_BITS; b++)
3898 if (fr->f_t[t].red_mask & (1U<<b))
3900 fr->red_nblock = max(fr->red_nblock, b+1);
3906 fprintf(debug, "thread %d flags %x count %d\n",
3907 t, fr->f_t[t].red_mask, c);
3913 fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
3914 fr->red_nblock, 1<<fr->red_ashift);
3915 fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
3916 ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
3917 ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
3921 static void zero_thread_forces(f_thread_t *f_t, int n,
3922 int nblock, int blocksize)
3924 int b, a0, a1, a, i, j;
3926 if (n > f_t->f_nalloc)
3928 f_t->f_nalloc = over_alloc_large(n);
3929 srenew(f_t->f, f_t->f_nalloc);
3932 if (f_t->red_mask != 0)
3934 for (b = 0; b < nblock; b++)
3936 if (f_t->red_mask && (1U<<b))
3939 a1 = min((b+1)*blocksize, n);
3940 for (a = a0; a < a1; a++)
3942 clear_rvec(f_t->f[a]);
3947 for (i = 0; i < SHIFTS; i++)
3949 clear_rvec(f_t->fshift[i]);
3951 for (i = 0; i < F_NRE; i++)
3955 for (i = 0; i < egNR; i++)
3957 for (j = 0; j < f_t->grpp.nener; j++)
3959 f_t->grpp.ener[i][j] = 0;
3962 for (i = 0; i < efptNR; i++)
3968 static void reduce_thread_force_buffer(int n, rvec *f,
3969 int nthreads, f_thread_t *f_t,
3970 int nblock, int block_size)
3972 /* The max thread number is arbitrary,
3973 * we used a fixed number to avoid memory management.
3974 * Using more than 16 threads is probably never useful performance wise.
3976 #define MAX_BONDED_THREADS 256
3979 if (nthreads > MAX_BONDED_THREADS)
3981 gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads",
3982 MAX_BONDED_THREADS);
3985 /* This reduction can run on any number of threads,
3986 * independently of nthreads.
3988 #pragma omp parallel for num_threads(nthreads) schedule(static)
3989 for (b = 0; b < nblock; b++)
3991 rvec *fp[MAX_BONDED_THREADS];
3995 /* Determine which threads contribute to this block */
3997 for (ft = 1; ft < nthreads; ft++)
3999 if (f_t[ft].red_mask & (1U<<b))
4001 fp[nfb++] = f_t[ft].f;
4006 /* Reduce force buffers for threads that contribute */
4008 a1 = (b+1)*block_size;
4010 for (a = a0; a < a1; a++)
4012 for (fb = 0; fb < nfb; fb++)
4014 rvec_inc(f[a], fp[fb][a]);
4021 static void reduce_thread_forces(int n, rvec *f, rvec *fshift,
4022 real *ener, gmx_grppairener_t *grpp, real *dvdl,
4023 int nthreads, f_thread_t *f_t,
4024 int nblock, int block_size,
4025 gmx_bool bCalcEnerVir,
4030 /* Reduce the bonded force buffer */
4031 reduce_thread_force_buffer(n, f, nthreads, f_t, nblock, block_size);
4034 /* When necessary, reduce energy and virial using one thread only */
4039 for (i = 0; i < SHIFTS; i++)
4041 for (t = 1; t < nthreads; t++)
4043 rvec_inc(fshift[i], f_t[t].fshift[i]);
4046 for (i = 0; i < F_NRE; i++)
4048 for (t = 1; t < nthreads; t++)
4050 ener[i] += f_t[t].ener[i];
4053 for (i = 0; i < egNR; i++)
4055 for (j = 0; j < f_t[1].grpp.nener; j++)
4057 for (t = 1; t < nthreads; t++)
4060 grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
4066 for (i = 0; i < efptNR; i++)
4069 for (t = 1; t < nthreads; t++)
4071 dvdl[i] += f_t[t].dvdl[i];
4078 static real calc_one_bond(FILE *fplog, int thread,
4079 int ftype, const t_idef *idef,
4080 rvec x[], rvec f[], rvec fshift[],
4082 const t_pbc *pbc, const t_graph *g,
4083 gmx_enerdata_t *enerd, gmx_grppairener_t *grpp,
4085 real *lambda, real *dvdl,
4086 const t_mdatoms *md, t_fcdata *fcd,
4087 gmx_bool bCalcEnerVir,
4088 int *global_atom_index, gmx_bool bPrintSepPot)
4090 int ind, nat1, nbonds, efptFTYPE;
4095 if (IS_RESTRAINT_TYPE(ftype))
4097 efptFTYPE = efptRESTRAINT;
4101 efptFTYPE = efptBONDED;
4104 if (interaction_function[ftype].flags & IF_BOND &&
4105 !(ftype == F_CONNBONDS || ftype == F_POSRES))
4107 ind = interaction_function[ftype].nrnb_ind;
4108 nat1 = interaction_function[ftype].nratoms + 1;
4109 nbonds = idef->il[ftype].nr/nat1;
4110 iatoms = idef->il[ftype].iatoms;
4112 nb0 = ((nbonds* thread )/(fr->nthreads))*nat1;
4113 nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
4115 if (!IS_LISTED_LJ_C(ftype))
4117 if (ftype == F_CMAP)
4119 v = cmap_dihs(nbn, iatoms+nb0,
4120 idef->iparams, &idef->cmap_grid,
4121 (const rvec*)x, f, fshift,
4122 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
4123 md, fcd, global_atom_index);
4126 else if (ftype == F_ANGLES &&
4127 !bCalcEnerVir && fr->efep == efepNO)
4129 /* No energies, shift forces, dvdl */
4130 angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
4133 pbc, g, lambda[efptFTYPE], md, fcd,
4138 else if (ftype == F_PDIHS &&
4139 !bCalcEnerVir && fr->efep == efepNO)
4141 /* No energies, shift forces, dvdl */
4142 #ifndef SIMD_BONDEDS
4147 (nbn, idef->il[ftype].iatoms+nb0,
4150 pbc, g, lambda[efptFTYPE], md, fcd,
4156 v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
4158 (const rvec*)x, f, fshift,
4159 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
4160 md, fcd, global_atom_index);
4164 fprintf(fplog, " %-23s #%4d V %12.5e dVdl %12.5e\n",
4165 interaction_function[ftype].longname,
4166 nbonds/nat1, v, lambda[efptFTYPE]);
4171 v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
4172 pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
4176 fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
4177 interaction_function[ftype].longname,
4178 interaction_function[F_LJ14].longname, nbonds/nat1, dvdl[efptVDW]);
4179 fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
4180 interaction_function[ftype].longname,
4181 interaction_function[F_COUL14].longname, nbonds/nat1, dvdl[efptCOUL]);
4184 if (ind != -1 && thread == 0)
4186 inc_nrnb(nrnb, ind, nbonds);
4193 /* WARNING! THIS FUNCTION MUST EXACTLY TRACK THE calc
4194 function, or horrible things will happen when doing free energy
4195 calculations! In a good coding world, this would not be a
4196 different function, but for speed reasons, it needs to be made a
4197 separate function. TODO for 5.0 - figure out a way to reorganize
4198 to reduce duplication.
4201 static real calc_one_bond_foreign(FILE *fplog, int ftype, const t_idef *idef,
4202 rvec x[], rvec f[], t_forcerec *fr,
4203 const t_pbc *pbc, const t_graph *g,
4204 gmx_grppairener_t *grpp, t_nrnb *nrnb,
4205 real *lambda, real *dvdl,
4206 const t_mdatoms *md, t_fcdata *fcd,
4207 int *global_atom_index, gmx_bool bPrintSepPot)
4209 int ind, nat1, nbonds, efptFTYPE, nbonds_np;
4213 if (IS_RESTRAINT_TYPE(ftype))
4215 efptFTYPE = efptRESTRAINT;
4219 efptFTYPE = efptBONDED;
4222 if (ftype < F_GB12 || ftype > F_GB14)
4224 if (interaction_function[ftype].flags & IF_BOND &&
4225 !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES))
4227 ind = interaction_function[ftype].nrnb_ind;
4228 nat1 = interaction_function[ftype].nratoms+1;
4229 nbonds_np = idef->il[ftype].nr_nonperturbed;
4230 nbonds = idef->il[ftype].nr - nbonds_np;
4231 iatoms = idef->il[ftype].iatoms + nbonds_np;
4234 if (!IS_LISTED_LJ_C(ftype))
4236 if (ftype == F_CMAP)
4238 v = cmap_dihs(nbonds, iatoms,
4239 idef->iparams, &idef->cmap_grid,
4240 (const rvec*)x, f, fr->fshift,
4241 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
4246 v = interaction_function[ftype].ifunc(nbonds, iatoms,
4248 (const rvec*)x, f, fr->fshift,
4249 pbc, g, lambda[efptFTYPE], &dvdl[efptFTYPE],
4250 md, fcd, global_atom_index);
4255 v = do_nonbonded_listed(ftype, nbonds, iatoms,
4257 (const rvec*)x, f, fr->fshift,
4258 pbc, g, lambda, dvdl,
4259 md, fr, grpp, global_atom_index);
4263 inc_nrnb(nrnb, ind, nbonds/nat1);
4271 void calc_bonds(FILE *fplog, const gmx_multisim_t *ms,
4273 rvec x[], history_t *hist,
4274 rvec f[], t_forcerec *fr,
4275 const t_pbc *pbc, const t_graph *g,
4276 gmx_enerdata_t *enerd, t_nrnb *nrnb,
4278 const t_mdatoms *md,
4279 t_fcdata *fcd, int *global_atom_index,
4280 t_atomtypes *atype, gmx_genborn_t *born,
4282 gmx_bool bPrintSepPot, gmx_large_int_t step)
4284 gmx_bool bCalcEnerVir;
4286 real v, dvdl[efptNR], dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
4287 of lambda, which will be thrown away in the end*/
4288 const t_pbc *pbc_null;
4292 bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
4294 for (i = 0; i < efptNR; i++)
4308 fprintf(fplog, "Step %s: bonded V and dVdl for this node\n",
4309 gmx_step_str(step, buf));
4315 p_graph(debug, "Bondage is fun", g);
4319 /* Do pre force calculation stuff which might require communication */
4320 if (idef->il[F_ORIRES].nr)
4322 enerd->term[F_ORIRESDEV] =
4323 calc_orires_dev(ms, idef->il[F_ORIRES].nr,
4324 idef->il[F_ORIRES].iatoms,
4325 idef->iparams, md, (const rvec*)x,
4326 pbc_null, fcd, hist);
4328 if (idef->il[F_DISRES].nr)
4330 calc_disres_R_6(ms, idef->il[F_DISRES].nr,
4331 idef->il[F_DISRES].iatoms,
4332 idef->iparams, (const rvec*)x, pbc_null,
4336 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
4337 for (thread = 0; thread < fr->nthreads; thread++)
4339 int ftype, nbonds, ind, nat1;
4344 gmx_grppairener_t *grpp;
4350 fshift = fr->fshift;
4352 grpp = &enerd->grpp;
4357 zero_thread_forces(&fr->f_t[thread], fr->natoms_force,
4358 fr->red_nblock, 1<<fr->red_ashift);
4360 ft = fr->f_t[thread].f;
4361 fshift = fr->f_t[thread].fshift;
4362 epot = fr->f_t[thread].ener;
4363 grpp = &fr->f_t[thread].grpp;
4364 dvdlt = fr->f_t[thread].dvdl;
4366 /* Loop over all bonded force types to calculate the bonded forces */
4367 for (ftype = 0; (ftype < F_NRE); ftype++)
4369 if (idef->il[ftype].nr > 0 &&
4370 (interaction_function[ftype].flags & IF_BOND) &&
4371 (ftype < F_GB12 || ftype > F_GB14) &&
4372 !(ftype == F_CONNBONDS || ftype == F_POSRES))
4374 v = calc_one_bond(fplog, thread, ftype, idef, x,
4375 ft, fshift, fr, pbc_null, g, enerd, grpp,
4376 nrnb, lambda, dvdlt,
4377 md, fcd, bCalcEnerVir,
4378 global_atom_index, bPrintSepPot);
4383 if (fr->nthreads > 1)
4385 reduce_thread_forces(fr->natoms_force, f, fr->fshift,
4386 enerd->term, &enerd->grpp, dvdl,
4387 fr->nthreads, fr->f_t,
4388 fr->red_nblock, 1<<fr->red_ashift,
4390 force_flags & GMX_FORCE_DHDL);
4392 if (force_flags & GMX_FORCE_DHDL)
4394 for (i = 0; i < efptNR; i++)
4396 enerd->dvdl_nonlin[i] += dvdl[i];
4400 /* Copy the sum of violations for the distance restraints from fcd */
4403 enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
4408 void calc_bonds_lambda(FILE *fplog,
4412 const t_pbc *pbc, const t_graph *g,
4413 gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
4415 const t_mdatoms *md,
4417 int *global_atom_index)
4419 int i, ftype, nbonds_np, nbonds, ind, nat;
4421 real dvdl_dum[efptNR];
4422 rvec *f, *fshift_orig;
4423 const t_pbc *pbc_null;
4435 snew(f, fr->natoms_force);
4436 /* We want to preserve the fshift array in forcerec */
4437 fshift_orig = fr->fshift;
4438 snew(fr->fshift, SHIFTS);
4440 /* Loop over all bonded force types to calculate the bonded forces */
4441 for (ftype = 0; (ftype < F_NRE); ftype++)
4443 v = calc_one_bond_foreign(fplog, ftype, idef, x,
4444 f, fr, pbc_null, g, grpp, nrnb, lambda, dvdl_dum,
4445 md, fcd, global_atom_index, FALSE);
4450 fr->fshift = fshift_orig;