2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2009, The GROMACS Development Team.
6 * Copyright (c) 2010, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "types/simple.h"
51 #include "genborn_allvsall.h"
57 int ** exclusion_mask_gb;
59 gmx_allvsallgb2_data_t;
62 calc_maxoffset(int i, int natoms)
66 if ((natoms % 2) == 1)
68 /* Odd number of atoms, easy */
71 else if ((natoms % 4) == 0)
73 /* Multiple of four is hard */
82 maxoffset = natoms/2-1;
93 maxoffset = natoms/2-1;
102 maxoffset = natoms/2;
106 maxoffset = natoms/2-1;
114 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata,
128 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
129 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
130 * whether they should interact or not.
132 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
133 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
134 * we create a jindex array with three elements per i atom: the starting point, the point to
135 * which we need to check exclusions, and the end point.
136 * This way we only have to allocate a short exclusion mask per i atom.
139 /* Allocate memory for jindex arrays */
140 snew(aadata->jindex_gb, 3*natoms);
142 /* Pointer to lists with exclusion masks */
143 snew(aadata->exclusion_mask_gb, natoms);
145 for (i = 0; i < natoms; i++)
148 aadata->jindex_gb[3*i] = i+1;
149 max_offset = calc_maxoffset(i, natoms);
151 /* first check the max range of atoms to EXCLUDE */
155 for (j = 0; j < ilist[F_GB12].nr; j += 3)
157 a1 = ilist[F_GB12].iatoms[j+1];
158 a2 = ilist[F_GB12].iatoms[j+2];
172 if (k > 0 && k <= max_offset)
174 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
180 for (j = 0; j < ilist[F_GB13].nr; j += 3)
182 a1 = ilist[F_GB13].iatoms[j+1];
183 a2 = ilist[F_GB13].iatoms[j+2];
198 if (k > 0 && k <= max_offset)
200 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
206 for (j = 0; j < ilist[F_GB14].nr; j += 3)
208 a1 = ilist[F_GB14].iatoms[j+1];
209 a2 = ilist[F_GB14].iatoms[j+2];
224 if (k > 0 && k <= max_offset)
226 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
230 max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
232 aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
234 snew(aadata->exclusion_mask_gb[i], max_excl_offset);
236 /* Include everything by default */
237 for (j = 0; j < max_excl_offset; j++)
239 /* Use all-ones to mark interactions that should be present, compatible with SSE */
240 aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
242 /* Go through exclusions again */
245 for (j = 0; j < ilist[F_GB12].nr; j += 3)
247 a1 = ilist[F_GB12].iatoms[j+1];
248 a2 = ilist[F_GB12].iatoms[j+2];
262 if (k > 0 && k <= max_offset)
264 aadata->exclusion_mask_gb[i][k-1] = 0;
270 for (j = 0; j < ilist[F_GB13].nr; j += 3)
272 a1 = ilist[F_GB13].iatoms[j+1];
273 a2 = ilist[F_GB13].iatoms[j+2];
287 if (k > 0 && k <= max_offset)
289 aadata->exclusion_mask_gb[i][k-1] = 0;
295 for (j = 0; j < ilist[F_GB14].nr; j += 3)
297 a1 = ilist[F_GB14].iatoms[j+1];
298 a2 = ilist[F_GB14].iatoms[j+2];
312 if (k > 0 && k <= max_offset)
314 aadata->exclusion_mask_gb[i][k-1] = 0;
322 aadata->jindex_gb[3*i+2] = i+1+max_offset;
328 genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata,
336 gmx_allvsallgb2_data_t *aadata;
342 setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
348 genborn_allvsall_calc_still_radii(t_forcerec * fr,
350 gmx_genborn_t * born,
351 gmx_localtop_t * top,
356 gmx_allvsallgb2_data_t *aadata;
369 real irsq, idr4, idr6;
370 real raj, rvdw, ratio;
371 real vaj, ccf, dccf, theta, cosq;
372 real term, prod, icf4, icf6, gpi2, factor, sinq;
374 natoms = mdatoms->nr;
375 ni0 = mdatoms->start;
376 ni1 = mdatoms->start+mdatoms->homenr;
377 factor = 0.5*ONE_4PI_EPS0;
380 aadata = *((gmx_allvsallgb2_data_t **)work);
384 genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
386 *((gmx_allvsallgb2_data_t **)work) = aadata;
390 for (i = 0; i < born->nr; i++)
392 born->gpol_still_work[i] = 0;
396 for (i = ni0; i < ni1; i++)
398 /* We assume shifts are NOT used for all-vs-all interactions */
400 /* Load i atom data */
407 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
408 vai = born->vsolv[i];
409 prod_ai = STILL_P4*vai;
411 /* Load limits for loop over neighbors */
412 nj0 = aadata->jindex_gb[3*i];
413 nj1 = aadata->jindex_gb[3*i+1];
414 nj2 = aadata->jindex_gb[3*i+2];
416 mask = aadata->exclusion_mask_gb[i];
418 /* Prologue part, including exclusion mask */
419 for (j = nj0; j < nj1; j++, mask++)
425 /* load j atom coordinates */
430 /* Calculate distance */
434 rsq = dx*dx+dy*dy+dz*dz;
436 /* Calculate 1/r and 1/r2 */
437 rinv = gmx_invsqrt(rsq);
442 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
446 ratio = rsq / (rvdw * rvdw);
447 vaj = born->vsolv[k];
450 if (ratio > STILL_P5INV)
457 theta = ratio*STILL_PIP5;
459 term = 0.5*(1.0-cosq);
461 sinq = 1.0 - cosq*cosq;
462 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
467 icf6 = (4*ccf-dccf)*idr6;
469 born->gpol_still_work[k] += prod_ai*icf4;
472 /* Save ai->aj and aj->ai chain rule terms */
473 fr->dadx[n++] = prod*icf6;
474 fr->dadx[n++] = prod_ai*icf6;
476 /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */
481 /* Main part, no exclusions */
482 for (j = nj1; j < nj2; j++)
486 /* load j atom coordinates */
491 /* Calculate distance */
495 rsq = dx*dx+dy*dy+dz*dz;
497 /* Calculate 1/r and 1/r2 */
498 rinv = gmx_invsqrt(rsq);
503 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
507 ratio = rsq / (rvdw * rvdw);
508 vaj = born->vsolv[k];
510 if (ratio > STILL_P5INV)
517 theta = ratio*STILL_PIP5;
519 term = 0.5*(1.0-cosq);
521 sinq = 1.0 - cosq*cosq;
522 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
527 icf6 = (4*ccf-dccf)*idr6;
529 born->gpol_still_work[k] += prod_ai*icf4;
532 /* Save ai->aj and aj->ai chain rule terms */
533 fr->dadx[n++] = prod*icf6;
534 fr->dadx[n++] = prod_ai*icf6;
536 born->gpol_still_work[i] += gpi;
539 /* Parallel summations */
542 gmx_sum(natoms, born->gpol_still_work, cr);
545 /* Calculate the radii */
546 for (i = 0; i < natoms; i++)
548 if (born->use[i] != 0)
550 gpi = born->gpol[i]+born->gpol_still_work[i];
552 born->bRad[i] = factor*gmx_invsqrt(gpi2);
553 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
563 genborn_allvsall_calc_hct_obc_radii(t_forcerec * fr,
565 gmx_genborn_t * born,
567 gmx_localtop_t * top,
572 gmx_allvsallgb2_data_t *aadata;
584 real rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
585 real dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
586 real lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
591 natoms = mdatoms->nr;
592 ni0 = mdatoms->start;
593 ni1 = mdatoms->start+mdatoms->homenr;
598 doffset = born->gb_doffset;
600 aadata = *((gmx_allvsallgb2_data_t **)work);
604 genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
606 *((gmx_allvsallgb2_data_t **)work) = aadata;
609 for (i = 0; i < born->nr; i++)
611 born->gpol_hct_work[i] = 0;
614 for (i = ni0; i < ni1; i++)
616 /* We assume shifts are NOT used for all-vs-all interactions */
618 /* Load i atom data */
623 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
626 sk_ai = born->param[i];
627 sk2_ai = sk_ai*sk_ai;
631 /* Load limits for loop over neighbors */
632 nj0 = aadata->jindex_gb[3*i];
633 nj1 = aadata->jindex_gb[3*i+1];
634 nj2 = aadata->jindex_gb[3*i+2];
636 mask = aadata->exclusion_mask_gb[i];
638 /* Prologue part, including exclusion mask */
639 for (j = nj0; j < nj1; j++, mask++)
645 /* load j atom coordinates */
650 /* Calculate distance */
654 rsq = dx*dx+dy*dy+dz*dz;
656 /* Calculate 1/r and 1/r2 */
657 rinv = gmx_invsqrt(rsq);
660 /* sk is precalculated in init_gb() */
662 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
664 /* aj -> ai interaction */
686 lij_inv = gmx_invsqrt(lij2);
689 prod = 0.25*sk2_rinv;
691 log_term = log(uij*lij_inv);
692 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
693 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
697 tmp = tmp + 2.0 * (rai_inv-lij);
700 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
701 t2 = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
703 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
705 dadxi = (dlij*t1+t2+t3)*rinv;
714 /* ai -> aj interaction */
715 if (raj < dr + sk_ai)
717 lij = 1.0/(dr-sk_ai);
730 uij = 1.0/(dr+sk_ai);
736 lij_inv = gmx_invsqrt(lij2);
737 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
739 prod = 0.25 * sk2_rinv;
741 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
742 log_term = log(uij*lij_inv);
744 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
748 tmp = tmp + 2.0 * (raj_inv-lij);
751 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
752 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
753 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
755 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
757 born->gpol_hct_work[k] += 0.5*tmp;
763 fr->dadx[n++] = dadxi;
764 fr->dadx[n++] = dadxj;
769 /* Main part, no exclusions */
770 for (j = nj1; j < nj2; j++)
774 /* load j atom coordinates */
779 /* Calculate distance */
783 rsq = dx*dx+dy*dy+dz*dz;
785 /* Calculate 1/r and 1/r2 */
786 rinv = gmx_invsqrt(rsq);
789 /* sk is precalculated in init_gb() */
791 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
793 /* aj -> ai interaction */
813 lij_inv = gmx_invsqrt(lij2);
816 prod = 0.25*sk2_rinv;
818 log_term = log(uij*lij_inv);
819 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
820 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
824 tmp = tmp + 2.0 * (rai_inv-lij);
828 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
829 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
830 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
832 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
841 /* ai -> aj interaction */
842 if (raj < dr + sk_ai)
844 lij = 1.0/(dr-sk_ai);
857 uij = 1.0/(dr+sk_ai);
863 lij_inv = gmx_invsqrt(lij2);
864 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
866 prod = 0.25 * sk2_rinv;
868 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
869 log_term = log(uij*lij_inv);
871 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
875 tmp = tmp + 2.0 * (raj_inv-lij);
878 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
879 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
880 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
882 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
884 born->gpol_hct_work[k] += 0.5*tmp;
890 fr->dadx[n++] = dadxi;
891 fr->dadx[n++] = dadxj;
893 born->gpol_hct_work[i] += sum_ai;
896 /* Parallel summations */
899 gmx_sum(natoms, born->gpol_hct_work, cr);
902 if (gb_algorithm == egbHCT)
905 for (i = 0; i < natoms; i++)
907 if (born->use[i] != 0)
909 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
910 sum_ai = 1.0/rai - born->gpol_hct_work[i];
911 min_rad = rai + born->gb_doffset;
914 born->bRad[i] = rad > min_rad ? rad : min_rad;
915 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
923 /* Calculate the radii */
924 for (i = 0; i < natoms; i++)
926 if (born->use[i] != 0)
928 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
932 sum_ai = rai * born->gpol_hct_work[i];
933 sum_ai2 = sum_ai * sum_ai;
934 sum_ai3 = sum_ai2 * sum_ai;
936 tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
937 born->bRad[i] = rai_inv - tsum*rai_inv2;
938 born->bRad[i] = 1.0 / born->bRad[i];
940 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
942 tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
943 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
955 genborn_allvsall_calc_chainrule(t_forcerec * fr,
957 gmx_genborn_t * born,
963 gmx_allvsallgb2_data_t *aadata;
976 real rbai, rbaj, fgb, fgb_ai, rbi;
980 natoms = mdatoms->nr;
981 ni0 = mdatoms->start;
982 ni1 = mdatoms->start+mdatoms->homenr;
985 aadata = (gmx_allvsallgb2_data_t *)work;
990 /* Loop to get the proper form for the Born radius term */
991 if (gb_algorithm == egbSTILL)
993 for (i = 0; i < natoms; i++)
996 rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
999 else if (gb_algorithm == egbHCT)
1001 for (i = 0; i < natoms; i++)
1003 rbi = born->bRad[i];
1004 rb[i] = rbi * rbi * fr->dvda[i];
1007 else if (gb_algorithm == egbOBC)
1009 for (idx = 0; idx < natoms; idx++)
1011 rbi = born->bRad[idx];
1012 rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1016 for (i = ni0; i < ni1; i++)
1018 /* We assume shifts are NOT used for all-vs-all interactions */
1020 /* Load i atom data */
1031 /* Load limits for loop over neighbors */
1032 nj0 = aadata->jindex_gb[3*i];
1033 nj1 = aadata->jindex_gb[3*i+1];
1034 nj2 = aadata->jindex_gb[3*i+2];
1036 mask = aadata->exclusion_mask_gb[i];
1038 /* Prologue part, including exclusion mask */
1039 for (j = nj0; j < nj1; j++, mask++)
1045 /* load j atom coordinates */
1050 /* Calculate distance */
1057 fgb = rbai*dadx[n++];
1058 fgb_ai = rbaj*dadx[n++];
1060 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1071 /* Update force on atom aj */
1072 f[3*k] = f[3*k] - tx;
1073 f[3*k+1] = f[3*k+1] - ty;
1074 f[3*k+2] = f[3*k+2] - tz;
1078 /* Main part, no exclusions */
1079 for (j = nj1; j < nj2; j++)
1083 /* load j atom coordinates */
1088 /* Calculate distance */
1095 fgb = rbai*dadx[n++];
1096 fgb_ai = rbaj*dadx[n++];
1098 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1109 /* Update force on atom aj */
1110 f[3*k] = f[3*k] - tx;
1111 f[3*k+1] = f[3*k+1] - ty;
1112 f[3*k+2] = f[3*k+2] - tz;
1114 /* Update force and shift forces on atom ai */
1115 f[3*i] = f[3*i] + fix;
1116 f[3*i+1] = f[3*i+1] + fiy;
1117 f[3*i+2] = f[3*i+2] + fiz;