2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2009, The GROMACS Development Team.
6 * Copyright (c) 2010,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/utility/smalloc.h"
48 #include "gromacs/math/units.h"
50 #include "genborn_allvsall.h"
56 int ** exclusion_mask_gb;
58 gmx_allvsallgb2_data_t;
61 calc_maxoffset(int i, int natoms)
65 if ((natoms % 2) == 1)
67 /* Odd number of atoms, easy */
70 else if ((natoms % 4) == 0)
72 /* Multiple of four is hard */
81 maxoffset = natoms/2-1;
92 maxoffset = natoms/2-1;
101 maxoffset = natoms/2;
105 maxoffset = natoms/2-1;
113 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata,
127 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
128 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
129 * whether they should interact or not.
131 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
132 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
133 * we create a jindex array with three elements per i atom: the starting point, the point to
134 * which we need to check exclusions, and the end point.
135 * This way we only have to allocate a short exclusion mask per i atom.
138 /* Allocate memory for jindex arrays */
139 snew(aadata->jindex_gb, 3*natoms);
141 /* Pointer to lists with exclusion masks */
142 snew(aadata->exclusion_mask_gb, natoms);
144 for (i = 0; i < natoms; i++)
147 aadata->jindex_gb[3*i] = i+1;
148 max_offset = calc_maxoffset(i, natoms);
150 /* first check the max range of atoms to EXCLUDE */
154 for (j = 0; j < ilist[F_GB12].nr; j += 3)
156 a1 = ilist[F_GB12].iatoms[j+1];
157 a2 = ilist[F_GB12].iatoms[j+2];
171 if (k > 0 && k <= max_offset)
173 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
179 for (j = 0; j < ilist[F_GB13].nr; j += 3)
181 a1 = ilist[F_GB13].iatoms[j+1];
182 a2 = ilist[F_GB13].iatoms[j+2];
197 if (k > 0 && k <= max_offset)
199 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
205 for (j = 0; j < ilist[F_GB14].nr; j += 3)
207 a1 = ilist[F_GB14].iatoms[j+1];
208 a2 = ilist[F_GB14].iatoms[j+2];
223 if (k > 0 && k <= max_offset)
225 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
229 max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
231 aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
233 snew(aadata->exclusion_mask_gb[i], max_excl_offset);
235 /* Include everything by default */
236 for (j = 0; j < max_excl_offset; j++)
238 /* Use all-ones to mark interactions that should be present, compatible with SSE */
239 aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
241 /* Go through exclusions again */
244 for (j = 0; j < ilist[F_GB12].nr; j += 3)
246 a1 = ilist[F_GB12].iatoms[j+1];
247 a2 = ilist[F_GB12].iatoms[j+2];
261 if (k > 0 && k <= max_offset)
263 aadata->exclusion_mask_gb[i][k-1] = 0;
269 for (j = 0; j < ilist[F_GB13].nr; j += 3)
271 a1 = ilist[F_GB13].iatoms[j+1];
272 a2 = ilist[F_GB13].iatoms[j+2];
286 if (k > 0 && k <= max_offset)
288 aadata->exclusion_mask_gb[i][k-1] = 0;
294 for (j = 0; j < ilist[F_GB14].nr; j += 3)
296 a1 = ilist[F_GB14].iatoms[j+1];
297 a2 = ilist[F_GB14].iatoms[j+2];
311 if (k > 0 && k <= max_offset)
313 aadata->exclusion_mask_gb[i][k-1] = 0;
321 aadata->jindex_gb[3*i+2] = i+1+max_offset;
327 genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata,
335 gmx_allvsallgb2_data_t *aadata;
341 setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
347 genborn_allvsall_calc_still_radii(t_forcerec * fr,
349 gmx_genborn_t * born,
350 gmx_localtop_t * top,
354 gmx_allvsallgb2_data_t *aadata;
367 real irsq, idr4, idr6;
368 real raj, rvdw, ratio;
369 real vaj, ccf, dccf, theta, cosq;
370 real term, prod, icf4, icf6, gpi2, factor, sinq;
372 natoms = mdatoms->nr;
374 ni1 = mdatoms->homenr;
375 factor = 0.5*ONE_4PI_EPS0;
378 aadata = *((gmx_allvsallgb2_data_t **)work);
382 genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
384 *((gmx_allvsallgb2_data_t **)work) = aadata;
388 for (i = 0; i < born->nr; i++)
390 born->gpol_still_work[i] = 0;
394 for (i = ni0; i < ni1; i++)
396 /* We assume shifts are NOT used for all-vs-all interactions */
398 /* Load i atom data */
405 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
406 vai = born->vsolv[i];
407 prod_ai = STILL_P4*vai;
409 /* Load limits for loop over neighbors */
410 nj0 = aadata->jindex_gb[3*i];
411 nj1 = aadata->jindex_gb[3*i+1];
412 nj2 = aadata->jindex_gb[3*i+2];
414 mask = aadata->exclusion_mask_gb[i];
416 /* Prologue part, including exclusion mask */
417 for (j = nj0; j < nj1; j++, mask++)
423 /* load j atom coordinates */
428 /* Calculate distance */
432 rsq = dx*dx+dy*dy+dz*dz;
434 /* Calculate 1/r and 1/r2 */
435 rinv = gmx_invsqrt(rsq);
440 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
444 ratio = rsq / (rvdw * rvdw);
445 vaj = born->vsolv[k];
448 if (ratio > STILL_P5INV)
455 theta = ratio*STILL_PIP5;
457 term = 0.5*(1.0-cosq);
459 sinq = 1.0 - cosq*cosq;
460 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
465 icf6 = (4*ccf-dccf)*idr6;
467 born->gpol_still_work[k] += prod_ai*icf4;
470 /* Save ai->aj and aj->ai chain rule terms */
471 fr->dadx[n++] = prod*icf6;
472 fr->dadx[n++] = prod_ai*icf6;
474 /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */
479 /* Main part, no exclusions */
480 for (j = nj1; j < nj2; j++)
484 /* load j atom coordinates */
489 /* Calculate distance */
493 rsq = dx*dx+dy*dy+dz*dz;
495 /* Calculate 1/r and 1/r2 */
496 rinv = gmx_invsqrt(rsq);
501 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
505 ratio = rsq / (rvdw * rvdw);
506 vaj = born->vsolv[k];
508 if (ratio > STILL_P5INV)
515 theta = ratio*STILL_PIP5;
517 term = 0.5*(1.0-cosq);
519 sinq = 1.0 - cosq*cosq;
520 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
525 icf6 = (4*ccf-dccf)*idr6;
527 born->gpol_still_work[k] += prod_ai*icf4;
530 /* Save ai->aj and aj->ai chain rule terms */
531 fr->dadx[n++] = prod*icf6;
532 fr->dadx[n++] = prod_ai*icf6;
534 born->gpol_still_work[i] += gpi;
537 /* Parallel summations would go here if ever implemented with DD */
539 /* Calculate the radii */
540 for (i = 0; i < natoms; i++)
542 if (born->use[i] != 0)
544 gpi = born->gpol[i]+born->gpol_still_work[i];
546 born->bRad[i] = factor*gmx_invsqrt(gpi2);
547 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
557 genborn_allvsall_calc_hct_obc_radii(t_forcerec * fr,
559 gmx_genborn_t * born,
561 gmx_localtop_t * top,
565 gmx_allvsallgb2_data_t *aadata;
577 real rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
578 real dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
579 real lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
584 natoms = mdatoms->nr;
586 ni1 = mdatoms->homenr;
591 doffset = born->gb_doffset;
593 aadata = *((gmx_allvsallgb2_data_t **)work);
597 genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
599 *((gmx_allvsallgb2_data_t **)work) = aadata;
602 for (i = 0; i < born->nr; i++)
604 born->gpol_hct_work[i] = 0;
607 for (i = ni0; i < ni1; i++)
609 /* We assume shifts are NOT used for all-vs-all interactions */
611 /* Load i atom data */
616 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
619 sk_ai = born->param[i];
620 sk2_ai = sk_ai*sk_ai;
624 /* Load limits for loop over neighbors */
625 nj0 = aadata->jindex_gb[3*i];
626 nj1 = aadata->jindex_gb[3*i+1];
627 nj2 = aadata->jindex_gb[3*i+2];
629 mask = aadata->exclusion_mask_gb[i];
631 /* Prologue part, including exclusion mask */
632 for (j = nj0; j < nj1; j++, mask++)
638 /* load j atom coordinates */
643 /* Calculate distance */
647 rsq = dx*dx+dy*dy+dz*dz;
649 /* Calculate 1/r and 1/r2 */
650 rinv = gmx_invsqrt(rsq);
653 /* sk is precalculated in init_gb() */
655 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
657 /* aj -> ai interaction */
679 lij_inv = gmx_invsqrt(lij2);
682 prod = 0.25*sk2_rinv;
684 log_term = log(uij*lij_inv);
685 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
686 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
690 tmp = tmp + 2.0 * (rai_inv-lij);
693 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
694 t2 = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
696 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
698 dadxi = (dlij*t1+t2+t3)*rinv;
707 /* ai -> aj interaction */
708 if (raj < dr + sk_ai)
710 lij = 1.0/(dr-sk_ai);
723 uij = 1.0/(dr+sk_ai);
729 lij_inv = gmx_invsqrt(lij2);
730 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
732 prod = 0.25 * sk2_rinv;
734 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
735 log_term = log(uij*lij_inv);
737 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
741 tmp = tmp + 2.0 * (raj_inv-lij);
744 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
745 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
746 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
748 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
750 born->gpol_hct_work[k] += 0.5*tmp;
756 fr->dadx[n++] = dadxi;
757 fr->dadx[n++] = dadxj;
762 /* Main part, no exclusions */
763 for (j = nj1; j < nj2; j++)
767 /* load j atom coordinates */
772 /* Calculate distance */
776 rsq = dx*dx+dy*dy+dz*dz;
778 /* Calculate 1/r and 1/r2 */
779 rinv = gmx_invsqrt(rsq);
782 /* sk is precalculated in init_gb() */
784 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
786 /* aj -> ai interaction */
806 lij_inv = gmx_invsqrt(lij2);
809 prod = 0.25*sk2_rinv;
811 log_term = log(uij*lij_inv);
812 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
813 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
817 tmp = tmp + 2.0 * (rai_inv-lij);
821 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
822 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
823 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
825 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
834 /* ai -> aj interaction */
835 if (raj < dr + sk_ai)
837 lij = 1.0/(dr-sk_ai);
850 uij = 1.0/(dr+sk_ai);
856 lij_inv = gmx_invsqrt(lij2);
857 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
859 prod = 0.25 * sk2_rinv;
861 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
862 log_term = log(uij*lij_inv);
864 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
868 tmp = tmp + 2.0 * (raj_inv-lij);
871 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
872 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
873 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
875 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
877 born->gpol_hct_work[k] += 0.5*tmp;
883 fr->dadx[n++] = dadxi;
884 fr->dadx[n++] = dadxj;
886 born->gpol_hct_work[i] += sum_ai;
889 /* Parallel summations would go here if ever implemented with DD */
891 if (gb_algorithm == egbHCT)
894 for (i = 0; i < natoms; i++)
896 if (born->use[i] != 0)
898 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
899 sum_ai = 1.0/rai - born->gpol_hct_work[i];
900 min_rad = rai + born->gb_doffset;
903 born->bRad[i] = rad > min_rad ? rad : min_rad;
904 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
912 /* Calculate the radii */
913 for (i = 0; i < natoms; i++)
915 if (born->use[i] != 0)
917 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
921 sum_ai = rai * born->gpol_hct_work[i];
922 sum_ai2 = sum_ai * sum_ai;
923 sum_ai3 = sum_ai2 * sum_ai;
925 tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
926 born->bRad[i] = rai_inv - tsum*rai_inv2;
927 born->bRad[i] = 1.0 / born->bRad[i];
929 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
931 tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
932 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
944 genborn_allvsall_calc_chainrule(t_forcerec * fr,
946 gmx_genborn_t * born,
952 gmx_allvsallgb2_data_t *aadata;
965 real rbai, rbaj, fgb, fgb_ai, rbi;
969 natoms = mdatoms->nr;
971 ni1 = mdatoms->homenr;
974 aadata = (gmx_allvsallgb2_data_t *)work;
979 /* Loop to get the proper form for the Born radius term */
980 if (gb_algorithm == egbSTILL)
982 for (i = 0; i < natoms; i++)
985 rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
988 else if (gb_algorithm == egbHCT)
990 for (i = 0; i < natoms; i++)
993 rb[i] = rbi * rbi * fr->dvda[i];
996 else if (gb_algorithm == egbOBC)
998 for (idx = 0; idx < natoms; idx++)
1000 rbi = born->bRad[idx];
1001 rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1005 for (i = ni0; i < ni1; i++)
1007 /* We assume shifts are NOT used for all-vs-all interactions */
1009 /* Load i atom data */
1020 /* Load limits for loop over neighbors */
1021 nj0 = aadata->jindex_gb[3*i];
1022 nj1 = aadata->jindex_gb[3*i+1];
1023 nj2 = aadata->jindex_gb[3*i+2];
1025 mask = aadata->exclusion_mask_gb[i];
1027 /* Prologue part, including exclusion mask */
1028 for (j = nj0; j < nj1; j++, mask++)
1034 /* load j atom coordinates */
1039 /* Calculate distance */
1046 fgb = rbai*dadx[n++];
1047 fgb_ai = rbaj*dadx[n++];
1049 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1060 /* Update force on atom aj */
1061 f[3*k] = f[3*k] - tx;
1062 f[3*k+1] = f[3*k+1] - ty;
1063 f[3*k+2] = f[3*k+2] - tz;
1067 /* Main part, no exclusions */
1068 for (j = nj1; j < nj2; j++)
1072 /* load j atom coordinates */
1077 /* Calculate distance */
1084 fgb = rbai*dadx[n++];
1085 fgb_ai = rbaj*dadx[n++];
1087 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1098 /* Update force on atom aj */
1099 f[3*k] = f[3*k] - tx;
1100 f[3*k+1] = f[3*k+1] - ty;
1101 f[3*k+2] = f[3*k+2] - tz;
1103 /* Update force and shift forces on atom ai */
1104 f[3*i] = f[3*i] + fix;
1105 f[3*i+1] = f[3*i+1] + fiy;
1106 f[3*i+2] = f[3*i+2] + fiz;