Move generalized born to C++
[alexxy/gromacs.git] / src / gromacs / mdlib / genborn_allvsall.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2009, The GROMACS Development Team.
6  * Copyright (c) 2010,2014,2015, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "genborn_allvsall.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/legacyheaders/genborn.h"
46 #include "gromacs/legacyheaders/network.h"
47 #include "gromacs/legacyheaders/types/simple.h"
48 #include "gromacs/math/units.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/utility/smalloc.h"
51
52
53 typedef struct
54 {
55     int *      jindex_gb;
56     int **     exclusion_mask_gb;
57 }
58 gmx_allvsallgb2_data_t;
59
60 static int
61 calc_maxoffset(int i, int natoms)
62 {
63     int maxoffset;
64
65     if ((natoms % 2) == 1)
66     {
67         /* Odd number of atoms, easy */
68         maxoffset = natoms/2;
69     }
70     else if ((natoms % 4) == 0)
71     {
72         /* Multiple of four is hard */
73         if (i < natoms/2)
74         {
75             if ((i % 2) == 0)
76             {
77                 maxoffset = natoms/2;
78             }
79             else
80             {
81                 maxoffset = natoms/2-1;
82             }
83         }
84         else
85         {
86             if ((i % 2) == 1)
87             {
88                 maxoffset = natoms/2;
89             }
90             else
91             {
92                 maxoffset = natoms/2-1;
93             }
94         }
95     }
96     else
97     {
98         /* natoms/2 = odd */
99         if ((i % 2) == 0)
100         {
101             maxoffset = natoms/2;
102         }
103         else
104         {
105             maxoffset = natoms/2-1;
106         }
107     }
108
109     return maxoffset;
110 }
111
112 static void
113 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
114                                 t_ilist     *                  ilist,
115                                 int                            natoms,
116                                 gmx_bool                       bInclude12,
117                                 gmx_bool                       bInclude13,
118                                 gmx_bool                       bInclude14)
119 {
120     int i, j, k;
121     int a1, a2;
122     int max_offset;
123     int max_excl_offset;
124
125     /* This routine can appear to be a bit complex, but it is mostly book-keeping.
126      * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
127      * whether they should interact or not.
128      *
129      * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
130      * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
131      * we create a jindex array with three elements per i atom: the starting point, the point to
132      * which we need to check exclusions, and the end point.
133      * This way we only have to allocate a short exclusion mask per i atom.
134      */
135
136     /* Allocate memory for jindex arrays */
137     snew(aadata->jindex_gb, 3*natoms);
138
139     /* Pointer to lists with exclusion masks */
140     snew(aadata->exclusion_mask_gb, natoms);
141
142     for (i = 0; i < natoms; i++)
143     {
144         /* Start */
145         aadata->jindex_gb[3*i]       = i+1;
146         max_offset                   = calc_maxoffset(i, natoms);
147
148         /* first check the max range of atoms to EXCLUDE */
149         max_excl_offset = 0;
150         if (!bInclude12)
151         {
152             for (j = 0; j < ilist[F_GB12].nr; j += 3)
153             {
154                 a1 = ilist[F_GB12].iatoms[j+1];
155                 a2 = ilist[F_GB12].iatoms[j+2];
156
157                 if (a1 == i)
158                 {
159                     k = a2-a1;
160                 }
161                 else if (a2 == i)
162                 {
163                     k = a1+natoms-a2;
164                 }
165                 else
166                 {
167                     continue;
168                 }
169                 if (k > 0 && k <= max_offset)
170                 {
171                     max_excl_offset = std::max(k, max_excl_offset);
172                 }
173             }
174         }
175         if (!bInclude13)
176         {
177             for (j = 0; j < ilist[F_GB13].nr; j += 3)
178             {
179                 a1 = ilist[F_GB13].iatoms[j+1];
180                 a2 = ilist[F_GB13].iatoms[j+2];
181
182
183                 if (a1 == i)
184                 {
185                     k = a2-a1;
186                 }
187                 else if (a2 == i)
188                 {
189                     k = a1+natoms-a2;
190                 }
191                 else
192                 {
193                     continue;
194                 }
195                 if (k > 0 && k <= max_offset)
196                 {
197                     max_excl_offset = std::max(k, max_excl_offset);
198                 }
199             }
200         }
201         if (!bInclude14)
202         {
203             for (j = 0; j < ilist[F_GB14].nr; j += 3)
204             {
205                 a1 = ilist[F_GB14].iatoms[j+1];
206                 a2 = ilist[F_GB14].iatoms[j+2];
207
208
209                 if (a1 == i)
210                 {
211                     k = a2-a1;
212                 }
213                 else if (a2 == i)
214                 {
215                     k = a1+natoms-a2;
216                 }
217                 else
218                 {
219                     continue;
220                 }
221                 if (k > 0 && k <= max_offset)
222                 {
223                     max_excl_offset = std::max(k, max_excl_offset);
224                 }
225             }
226         }
227         max_excl_offset = std::min(max_offset, max_excl_offset);
228
229         aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
230
231         snew(aadata->exclusion_mask_gb[i], max_excl_offset);
232
233         /* Include everything by default */
234         for (j = 0; j < max_excl_offset; j++)
235         {
236             /* Use all-ones to mark interactions that should be present, compatible with SSE */
237             aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
238         }
239         /* Go through exclusions again */
240         if (!bInclude12)
241         {
242             for (j = 0; j < ilist[F_GB12].nr; j += 3)
243             {
244                 a1 = ilist[F_GB12].iatoms[j+1];
245                 a2 = ilist[F_GB12].iatoms[j+2];
246
247                 if (a1 == i)
248                 {
249                     k = a2-a1;
250                 }
251                 else if (a2 == i)
252                 {
253                     k = a1+natoms-a2;
254                 }
255                 else
256                 {
257                     continue;
258                 }
259                 if (k > 0 && k <= max_offset)
260                 {
261                     aadata->exclusion_mask_gb[i][k-1] = 0;
262                 }
263             }
264         }
265         if (!bInclude13)
266         {
267             for (j = 0; j < ilist[F_GB13].nr; j += 3)
268             {
269                 a1 = ilist[F_GB13].iatoms[j+1];
270                 a2 = ilist[F_GB13].iatoms[j+2];
271
272                 if (a1 == i)
273                 {
274                     k = a2-a1;
275                 }
276                 else if (a2 == i)
277                 {
278                     k = a1+natoms-a2;
279                 }
280                 else
281                 {
282                     continue;
283                 }
284                 if (k > 0 && k <= max_offset)
285                 {
286                     aadata->exclusion_mask_gb[i][k-1] = 0;
287                 }
288             }
289         }
290         if (!bInclude14)
291         {
292             for (j = 0; j < ilist[F_GB14].nr; j += 3)
293             {
294                 a1 = ilist[F_GB14].iatoms[j+1];
295                 a2 = ilist[F_GB14].iatoms[j+2];
296
297                 if (a1 == i)
298                 {
299                     k = a2-a1;
300                 }
301                 else if (a2 == i)
302                 {
303                     k = a1+natoms-a2;
304                 }
305                 else
306                 {
307                     continue;
308                 }
309                 if (k > 0 && k <= max_offset)
310                 {
311                     aadata->exclusion_mask_gb[i][k-1] = 0;
312                 }
313             }
314         }
315
316         /* End */
317
318         /* End */
319         aadata->jindex_gb[3*i+2] = i+1+max_offset;
320     }
321 }
322
323
324 static void
325 genborn_allvsall_setup(gmx_allvsallgb2_data_t     **  p_aadata,
326                        t_ilist     *                  ilist,
327                        int                            natoms,
328                        gmx_bool                       bInclude12,
329                        gmx_bool                       bInclude13,
330                        gmx_bool                       bInclude14)
331 {
332     gmx_allvsallgb2_data_t *aadata;
333
334     snew(aadata, 1);
335     *p_aadata = aadata;
336
337     setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
338 }
339
340
341
342 int
343 genborn_allvsall_calc_still_radii(t_forcerec *           fr,
344                                   t_mdatoms *            mdatoms,
345                                   gmx_genborn_t *        born,
346                                   gmx_localtop_t *       top,
347                                   real *                 x,
348                                   void *                 work)
349 {
350     gmx_allvsallgb2_data_t *aadata;
351     int                     natoms;
352     int                     ni0, ni1;
353     int                     nj0, nj1, nj2;
354     int                     i, j, k, n;
355     int              *      mask;
356
357     real                    ix, iy, iz;
358     real                    jx, jy, jz;
359     real                    dx, dy, dz;
360     real                    rsq, rinv;
361     real                    gpi, rai, vai;
362     real                    prod_ai;
363     real                    irsq, idr4, idr6;
364     real                    raj, rvdw, ratio;
365     real                    vaj, ccf, dccf, theta, cosq;
366     real                    term, prod, icf4, icf6, gpi2, factor, sinq;
367
368     natoms              = mdatoms->nr;
369     ni0                 = 0;
370     ni1                 = mdatoms->homenr;
371     factor              = 0.5*ONE_4PI_EPS0;
372     n                   = 0;
373
374     aadata = *((gmx_allvsallgb2_data_t **)work);
375
376     if (aadata == NULL)
377     {
378         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
379                                FALSE, FALSE, TRUE);
380         *((gmx_allvsallgb2_data_t **)work) = aadata;
381     }
382
383
384     for (i = 0; i < born->nr; i++)
385     {
386         born->gpol_still_work[i] = 0;
387     }
388
389
390     for (i = ni0; i < ni1; i++)
391     {
392         /* We assume shifts are NOT used for all-vs-all interactions */
393
394         /* Load i atom data */
395         ix                = x[3*i];
396         iy                = x[3*i+1];
397         iz                = x[3*i+2];
398
399         gpi               = 0.0;
400
401         rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]];
402         vai     = born->vsolv[i];
403         prod_ai = STILL_P4*vai;
404
405         /* Load limits for loop over neighbors */
406         nj0              = aadata->jindex_gb[3*i];
407         nj1              = aadata->jindex_gb[3*i+1];
408         nj2              = aadata->jindex_gb[3*i+2];
409
410         mask             = aadata->exclusion_mask_gb[i];
411
412         /* Prologue part, including exclusion mask */
413         for (j = nj0; j < nj1; j++, mask++)
414         {
415             if (*mask != 0)
416             {
417                 k = j%natoms;
418
419                 /* load j atom coordinates */
420                 jx                = x[3*k];
421                 jy                = x[3*k+1];
422                 jz                = x[3*k+2];
423
424                 /* Calculate distance */
425                 dx                = ix - jx;
426                 dy                = iy - jy;
427                 dz                = iz - jz;
428                 rsq               = dx*dx+dy*dy+dz*dz;
429
430                 /* Calculate 1/r and 1/r2 */
431                 rinv              = gmx_invsqrt(rsq);
432                 irsq              = rinv*rinv;
433                 idr4              = irsq*irsq;
434                 idr6              = idr4*irsq;
435
436                 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
437
438                 rvdw  = rai + raj;
439
440                 ratio = rsq / (rvdw * rvdw);
441                 vaj   = born->vsolv[k];
442
443
444                 if (ratio > STILL_P5INV)
445                 {
446                     ccf  = 1.0;
447                     dccf = 0.0;
448                 }
449                 else
450                 {
451                     theta = ratio*STILL_PIP5;
452                     cosq  = cos(theta);
453                     term  = 0.5*(1.0-cosq);
454                     ccf   = term*term;
455                     sinq  = 1.0 - cosq*cosq;
456                     dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
457                 }
458
459                 prod          = STILL_P4*vaj;
460                 icf4          = ccf*idr4;
461                 icf6          = (4*ccf-dccf)*idr6;
462
463                 born->gpol_still_work[k] += prod_ai*icf4;
464                 gpi                       = gpi+prod*icf4;
465
466                 /* Save ai->aj and aj->ai chain rule terms */
467                 fr->dadx[n++]   = prod*icf6;
468                 fr->dadx[n++]   = prod_ai*icf6;
469
470                 /* 27 flops, plus one cos(x) - estimate at 20 flops  => 47 */
471
472             }
473         }
474
475         /* Main part, no exclusions */
476         for (j = nj1; j < nj2; j++)
477         {
478             k = j%natoms;
479
480             /* load j atom coordinates */
481             jx                = x[3*k];
482             jy                = x[3*k+1];
483             jz                = x[3*k+2];
484
485             /* Calculate distance */
486             dx                = ix - jx;
487             dy                = iy - jy;
488             dz                = iz - jz;
489             rsq               = dx*dx+dy*dy+dz*dz;
490
491             /* Calculate 1/r and 1/r2 */
492             rinv              = gmx_invsqrt(rsq);
493             irsq              = rinv*rinv;
494             idr4              = irsq*irsq;
495             idr6              = idr4*irsq;
496
497             raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
498
499             rvdw  = rai + raj;
500
501             ratio = rsq / (rvdw * rvdw);
502             vaj   = born->vsolv[k];
503
504             if (ratio > STILL_P5INV)
505             {
506                 ccf  = 1.0;
507                 dccf = 0.0;
508             }
509             else
510             {
511                 theta = ratio*STILL_PIP5;
512                 cosq  = cos(theta);
513                 term  = 0.5*(1.0-cosq);
514                 ccf   = term*term;
515                 sinq  = 1.0 - cosq*cosq;
516                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
517             }
518
519             prod          = STILL_P4*vaj;
520             icf4          = ccf*idr4;
521             icf6          = (4*ccf-dccf)*idr6;
522
523             born->gpol_still_work[k] += prod_ai*icf4;
524             gpi                       = gpi+prod*icf4;
525
526             /* Save ai->aj and aj->ai chain rule terms */
527             fr->dadx[n++]   = prod*icf6;
528             fr->dadx[n++]   = prod_ai*icf6;
529         }
530         born->gpol_still_work[i] += gpi;
531     }
532
533     /* Parallel summations would go here if ever implemented with DD */
534
535     /* Calculate the radii */
536     for (i = 0; i < natoms; i++)
537     {
538         if (born->use[i] != 0)
539         {
540             gpi             = born->gpol[i]+born->gpol_still_work[i];
541             gpi2            = gpi * gpi;
542             born->bRad[i]   = factor*gmx_invsqrt(gpi2);
543             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
544         }
545     }
546
547     return 0;
548 }
549
550
551
552 int
553 genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
554                                     t_mdatoms *            mdatoms,
555                                     gmx_genborn_t *        born,
556                                     int                    gb_algorithm,
557                                     gmx_localtop_t *       top,
558                                     real *                 x,
559                                     void *                 work)
560 {
561     gmx_allvsallgb2_data_t *aadata;
562     int                     natoms;
563     int                     ni0, ni1;
564     int                     nj0, nj1, nj2;
565     int                     i, j, k, n;
566     int              *      mask;
567
568     real                    ix, iy, iz;
569     real                    jx, jy, jz;
570     real                    dx, dy, dz;
571     real                    rsq, rinv;
572     real                    prod, raj;
573     real                    rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
574     real                    dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
575     real                    lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
576     real                    tchain;
577     real                    dadxi, dadxj;
578     real                    rad, min_rad;
579
580     natoms              = mdatoms->nr;
581     ni0                 = 0;
582     ni1                 = mdatoms->homenr;
583
584     n       = 0;
585     doffset = born->gb_doffset;
586
587     aadata = *((gmx_allvsallgb2_data_t **)work);
588
589     if (aadata == NULL)
590     {
591         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
592                                TRUE, TRUE, TRUE);
593         *((gmx_allvsallgb2_data_t **)work) = aadata;
594     }
595
596     for (i = 0; i < born->nr; i++)
597     {
598         born->gpol_hct_work[i] = 0;
599     }
600
601     for (i = ni0; i < ni1; i++)
602     {
603         /* We assume shifts are NOT used for all-vs-all interactions */
604
605         /* Load i atom data */
606         ix                = x[3*i];
607         iy                = x[3*i+1];
608         iz                = x[3*i+2];
609
610         rai      = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
611         rai_inv  = 1.0/rai;
612
613         sk_ai    = born->param[i];
614         sk2_ai   = sk_ai*sk_ai;
615
616         sum_ai   = 0;
617
618         /* Load limits for loop over neighbors */
619         nj0              = aadata->jindex_gb[3*i];
620         nj1              = aadata->jindex_gb[3*i+1];
621         nj2              = aadata->jindex_gb[3*i+2];
622
623         mask             = aadata->exclusion_mask_gb[i];
624
625         /* Prologue part, including exclusion mask */
626         for (j = nj0; j < nj1; j++, mask++)
627         {
628             if (*mask != 0)
629             {
630                 k = j%natoms;
631
632                 /* load j atom coordinates */
633                 jx                = x[3*k];
634                 jy                = x[3*k+1];
635                 jz                = x[3*k+2];
636
637                 /* Calculate distance */
638                 dx                = ix - jx;
639                 dy                = iy - jy;
640                 dz                = iz - jz;
641                 rsq               = dx*dx+dy*dy+dz*dz;
642
643                 /* Calculate 1/r and 1/r2 */
644                 rinv              = gmx_invsqrt(rsq);
645                 dr                = rsq*rinv;
646
647                 /* sk is precalculated in init_gb() */
648                 sk    = born->param[k];
649                 raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
650
651                 /* aj -> ai interaction */
652
653
654                 if (rai < dr+sk)
655                 {
656                     lij       = 1.0/(dr-sk);
657                     dlij      = 1.0;
658
659                     if (rai > dr-sk)
660                     {
661                         lij  = rai_inv;
662                         dlij = 0.0;
663                     }
664
665                     uij      = 1.0/(dr+sk);
666                     lij2     = lij  * lij;
667                     lij3     = lij2 * lij;
668                     uij2     = uij  * uij;
669                     uij3     = uij2 * uij;
670
671                     diff2    = uij2-lij2;
672
673                     lij_inv  = gmx_invsqrt(lij2);
674                     sk2      = sk*sk;
675                     sk2_rinv = sk2*rinv;
676                     prod     = 0.25*sk2_rinv;
677
678                     log_term = std::log(uij*lij_inv);
679                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
680                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
681
682                     if (rai < sk-dr)
683                     {
684                         tmp = tmp + 2.0 * (rai_inv-lij);
685                     }
686
687                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
688                     t2      = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
689
690                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
691
692                     dadxi = (dlij*t1+t2+t3)*rinv;
693
694                     sum_ai += 0.5*tmp;
695                 }
696                 else
697                 {
698                     dadxi = 0.0;
699                 }
700
701                 /* ai -> aj interaction */
702                 if (raj < dr + sk_ai)
703                 {
704                     lij     = 1.0/(dr-sk_ai);
705                     dlij    = 1.0;
706                     raj_inv = 1.0/raj;
707
708                     if (raj > dr-sk_ai)
709                     {
710                         lij  = raj_inv;
711                         dlij = 0.0;
712                     }
713
714                     lij2     = lij  * lij;
715                     lij3     = lij2 * lij;
716
717                     uij      = 1.0/(dr+sk_ai);
718                     uij2     = uij  * uij;
719                     uij3     = uij2 * uij;
720
721                     diff2    = uij2-lij2;
722
723                     lij_inv  = gmx_invsqrt(lij2);
724                     sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
725                     sk2_rinv = sk2*rinv;
726                     prod     = 0.25 * sk2_rinv;
727
728                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
729                     log_term = std::log(uij*lij_inv);
730
731                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
732
733                     if (raj < sk_ai-dr)
734                     {
735                         tmp     = tmp + 2.0 * (raj_inv-lij);
736                     }
737
738                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
739                     t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
740                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
741
742                     dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule  */
743
744                     born->gpol_hct_work[k] += 0.5*tmp;
745                 }
746                 else
747                 {
748                     dadxj = 0.0;
749                 }
750                 fr->dadx[n++] = dadxi;
751                 fr->dadx[n++] = dadxj;
752
753             }
754         }
755
756         /* Main part, no exclusions */
757         for (j = nj1; j < nj2; j++)
758         {
759             k = j%natoms;
760
761             /* load j atom coordinates */
762             jx                = x[3*k];
763             jy                = x[3*k+1];
764             jz                = x[3*k+2];
765
766             /* Calculate distance */
767             dx                = ix - jx;
768             dy                = iy - jy;
769             dz                = iz - jz;
770             rsq               = dx*dx+dy*dy+dz*dz;
771
772             /* Calculate 1/r and 1/r2 */
773             rinv              = gmx_invsqrt(rsq);
774             dr                = rsq*rinv;
775
776             /* sk is precalculated in init_gb() */
777             sk    = born->param[k];
778             raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
779
780             /* aj -> ai interaction */
781             if (rai < dr+sk)
782             {
783                 lij       = 1.0/(dr-sk);
784                 dlij      = 1.0;
785
786                 if (rai > dr-sk)
787                 {
788                     lij  = rai_inv;
789                     dlij = 0.0;
790                 }
791
792                 uij      = 1.0/(dr+sk);
793                 lij2     = lij  * lij;
794                 lij3     = lij2 * lij;
795                 uij2     = uij  * uij;
796                 uij3     = uij2 * uij;
797
798                 diff2    = uij2-lij2;
799
800                 lij_inv  = gmx_invsqrt(lij2);
801                 sk2      = sk*sk;
802                 sk2_rinv = sk2*rinv;
803                 prod     = 0.25*sk2_rinv;
804
805                 log_term = std::log(uij*lij_inv);
806                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
807                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
808
809                 if (rai < sk-dr)
810                 {
811                     tmp = tmp + 2.0 * (rai_inv-lij);
812                 }
813
814                 /* duij    = 1.0; */
815                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
816                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
817                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
818
819                 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
820
821                 sum_ai += 0.5*tmp;
822             }
823             else
824             {
825                 dadxi = 0.0;
826             }
827
828             /* ai -> aj interaction */
829             if (raj < dr + sk_ai)
830             {
831                 lij     = 1.0/(dr-sk_ai);
832                 dlij    = 1.0;
833                 raj_inv = 1.0/raj;
834
835                 if (raj > dr-sk_ai)
836                 {
837                     lij  = raj_inv;
838                     dlij = 0.0;
839                 }
840
841                 lij2     = lij  * lij;
842                 lij3     = lij2 * lij;
843
844                 uij      = 1.0/(dr+sk_ai);
845                 uij2     = uij  * uij;
846                 uij3     = uij2 * uij;
847
848                 diff2    = uij2-lij2;
849
850                 lij_inv  = gmx_invsqrt(lij2);
851                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
852                 sk2_rinv = sk2*rinv;
853                 prod     = 0.25 * sk2_rinv;
854
855                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
856                 log_term = std::log(uij*lij_inv);
857
858                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
859
860                 if (raj < sk_ai-dr)
861                 {
862                     tmp     = tmp + 2.0 * (raj_inv-lij);
863                 }
864
865                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
866                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
867                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
868
869                 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
870
871                 born->gpol_hct_work[k] += 0.5*tmp;
872             }
873             else
874             {
875                 dadxj = 0.0;
876             }
877             fr->dadx[n++] = dadxi;
878             fr->dadx[n++] = dadxj;
879         }
880         born->gpol_hct_work[i] += sum_ai;
881     }
882
883     /* Parallel summations would go here if ever implemented with DD */
884
885     if (gb_algorithm == egbHCT)
886     {
887         /* HCT */
888         for (i = 0; i < natoms; i++)
889         {
890             if (born->use[i] != 0)
891             {
892                 rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
893                 sum_ai  = 1.0/rai - born->gpol_hct_work[i];
894                 min_rad = rai + born->gb_doffset;
895                 rad     = 1.0/sum_ai;
896
897                 born->bRad[i]   = std::max(rad, min_rad);
898                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
899             }
900         }
901
902     }
903     else
904     {
905         /* OBC */
906         /* Calculate the radii */
907         for (i = 0; i < natoms; i++)
908         {
909             if (born->use[i] != 0)
910             {
911                 rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
912                 rai_inv2   = 1.0/rai;
913                 rai        = rai-doffset;
914                 rai_inv    = 1.0/rai;
915                 sum_ai     = rai * born->gpol_hct_work[i];
916                 sum_ai2    = sum_ai  * sum_ai;
917                 sum_ai3    = sum_ai2 * sum_ai;
918
919                 tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
920                 born->bRad[i] = rai_inv - tsum*rai_inv2;
921                 born->bRad[i] = 1.0 / born->bRad[i];
922
923                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
924
925                 tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
926                 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
927             }
928         }
929     }
930     return 0;
931 }
932
933
934
935
936
937 int
938 genborn_allvsall_calc_chainrule(t_forcerec *           fr,
939                                 t_mdatoms *            mdatoms,
940                                 gmx_genborn_t *        born,
941                                 real *                 x,
942                                 real *                 f,
943                                 int                    gb_algorithm,
944                                 void *                 work)
945 {
946     gmx_allvsallgb2_data_t *aadata;
947     int                     natoms;
948     int                     ni0, ni1;
949     int                     nj0, nj1, nj2;
950     int                     i, j, k, n;
951     int                     idx;
952     int              *      mask;
953
954     real                    ix, iy, iz;
955     real                    fix, fiy, fiz;
956     real                    jx, jy, jz;
957     real                    dx, dy, dz;
958     real                    tx, ty, tz;
959     real                    rbai, rbaj, fgb, fgb_ai, rbi;
960     real              *     rb;
961     real              *     dadx;
962
963     natoms              = mdatoms->nr;
964     ni0                 = 0;
965     ni1                 = mdatoms->homenr;
966     dadx                = fr->dadx;
967
968     aadata = (gmx_allvsallgb2_data_t *)work;
969
970     n  = 0;
971     rb = born->work;
972
973     /* Loop to get the proper form for the Born radius term */
974     if (gb_algorithm == egbSTILL)
975     {
976         for (i = 0; i < natoms; i++)
977         {
978             rbi   = born->bRad[i];
979             rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
980         }
981     }
982     else if (gb_algorithm == egbHCT)
983     {
984         for (i = 0; i < natoms; i++)
985         {
986             rbi   = born->bRad[i];
987             rb[i] = rbi * rbi * fr->dvda[i];
988         }
989     }
990     else if (gb_algorithm == egbOBC)
991     {
992         for (idx = 0; idx < natoms; idx++)
993         {
994             rbi     = born->bRad[idx];
995             rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
996         }
997     }
998
999     for (i = ni0; i < ni1; i++)
1000     {
1001         /* We assume shifts are NOT used for all-vs-all interactions */
1002
1003         /* Load i atom data */
1004         ix                = x[3*i];
1005         iy                = x[3*i+1];
1006         iz                = x[3*i+2];
1007
1008         fix               = 0;
1009         fiy               = 0;
1010         fiz               = 0;
1011
1012         rbai              = rb[i];
1013
1014         /* Load limits for loop over neighbors */
1015         nj0              = aadata->jindex_gb[3*i];
1016         nj1              = aadata->jindex_gb[3*i+1];
1017         nj2              = aadata->jindex_gb[3*i+2];
1018
1019         mask             = aadata->exclusion_mask_gb[i];
1020
1021         /* Prologue part, including exclusion mask */
1022         for (j = nj0; j < nj1; j++, mask++)
1023         {
1024             if (*mask != 0)
1025             {
1026                 k = j%natoms;
1027
1028                 /* load j atom coordinates */
1029                 jx                = x[3*k];
1030                 jy                = x[3*k+1];
1031                 jz                = x[3*k+2];
1032
1033                 /* Calculate distance */
1034                 dx                = ix - jx;
1035                 dy                = iy - jy;
1036                 dz                = iz - jz;
1037
1038                 rbaj              = rb[k];
1039
1040                 fgb     = rbai*dadx[n++];
1041                 fgb_ai  = rbaj*dadx[n++];
1042
1043                 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1044                 fgb     = fgb + fgb_ai;
1045
1046                 tx      = fgb * dx;
1047                 ty      = fgb * dy;
1048                 tz      = fgb * dz;
1049
1050                 fix     = fix + tx;
1051                 fiy     = fiy + ty;
1052                 fiz     = fiz + tz;
1053
1054                 /* Update force on atom aj */
1055                 f[3*k]   = f[3*k] - tx;
1056                 f[3*k+1] = f[3*k+1] - ty;
1057                 f[3*k+2] = f[3*k+2] - tz;
1058             }
1059         }
1060
1061         /* Main part, no exclusions */
1062         for (j = nj1; j < nj2; j++)
1063         {
1064             k = j%natoms;
1065
1066             /* load j atom coordinates */
1067             jx                = x[3*k];
1068             jy                = x[3*k+1];
1069             jz                = x[3*k+2];
1070
1071             /* Calculate distance */
1072             dx                = ix - jx;
1073             dy                = iy - jy;
1074             dz                = iz - jz;
1075
1076             rbaj              = rb[k];
1077
1078             fgb     = rbai*dadx[n++];
1079             fgb_ai  = rbaj*dadx[n++];
1080
1081             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1082             fgb     = fgb + fgb_ai;
1083
1084             tx      = fgb * dx;
1085             ty      = fgb * dy;
1086             tz      = fgb * dz;
1087
1088             fix     = fix + tx;
1089             fiy     = fiy + ty;
1090             fiz     = fiz + tz;
1091
1092             /* Update force on atom aj */
1093             f[3*k]   = f[3*k] - tx;
1094             f[3*k+1] = f[3*k+1] - ty;
1095             f[3*k+2] = f[3*k+2] - tz;
1096         }
1097         /* Update force and shift forces on atom ai */
1098         f[3*i]   = f[3*i] + fix;
1099         f[3*i+1] = f[3*i+1] + fiy;
1100         f[3*i+2] = f[3*i+2] + fiz;
1101     }
1102
1103     return 0;
1104 }