1d96860895bcb9c0fe0e05868ccd7cdeeb8f634a
[alexxy/gromacs.git] / src / gromacs / mdlib / genborn_allvsall.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2009, The GROMACS Development Team.
6  * Copyright (c) 2010,2014,2015,2017, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "genborn_allvsall.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/network.h"
46 #include "gromacs/math/functions.h"
47 #include "gromacs/math/units.h"
48 #include "gromacs/math/vec.h"
49 #include "gromacs/mdlib/genborn.h"
50 #include "gromacs/mdtypes/forcerec.h"
51 #include "gromacs/mdtypes/md_enums.h"
52 #include "gromacs/mdtypes/mdatom.h"
53 #include "gromacs/topology/topology.h"
54 #include "gromacs/utility/smalloc.h"
55
56
57 typedef struct
58 {
59     int *      jindex_gb;
60     int **     exclusion_mask_gb;
61 }
62 gmx_allvsallgb2_data_t;
63
64 static int
65 calc_maxoffset(int i, int natoms)
66 {
67     int maxoffset;
68
69     if ((natoms % 2) == 1)
70     {
71         /* Odd number of atoms, easy */
72         maxoffset = natoms/2;
73     }
74     else if ((natoms % 4) == 0)
75     {
76         /* Multiple of four is hard */
77         if (i < natoms/2)
78         {
79             if ((i % 2) == 0)
80             {
81                 maxoffset = natoms/2;
82             }
83             else
84             {
85                 maxoffset = natoms/2-1;
86             }
87         }
88         else
89         {
90             if ((i % 2) == 1)
91             {
92                 maxoffset = natoms/2;
93             }
94             else
95             {
96                 maxoffset = natoms/2-1;
97             }
98         }
99     }
100     else
101     {
102         /* natoms/2 = odd */
103         if ((i % 2) == 0)
104         {
105             maxoffset = natoms/2;
106         }
107         else
108         {
109             maxoffset = natoms/2-1;
110         }
111     }
112
113     return maxoffset;
114 }
115
116 static void
117 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
118                                 t_ilist     *                  ilist,
119                                 int                            natoms,
120                                 gmx_bool                       bInclude12,
121                                 gmx_bool                       bInclude13,
122                                 gmx_bool                       bInclude14)
123 {
124     int i, j, k;
125     int a1, a2;
126     int max_offset;
127     int max_excl_offset;
128
129     /* This routine can appear to be a bit complex, but it is mostly book-keeping.
130      * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
131      * whether they should interact or not.
132      *
133      * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
134      * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
135      * we create a jindex array with three elements per i atom: the starting point, the point to
136      * which we need to check exclusions, and the end point.
137      * This way we only have to allocate a short exclusion mask per i atom.
138      */
139
140     /* Allocate memory for jindex arrays */
141     snew(aadata->jindex_gb, 3*natoms);
142
143     /* Pointer to lists with exclusion masks */
144     snew(aadata->exclusion_mask_gb, natoms);
145
146     for (i = 0; i < natoms; i++)
147     {
148         /* Start */
149         aadata->jindex_gb[3*i]       = i+1;
150         max_offset                   = calc_maxoffset(i, natoms);
151
152         /* first check the max range of atoms to EXCLUDE */
153         max_excl_offset = 0;
154         if (!bInclude12)
155         {
156             for (j = 0; j < ilist[F_GB12].nr; j += 3)
157             {
158                 a1 = ilist[F_GB12].iatoms[j+1];
159                 a2 = ilist[F_GB12].iatoms[j+2];
160
161                 if (a1 == i)
162                 {
163                     k = a2-a1;
164                 }
165                 else if (a2 == i)
166                 {
167                     k = a1+natoms-a2;
168                 }
169                 else
170                 {
171                     continue;
172                 }
173                 if (k > 0 && k <= max_offset)
174                 {
175                     max_excl_offset = std::max(k, max_excl_offset);
176                 }
177             }
178         }
179         if (!bInclude13)
180         {
181             for (j = 0; j < ilist[F_GB13].nr; j += 3)
182             {
183                 a1 = ilist[F_GB13].iatoms[j+1];
184                 a2 = ilist[F_GB13].iatoms[j+2];
185
186
187                 if (a1 == i)
188                 {
189                     k = a2-a1;
190                 }
191                 else if (a2 == i)
192                 {
193                     k = a1+natoms-a2;
194                 }
195                 else
196                 {
197                     continue;
198                 }
199                 if (k > 0 && k <= max_offset)
200                 {
201                     max_excl_offset = std::max(k, max_excl_offset);
202                 }
203             }
204         }
205         if (!bInclude14)
206         {
207             for (j = 0; j < ilist[F_GB14].nr; j += 3)
208             {
209                 a1 = ilist[F_GB14].iatoms[j+1];
210                 a2 = ilist[F_GB14].iatoms[j+2];
211
212
213                 if (a1 == i)
214                 {
215                     k = a2-a1;
216                 }
217                 else if (a2 == i)
218                 {
219                     k = a1+natoms-a2;
220                 }
221                 else
222                 {
223                     continue;
224                 }
225                 if (k > 0 && k <= max_offset)
226                 {
227                     max_excl_offset = std::max(k, max_excl_offset);
228                 }
229             }
230         }
231         max_excl_offset = std::min(max_offset, max_excl_offset);
232
233         aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
234
235         snew(aadata->exclusion_mask_gb[i], max_excl_offset);
236
237         /* Include everything by default */
238         for (j = 0; j < max_excl_offset; j++)
239         {
240             /* Use all-ones to mark interactions that should be present, compatible with SSE */
241             aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
242         }
243         /* Go through exclusions again */
244         if (!bInclude12)
245         {
246             for (j = 0; j < ilist[F_GB12].nr; j += 3)
247             {
248                 a1 = ilist[F_GB12].iatoms[j+1];
249                 a2 = ilist[F_GB12].iatoms[j+2];
250
251                 if (a1 == i)
252                 {
253                     k = a2-a1;
254                 }
255                 else if (a2 == i)
256                 {
257                     k = a1+natoms-a2;
258                 }
259                 else
260                 {
261                     continue;
262                 }
263                 if (k > 0 && k <= max_offset)
264                 {
265                     aadata->exclusion_mask_gb[i][k-1] = 0;
266                 }
267             }
268         }
269         if (!bInclude13)
270         {
271             for (j = 0; j < ilist[F_GB13].nr; j += 3)
272             {
273                 a1 = ilist[F_GB13].iatoms[j+1];
274                 a2 = ilist[F_GB13].iatoms[j+2];
275
276                 if (a1 == i)
277                 {
278                     k = a2-a1;
279                 }
280                 else if (a2 == i)
281                 {
282                     k = a1+natoms-a2;
283                 }
284                 else
285                 {
286                     continue;
287                 }
288                 if (k > 0 && k <= max_offset)
289                 {
290                     aadata->exclusion_mask_gb[i][k-1] = 0;
291                 }
292             }
293         }
294         if (!bInclude14)
295         {
296             for (j = 0; j < ilist[F_GB14].nr; j += 3)
297             {
298                 a1 = ilist[F_GB14].iatoms[j+1];
299                 a2 = ilist[F_GB14].iatoms[j+2];
300
301                 if (a1 == i)
302                 {
303                     k = a2-a1;
304                 }
305                 else if (a2 == i)
306                 {
307                     k = a1+natoms-a2;
308                 }
309                 else
310                 {
311                     continue;
312                 }
313                 if (k > 0 && k <= max_offset)
314                 {
315                     aadata->exclusion_mask_gb[i][k-1] = 0;
316                 }
317             }
318         }
319
320         /* End */
321
322         /* End */
323         aadata->jindex_gb[3*i+2] = i+1+max_offset;
324     }
325 }
326
327
328 static void
329 genborn_allvsall_setup(gmx_allvsallgb2_data_t     **  p_aadata,
330                        t_ilist     *                  ilist,
331                        int                            natoms,
332                        gmx_bool                       bInclude12,
333                        gmx_bool                       bInclude13,
334                        gmx_bool                       bInclude14)
335 {
336     gmx_allvsallgb2_data_t *aadata;
337
338     snew(aadata, 1);
339     *p_aadata = aadata;
340
341     setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
342 }
343
344
345
346 int
347 genborn_allvsall_calc_still_radii(t_forcerec *           fr,
348                                   t_mdatoms *            mdatoms,
349                                   gmx_genborn_t *        born,
350                                   gmx_localtop_t *       top,
351                                   real *                 x,
352                                   void *                 work)
353 {
354     gmx_allvsallgb2_data_t *aadata;
355     int                     natoms;
356     int                     ni0, ni1;
357     int                     nj0, nj1, nj2;
358     int                     i, j, k, n;
359     int              *      mask;
360
361     real                    ix, iy, iz;
362     real                    jx, jy, jz;
363     real                    dx, dy, dz;
364     real                    rsq, rinv;
365     real                    gpi, rai, vai;
366     real                    prod_ai;
367     real                    irsq, idr4, idr6;
368     real                    raj, rvdw, ratio;
369     real                    vaj, ccf, dccf, theta, cosq;
370     real                    term, prod, icf4, icf6, gpi2, factor, sinq;
371
372     natoms              = mdatoms->nr;
373     ni0                 = 0;
374     ni1                 = mdatoms->homenr;
375     factor              = 0.5*ONE_4PI_EPS0;
376     n                   = 0;
377
378     aadata = *((gmx_allvsallgb2_data_t **)work);
379
380     if (aadata == nullptr)
381     {
382         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
383                                FALSE, FALSE, TRUE);
384         *((gmx_allvsallgb2_data_t **)work) = aadata;
385     }
386
387
388     for (i = 0; i < born->nr; i++)
389     {
390         born->gpol_still_work[i] = 0;
391     }
392
393
394     for (i = ni0; i < ni1; i++)
395     {
396         /* We assume shifts are NOT used for all-vs-all interactions */
397
398         /* Load i atom data */
399         ix                = x[3*i];
400         iy                = x[3*i+1];
401         iz                = x[3*i+2];
402
403         gpi               = 0.0;
404
405         rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]];
406         vai     = born->vsolv[i];
407         prod_ai = STILL_P4*vai;
408
409         /* Load limits for loop over neighbors */
410         nj0              = aadata->jindex_gb[3*i];
411         nj1              = aadata->jindex_gb[3*i+1];
412         nj2              = aadata->jindex_gb[3*i+2];
413
414         mask             = aadata->exclusion_mask_gb[i];
415
416         /* Prologue part, including exclusion mask */
417         for (j = nj0; j < nj1; j++, mask++)
418         {
419             if (*mask != 0)
420             {
421                 k = j%natoms;
422
423                 /* load j atom coordinates */
424                 jx                = x[3*k];
425                 jy                = x[3*k+1];
426                 jz                = x[3*k+2];
427
428                 /* Calculate distance */
429                 dx                = ix - jx;
430                 dy                = iy - jy;
431                 dz                = iz - jz;
432                 rsq               = dx*dx+dy*dy+dz*dz;
433
434                 /* Calculate 1/r and 1/r2 */
435                 rinv              = gmx::invsqrt(rsq);
436                 irsq              = rinv*rinv;
437                 idr4              = irsq*irsq;
438                 idr6              = idr4*irsq;
439
440                 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
441
442                 rvdw  = rai + raj;
443
444                 ratio = rsq / (rvdw * rvdw);
445                 vaj   = born->vsolv[k];
446
447
448                 if (ratio > STILL_P5INV)
449                 {
450                     ccf  = 1.0;
451                     dccf = 0.0;
452                 }
453                 else
454                 {
455                     theta = ratio*STILL_PIP5;
456                     cosq  = cos(theta);
457                     term  = 0.5*(1.0-cosq);
458                     ccf   = term*term;
459                     sinq  = 1.0 - cosq*cosq;
460                     dccf  = 2.0*term*sinq*gmx::invsqrt(sinq)*theta;
461                 }
462
463                 prod          = STILL_P4*vaj;
464                 icf4          = ccf*idr4;
465                 icf6          = (4*ccf-dccf)*idr6;
466
467                 born->gpol_still_work[k] += prod_ai*icf4;
468                 gpi                       = gpi+prod*icf4;
469
470                 /* Save ai->aj and aj->ai chain rule terms */
471                 fr->dadx[n++]   = prod*icf6;
472                 fr->dadx[n++]   = prod_ai*icf6;
473
474                 /* 27 flops, plus one cos(x) - estimate at 20 flops  => 47 */
475
476             }
477         }
478
479         /* Main part, no exclusions */
480         for (j = nj1; j < nj2; j++)
481         {
482             k = j%natoms;
483
484             /* load j atom coordinates */
485             jx                = x[3*k];
486             jy                = x[3*k+1];
487             jz                = x[3*k+2];
488
489             /* Calculate distance */
490             dx                = ix - jx;
491             dy                = iy - jy;
492             dz                = iz - jz;
493             rsq               = dx*dx+dy*dy+dz*dz;
494
495             /* Calculate 1/r and 1/r2 */
496             rinv              = gmx::invsqrt(rsq);
497             irsq              = rinv*rinv;
498             idr4              = irsq*irsq;
499             idr6              = idr4*irsq;
500
501             raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
502
503             rvdw  = rai + raj;
504
505             ratio = rsq / (rvdw * rvdw);
506             vaj   = born->vsolv[k];
507
508             if (ratio > STILL_P5INV)
509             {
510                 ccf  = 1.0;
511                 dccf = 0.0;
512             }
513             else
514             {
515                 theta = ratio*STILL_PIP5;
516                 cosq  = cos(theta);
517                 term  = 0.5*(1.0-cosq);
518                 ccf   = term*term;
519                 sinq  = 1.0 - cosq*cosq;
520                 dccf  = 2.0*term*sinq*gmx::invsqrt(sinq)*theta;
521             }
522
523             prod          = STILL_P4*vaj;
524             icf4          = ccf*idr4;
525             icf6          = (4*ccf-dccf)*idr6;
526
527             born->gpol_still_work[k] += prod_ai*icf4;
528             gpi                       = gpi+prod*icf4;
529
530             /* Save ai->aj and aj->ai chain rule terms */
531             fr->dadx[n++]   = prod*icf6;
532             fr->dadx[n++]   = prod_ai*icf6;
533         }
534         born->gpol_still_work[i] += gpi;
535     }
536
537     /* Parallel summations would go here if ever implemented with DD */
538
539     /* Calculate the radii */
540     for (i = 0; i < natoms; i++)
541     {
542         if (born->use[i] != 0)
543         {
544             gpi             = born->gpol[i]+born->gpol_still_work[i];
545             gpi2            = gpi * gpi;
546             born->bRad[i]   = factor*gmx::invsqrt(gpi2);
547             fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
548         }
549     }
550
551     return 0;
552 }
553
554
555
556 int
557 genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
558                                     t_mdatoms *            mdatoms,
559                                     gmx_genborn_t *        born,
560                                     int                    gb_algorithm,
561                                     gmx_localtop_t *       top,
562                                     real *                 x,
563                                     void *                 work)
564 {
565     gmx_allvsallgb2_data_t *aadata;
566     int                     natoms;
567     int                     ni0, ni1;
568     int                     nj0, nj1, nj2;
569     int                     i, j, k, n;
570     int              *      mask;
571
572     real                    ix, iy, iz;
573     real                    jx, jy, jz;
574     real                    dx, dy, dz;
575     real                    rsq, rinv;
576     real                    prod, raj;
577     real                    rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
578     real                    dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
579     real                    lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
580     real                    tchain;
581     real                    dadxi, dadxj;
582     real                    rad, min_rad;
583
584     natoms              = mdatoms->nr;
585     ni0                 = 0;
586     ni1                 = mdatoms->homenr;
587
588     n       = 0;
589     doffset = born->gb_doffset;
590
591     aadata = *((gmx_allvsallgb2_data_t **)work);
592
593     if (aadata == nullptr)
594     {
595         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
596                                TRUE, TRUE, TRUE);
597         *((gmx_allvsallgb2_data_t **)work) = aadata;
598     }
599
600     for (i = 0; i < born->nr; i++)
601     {
602         born->gpol_hct_work[i] = 0;
603     }
604
605     for (i = ni0; i < ni1; i++)
606     {
607         /* We assume shifts are NOT used for all-vs-all interactions */
608
609         /* Load i atom data */
610         ix                = x[3*i];
611         iy                = x[3*i+1];
612         iz                = x[3*i+2];
613
614         rai      = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
615         rai_inv  = 1.0/rai;
616
617         sk_ai    = born->param[i];
618         sk2_ai   = sk_ai*sk_ai;
619
620         sum_ai   = 0;
621
622         /* Load limits for loop over neighbors */
623         nj0              = aadata->jindex_gb[3*i];
624         nj1              = aadata->jindex_gb[3*i+1];
625         nj2              = aadata->jindex_gb[3*i+2];
626
627         mask             = aadata->exclusion_mask_gb[i];
628
629         /* Prologue part, including exclusion mask */
630         for (j = nj0; j < nj1; j++, mask++)
631         {
632             if (*mask != 0)
633             {
634                 k = j%natoms;
635
636                 /* load j atom coordinates */
637                 jx                = x[3*k];
638                 jy                = x[3*k+1];
639                 jz                = x[3*k+2];
640
641                 /* Calculate distance */
642                 dx                = ix - jx;
643                 dy                = iy - jy;
644                 dz                = iz - jz;
645                 rsq               = dx*dx+dy*dy+dz*dz;
646
647                 /* Calculate 1/r and 1/r2 */
648                 rinv              = gmx::invsqrt(rsq);
649                 dr                = rsq*rinv;
650
651                 /* sk is precalculated in init_gb() */
652                 sk    = born->param[k];
653                 raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
654
655                 /* aj -> ai interaction */
656
657
658                 if (rai < dr+sk)
659                 {
660                     lij       = 1.0/(dr-sk);
661                     dlij      = 1.0;
662
663                     if (rai > dr-sk)
664                     {
665                         lij  = rai_inv;
666                         dlij = 0.0;
667                     }
668
669                     uij      = 1.0/(dr+sk);
670                     lij2     = lij  * lij;
671                     lij3     = lij2 * lij;
672                     uij2     = uij  * uij;
673                     uij3     = uij2 * uij;
674
675                     diff2    = uij2-lij2;
676
677                     lij_inv  = gmx::invsqrt(lij2);
678                     sk2      = sk*sk;
679                     sk2_rinv = sk2*rinv;
680                     prod     = 0.25*sk2_rinv;
681
682                     log_term = std::log(uij*lij_inv);
683                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
684                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
685
686                     if (rai < sk-dr)
687                     {
688                         tmp = tmp + 2.0 * (rai_inv-lij);
689                     }
690
691                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
692                     t2      = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
693
694                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
695
696                     dadxi = (dlij*t1+t2+t3)*rinv;
697
698                     sum_ai += 0.5*tmp;
699                 }
700                 else
701                 {
702                     dadxi = 0.0;
703                 }
704
705                 /* ai -> aj interaction */
706                 if (raj < dr + sk_ai)
707                 {
708                     lij     = 1.0/(dr-sk_ai);
709                     dlij    = 1.0;
710                     raj_inv = 1.0/raj;
711
712                     if (raj > dr-sk_ai)
713                     {
714                         lij  = raj_inv;
715                         dlij = 0.0;
716                     }
717
718                     lij2     = lij  * lij;
719                     lij3     = lij2 * lij;
720
721                     uij      = 1.0/(dr+sk_ai);
722                     uij2     = uij  * uij;
723                     uij3     = uij2 * uij;
724
725                     diff2    = uij2-lij2;
726
727                     lij_inv  = gmx::invsqrt(lij2);
728                     sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
729                     sk2_rinv = sk2*rinv;
730                     prod     = 0.25 * sk2_rinv;
731
732                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
733                     log_term = std::log(uij*lij_inv);
734
735                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
736
737                     if (raj < sk_ai-dr)
738                     {
739                         tmp     = tmp + 2.0 * (raj_inv-lij);
740                     }
741
742                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
743                     t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
744                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
745
746                     dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule  */
747
748                     born->gpol_hct_work[k] += 0.5*tmp;
749                 }
750                 else
751                 {
752                     dadxj = 0.0;
753                 }
754                 fr->dadx[n++] = dadxi;
755                 fr->dadx[n++] = dadxj;
756
757             }
758         }
759
760         /* Main part, no exclusions */
761         for (j = nj1; j < nj2; j++)
762         {
763             k = j%natoms;
764
765             /* load j atom coordinates */
766             jx                = x[3*k];
767             jy                = x[3*k+1];
768             jz                = x[3*k+2];
769
770             /* Calculate distance */
771             dx                = ix - jx;
772             dy                = iy - jy;
773             dz                = iz - jz;
774             rsq               = dx*dx+dy*dy+dz*dz;
775
776             /* Calculate 1/r and 1/r2 */
777             rinv              = gmx::invsqrt(rsq);
778             dr                = rsq*rinv;
779
780             /* sk is precalculated in init_gb() */
781             sk    = born->param[k];
782             raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
783
784             /* aj -> ai interaction */
785             if (rai < dr+sk)
786             {
787                 lij       = 1.0/(dr-sk);
788                 dlij      = 1.0;
789
790                 if (rai > dr-sk)
791                 {
792                     lij  = rai_inv;
793                     dlij = 0.0;
794                 }
795
796                 uij      = 1.0/(dr+sk);
797                 lij2     = lij  * lij;
798                 lij3     = lij2 * lij;
799                 uij2     = uij  * uij;
800                 uij3     = uij2 * uij;
801
802                 diff2    = uij2-lij2;
803
804                 lij_inv  = gmx::invsqrt(lij2);
805                 sk2      = sk*sk;
806                 sk2_rinv = sk2*rinv;
807                 prod     = 0.25*sk2_rinv;
808
809                 log_term = std::log(uij*lij_inv);
810                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
811                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
812
813                 if (rai < sk-dr)
814                 {
815                     tmp = tmp + 2.0 * (rai_inv-lij);
816                 }
817
818                 /* duij    = 1.0; */
819                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
820                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
821                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
822
823                 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
824
825                 sum_ai += 0.5*tmp;
826             }
827             else
828             {
829                 dadxi = 0.0;
830             }
831
832             /* ai -> aj interaction */
833             if (raj < dr + sk_ai)
834             {
835                 lij     = 1.0/(dr-sk_ai);
836                 dlij    = 1.0;
837                 raj_inv = 1.0/raj;
838
839                 if (raj > dr-sk_ai)
840                 {
841                     lij  = raj_inv;
842                     dlij = 0.0;
843                 }
844
845                 lij2     = lij  * lij;
846                 lij3     = lij2 * lij;
847
848                 uij      = 1.0/(dr+sk_ai);
849                 uij2     = uij  * uij;
850                 uij3     = uij2 * uij;
851
852                 diff2    = uij2-lij2;
853
854                 lij_inv  = gmx::invsqrt(lij2);
855                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
856                 sk2_rinv = sk2*rinv;
857                 prod     = 0.25 * sk2_rinv;
858
859                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
860                 log_term = std::log(uij*lij_inv);
861
862                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
863
864                 if (raj < sk_ai-dr)
865                 {
866                     tmp     = tmp + 2.0 * (raj_inv-lij);
867                 }
868
869                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
870                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
871                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
872
873                 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
874
875                 born->gpol_hct_work[k] += 0.5*tmp;
876             }
877             else
878             {
879                 dadxj = 0.0;
880             }
881             fr->dadx[n++] = dadxi;
882             fr->dadx[n++] = dadxj;
883         }
884         born->gpol_hct_work[i] += sum_ai;
885     }
886
887     /* Parallel summations would go here if ever implemented with DD */
888
889     if (gb_algorithm == egbHCT)
890     {
891         /* HCT */
892         for (i = 0; i < natoms; i++)
893         {
894             if (born->use[i] != 0)
895             {
896                 rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
897                 sum_ai  = 1.0/rai - born->gpol_hct_work[i];
898                 min_rad = rai + born->gb_doffset;
899                 rad     = 1.0/sum_ai;
900
901                 born->bRad[i]   = std::max(rad, min_rad);
902                 fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
903             }
904         }
905
906     }
907     else
908     {
909         /* OBC */
910         /* Calculate the radii */
911         for (i = 0; i < natoms; i++)
912         {
913             if (born->use[i] != 0)
914             {
915                 rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
916                 rai_inv2   = 1.0/rai;
917                 rai        = rai-doffset;
918                 rai_inv    = 1.0/rai;
919                 sum_ai     = rai * born->gpol_hct_work[i];
920                 sum_ai2    = sum_ai  * sum_ai;
921                 sum_ai3    = sum_ai2 * sum_ai;
922
923                 tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
924                 born->bRad[i] = rai_inv - tsum*rai_inv2;
925                 born->bRad[i] = 1.0 / born->bRad[i];
926
927                 fr->invsqrta[i] = gmx::invsqrt(born->bRad[i]);
928
929                 tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
930                 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
931             }
932         }
933     }
934     return 0;
935 }
936
937
938
939
940
941 int
942 genborn_allvsall_calc_chainrule(t_forcerec *           fr,
943                                 t_mdatoms *            mdatoms,
944                                 gmx_genborn_t *        born,
945                                 real *                 x,
946                                 real *                 f,
947                                 int                    gb_algorithm,
948                                 void *                 work)
949 {
950     gmx_allvsallgb2_data_t *aadata;
951     int                     natoms;
952     int                     ni0, ni1;
953     int                     nj0, nj1, nj2;
954     int                     i, j, k, n;
955     int                     idx;
956     int              *      mask;
957
958     real                    ix, iy, iz;
959     real                    fix, fiy, fiz;
960     real                    jx, jy, jz;
961     real                    dx, dy, dz;
962     real                    tx, ty, tz;
963     real                    rbai, rbaj, fgb, fgb_ai, rbi;
964     real              *     rb;
965     real              *     dadx;
966
967     natoms              = mdatoms->nr;
968     ni0                 = 0;
969     ni1                 = mdatoms->homenr;
970     dadx                = fr->dadx;
971
972     aadata = (gmx_allvsallgb2_data_t *)work;
973
974     n  = 0;
975     rb = born->work;
976
977     /* Loop to get the proper form for the Born radius term */
978     if (gb_algorithm == egbSTILL)
979     {
980         for (i = 0; i < natoms; i++)
981         {
982             rbi   = born->bRad[i];
983             rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
984         }
985     }
986     else if (gb_algorithm == egbHCT)
987     {
988         for (i = 0; i < natoms; i++)
989         {
990             rbi   = born->bRad[i];
991             rb[i] = rbi * rbi * fr->dvda[i];
992         }
993     }
994     else if (gb_algorithm == egbOBC)
995     {
996         for (idx = 0; idx < natoms; idx++)
997         {
998             rbi     = born->bRad[idx];
999             rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1000         }
1001     }
1002
1003     for (i = ni0; i < ni1; i++)
1004     {
1005         /* We assume shifts are NOT used for all-vs-all interactions */
1006
1007         /* Load i atom data */
1008         ix                = x[3*i];
1009         iy                = x[3*i+1];
1010         iz                = x[3*i+2];
1011
1012         fix               = 0;
1013         fiy               = 0;
1014         fiz               = 0;
1015
1016         rbai              = rb[i];
1017
1018         /* Load limits for loop over neighbors */
1019         nj0              = aadata->jindex_gb[3*i];
1020         nj1              = aadata->jindex_gb[3*i+1];
1021         nj2              = aadata->jindex_gb[3*i+2];
1022
1023         mask             = aadata->exclusion_mask_gb[i];
1024
1025         /* Prologue part, including exclusion mask */
1026         for (j = nj0; j < nj1; j++, mask++)
1027         {
1028             if (*mask != 0)
1029             {
1030                 k = j%natoms;
1031
1032                 /* load j atom coordinates */
1033                 jx                = x[3*k];
1034                 jy                = x[3*k+1];
1035                 jz                = x[3*k+2];
1036
1037                 /* Calculate distance */
1038                 dx                = ix - jx;
1039                 dy                = iy - jy;
1040                 dz                = iz - jz;
1041
1042                 rbaj              = rb[k];
1043
1044                 fgb     = rbai*dadx[n++];
1045                 fgb_ai  = rbaj*dadx[n++];
1046
1047                 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1048                 fgb     = fgb + fgb_ai;
1049
1050                 tx      = fgb * dx;
1051                 ty      = fgb * dy;
1052                 tz      = fgb * dz;
1053
1054                 fix     = fix + tx;
1055                 fiy     = fiy + ty;
1056                 fiz     = fiz + tz;
1057
1058                 /* Update force on atom aj */
1059                 f[3*k]   = f[3*k] - tx;
1060                 f[3*k+1] = f[3*k+1] - ty;
1061                 f[3*k+2] = f[3*k+2] - tz;
1062             }
1063         }
1064
1065         /* Main part, no exclusions */
1066         for (j = nj1; j < nj2; j++)
1067         {
1068             k = j%natoms;
1069
1070             /* load j atom coordinates */
1071             jx                = x[3*k];
1072             jy                = x[3*k+1];
1073             jz                = x[3*k+2];
1074
1075             /* Calculate distance */
1076             dx                = ix - jx;
1077             dy                = iy - jy;
1078             dz                = iz - jz;
1079
1080             rbaj              = rb[k];
1081
1082             fgb     = rbai*dadx[n++];
1083             fgb_ai  = rbaj*dadx[n++];
1084
1085             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1086             fgb     = fgb + fgb_ai;
1087
1088             tx      = fgb * dx;
1089             ty      = fgb * dy;
1090             tz      = fgb * dz;
1091
1092             fix     = fix + tx;
1093             fiy     = fiy + ty;
1094             fiz     = fiz + tz;
1095
1096             /* Update force on atom aj */
1097             f[3*k]   = f[3*k] - tx;
1098             f[3*k+1] = f[3*k+1] - ty;
1099             f[3*k+2] = f[3*k+2] - tz;
1100         }
1101         /* Update force and shift forces on atom ai */
1102         f[3*i]   = f[3*i] + fix;
1103         f[3*i+1] = f[3*i+1] + fiy;
1104         f[3*i+2] = f[3*i+2] + fiz;
1105     }
1106
1107     return 0;
1108 }