Merge remote-tracking branch 'gerrit/release-4-6'
[alexxy/gromacs.git] / src / gromacs / mdlib / genborn_allvsall.c
1 /*
2  *                This source code is part of
3  *
4  *                 G   R   O   M   A   C   S
5  *
6  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7  * Copyright (c) 2001-2009, The GROMACS Development Team
8  *
9  * Gromacs is a library for molecular simulation and trajectory analysis,
10  * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11  * a full list of developers and information, check out http://www.gromacs.org
12  *
13  * This program is free software; you can redistribute it and/or modify it under 
14  * the terms of the GNU Lesser General Public License as published by the Free 
15  * Software Foundation; either version 2 of the License, or (at your option) any 
16  * later version.
17  * As a special exception, you may use this file as part of a free software
18  * library without restriction.  Specifically, if other files instantiate
19  * templates or use macros or inline functions from this file, or you compile
20  * this file and link it with other files to produce an executable, this
21  * file does not by itself cause the resulting executable to be covered by
22  * the GNU Lesser General Public License.  
23  *
24  * In plain-speak: do not worry about classes/macros/templates either - only
25  * changes to the library have to be LGPL, not an application linking with it.
26  *
27  * To help fund GROMACS development, we humbly ask that you cite
28  * the papers people have written on it - you can find them on the website!
29  */
30 #ifdef HAVE_CONFIG_H
31 #include <config.h>
32 #endif
33
34 #include <math.h>
35 #include "types/simple.h"
36
37 #include "vec.h"
38 #include "smalloc.h"
39
40 #include "partdec.h"
41 #include "network.h"
42 #include "physics.h"
43 #include "genborn.h"
44 #include "genborn_allvsall.h"
45
46
47 typedef struct 
48 {
49     int *      jindex_gb;
50     int **     exclusion_mask_gb;
51
52 gmx_allvsallgb2_data_t;
53
54 static int 
55 calc_maxoffset(int i,int natoms)
56 {
57     int maxoffset;
58     
59     if ((natoms % 2) == 1)
60     {
61         /* Odd number of atoms, easy */
62         maxoffset = natoms/2;
63     }
64     else if ((natoms % 4) == 0)
65     {
66         /* Multiple of four is hard */
67         if (i < natoms/2)
68         {
69             if ((i % 2) == 0)
70             {
71                 maxoffset=natoms/2;
72             }
73             else
74             {
75                 maxoffset=natoms/2-1;
76             }
77         }
78         else
79         {
80             if ((i % 2) == 1)
81             {
82                 maxoffset=natoms/2;
83             }
84             else
85             {
86                 maxoffset=natoms/2-1;
87             }
88         }
89     }
90     else
91     {
92         /* natoms/2 = odd */
93         if ((i % 2) == 0)
94         {
95             maxoffset=natoms/2;
96         }
97         else
98         {
99             maxoffset=natoms/2-1;
100         }
101     }
102     
103     return maxoffset;
104 }
105
106 static void
107 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t *   aadata,
108                                 t_ilist *                  ilist,
109                                 int                        natoms,
110                                 gmx_bool                       bInclude12,
111                                 gmx_bool                       bInclude13,
112                                 gmx_bool                       bInclude14)
113 {
114     int i,j,k,tp;
115     int a1,a2;
116     int nj0,nj1;
117     int max_offset;
118     int max_excl_offset;
119     int nj;
120     
121     /* This routine can appear to be a bit complex, but it is mostly book-keeping.
122      * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
123      * whether they should interact or not. 
124      *
125      * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
126      * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
127      * we create a jindex array with three elements per i atom: the starting point, the point to
128      * which we need to check exclusions, and the end point.
129      * This way we only have to allocate a short exclusion mask per i atom.
130      */
131     
132     /* Allocate memory for jindex arrays */
133     snew(aadata->jindex_gb,3*natoms);
134     
135     /* Pointer to lists with exclusion masks */
136     snew(aadata->exclusion_mask_gb,natoms);
137     
138     for(i=0;i<natoms;i++)
139     {
140         /* Start */
141         aadata->jindex_gb[3*i]       = i+1;        
142         max_offset = calc_maxoffset(i,natoms);
143
144         /* first check the max range of atoms to EXCLUDE */
145         max_excl_offset = 0;
146         if(!bInclude12)
147         {
148             for(j=0;j<ilist[F_GB12].nr;j+=3)
149             {
150                 a1 = ilist[F_GB12].iatoms[j+1];
151                 a2 = ilist[F_GB12].iatoms[j+2];
152                 
153                 if(a1==i)
154                 {
155                     k = a2-a1;
156                 }
157                 else if(a2==i)
158                 {
159                     k = a1+natoms-a2;
160                 }
161                 else 
162                 {
163                     continue;
164                 }
165                 if(k>0 && k<=max_offset)
166                 {
167                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
168                 }
169             }
170         }
171         if(!bInclude13)
172         {
173             for(j=0;j<ilist[F_GB13].nr;j+=3)
174             {
175                 a1 = ilist[F_GB13].iatoms[j+1];
176                 a2 = ilist[F_GB13].iatoms[j+2];
177                 
178                 
179                 if(a1==i)
180                 {
181                     k = a2-a1;
182                 }
183                 else if(a2==i)
184                 {
185                     k = a1+natoms-a2;
186                 }
187                 else 
188                 {
189                     continue;
190                 }
191                 if(k>0 && k<=max_offset)
192                 {
193                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
194                 }
195             }
196         }
197         if(!bInclude14)
198         {
199             for(j=0;j<ilist[F_GB14].nr;j+=3)
200             {
201                 a1 = ilist[F_GB14].iatoms[j+1];
202                 a2 = ilist[F_GB14].iatoms[j+2];
203                 
204                 
205                 if(a1==i)
206                 {
207                     k = a2-a1;
208                 }
209                 else if(a2==i)
210                 {
211                     k = a1+natoms-a2;
212                 }
213                 else 
214                 {
215                     continue;
216                 }
217                 if(k>0 && k<=max_offset)
218                 {
219                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
220                 }
221             }
222         }
223         max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
224
225         aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;        
226         
227         snew(aadata->exclusion_mask_gb[i],max_excl_offset);
228         
229         /* Include everything by default */
230         for(j=0;j<max_excl_offset;j++)
231         {
232             /* Use all-ones to mark interactions that should be present, compatible with SSE */
233             aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
234         }
235         /* Go through exclusions again */
236         if(!bInclude12)
237         {
238             for(j=0;j<ilist[F_GB12].nr;j+=3)
239             {
240                 a1 = ilist[F_GB12].iatoms[j+1];
241                 a2 = ilist[F_GB12].iatoms[j+2];
242                 
243                 if(a1==i)
244                 {
245                     k = a2-a1;
246                 }
247                 else if(a2==i)
248                 {
249                     k = a1+natoms-a2;
250                 }
251                 else 
252                 {
253                     continue;
254                 }
255                 if(k>0 && k<=max_offset)
256                 {
257                     aadata->exclusion_mask_gb[i][k-1] = 0;
258                 }
259             }
260         }
261         if(!bInclude13)
262         {
263             for(j=0;j<ilist[F_GB13].nr;j+=3)
264             {
265                 a1 = ilist[F_GB13].iatoms[j+1];
266                 a2 = ilist[F_GB13].iatoms[j+2];
267                
268                 if(a1==i)
269                 {
270                     k = a2-a1;
271                 }
272                 else if(a2==i)
273                 {
274                     k = a1+natoms-a2;
275                 }
276                 else 
277                 {
278                     continue;
279                 }
280                 if(k>0 && k<=max_offset)
281                 {
282                     aadata->exclusion_mask_gb[i][k-1] = 0;
283                 }
284             }
285         }
286         if(!bInclude14)
287         {
288             for(j=0;j<ilist[F_GB14].nr;j+=3)
289             {
290                 a1 = ilist[F_GB14].iatoms[j+1];
291                 a2 = ilist[F_GB14].iatoms[j+2];
292
293                 if(a1==i)
294                 {
295                     k = a2-a1;
296                 }
297                 else if(a2==i)
298                 {
299                     k = a1+natoms-a2;
300                 }
301                 else 
302                 {
303                     continue;
304                 }
305                 if(k>0 && k<=max_offset)
306                 {
307                     aadata->exclusion_mask_gb[i][k-1] = 0;
308                 }
309             }
310         }
311         
312         /* End */
313         
314         /* End */
315         aadata->jindex_gb[3*i+2] = i+1+max_offset;        
316     }
317 }
318
319
320 static void
321 genborn_allvsall_setup(gmx_allvsallgb2_data_t **  p_aadata,
322                        t_ilist *                  ilist,
323                        int                        natoms,
324                        gmx_bool                       bInclude12,
325                        gmx_bool                       bInclude13,
326                        gmx_bool                       bInclude14)
327 {
328         int i,j,idx;
329         gmx_allvsallgb2_data_t *aadata;
330     real *p;
331     
332         snew(aadata,1);
333         *p_aadata = aadata;
334         
335     setup_gb_exclusions_and_indices(aadata,ilist,natoms,bInclude12,bInclude13,bInclude14);
336 }
337
338
339
340 int
341 genborn_allvsall_calc_still_radii(t_forcerec *           fr,
342                                   t_mdatoms *            mdatoms,
343                                   gmx_genborn_t *        born,
344                                   gmx_localtop_t *       top,
345                                   real *                 x,
346                                   t_commrec *            cr,
347                                   void *                 work)
348 {
349         gmx_allvsallgb2_data_t *aadata;
350         int        natoms;
351         int        ni0,ni1;
352         int        nj0,nj1,nj2;
353         int        i,j,k,n;
354     int *      mask;
355     
356     real       ix,iy,iz;
357     real       jx,jy,jz;
358     real       dx,dy,dz;
359     real       rsq,rinv;
360     real       gpi,rai,vai;
361     real       prod_ai;
362     real       irsq,idr4,idr6;
363     real       raj,rvdw,ratio;
364     real       vaj,ccf,dccf,theta,cosq;
365     real       term,prod,icf4,icf6,gpi2,factor,sinq;
366     
367     natoms              = mdatoms->nr;
368         ni0                 = mdatoms->start;
369         ni1                 = mdatoms->start+mdatoms->homenr;
370     factor  = 0.5*ONE_4PI_EPS0;
371     n = 0;
372     
373     aadata = *((gmx_allvsallgb2_data_t **)work);
374     
375         if(aadata==NULL)
376         {
377                 genborn_allvsall_setup(&aadata,top->idef.il,mdatoms->nr,
378                                FALSE,FALSE,TRUE);
379         *((gmx_allvsallgb2_data_t **)work) = aadata;
380         }
381     
382     
383     for(i=0;i<born->nr;i++)
384     {
385         born->gpol_still_work[i] = 0;
386     }
387     
388     
389         for(i=ni0; i<ni1; i++)
390         {
391                 /* We assume shifts are NOT used for all-vs-all interactions */
392
393                 /* Load i atom data */
394         ix                = x[3*i];
395         iy                = x[3*i+1];
396         iz                = x[3*i+2];
397         
398         gpi               = 0.0;
399
400         rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]];
401                 vai     = born->vsolv[i];
402                 prod_ai = STILL_P4*vai;
403         
404                 /* Load limits for loop over neighbors */
405                 nj0              = aadata->jindex_gb[3*i];
406                 nj1              = aadata->jindex_gb[3*i+1];
407                 nj2              = aadata->jindex_gb[3*i+2];
408         
409         mask             = aadata->exclusion_mask_gb[i];
410
411         /* Prologue part, including exclusion mask */
412         for(j=nj0; j<nj1; j++,mask++)
413         {          
414             if(*mask!=0)
415             {
416                 k = j%natoms;
417
418                 /* load j atom coordinates */
419                 jx                = x[3*k];
420                 jy                = x[3*k+1];
421                 jz                = x[3*k+2];
422                 
423                 /* Calculate distance */
424                 dx                = ix - jx;      
425                 dy                = iy - jy;      
426                 dz                = iz - jz;      
427                 rsq               = dx*dx+dy*dy+dz*dz;
428                 
429                 /* Calculate 1/r and 1/r2 */
430                 rinv              = gmx_invsqrt(rsq);
431                 irsq  = rinv*rinv;
432                 idr4  = irsq*irsq;
433                 idr6  = idr4*irsq;
434
435                 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
436                 
437                 rvdw  = rai + raj;
438                 
439                 ratio = rsq / (rvdw * rvdw);
440                 vaj   = born->vsolv[k];
441                 
442                 
443                 if(ratio>STILL_P5INV) 
444                 {
445                     ccf=1.0;
446                     dccf=0.0;
447                 }
448                 else
449                 {
450                     theta = ratio*STILL_PIP5;
451                     cosq  = cos(theta);
452                     term  = 0.5*(1.0-cosq);
453                     ccf   = term*term;
454                     sinq  = 1.0 - cosq*cosq;
455                     dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
456                 }
457                 
458                 prod          = STILL_P4*vaj;
459                 icf4          = ccf*idr4;
460                 icf6          = (4*ccf-dccf)*idr6;
461                 
462                 born->gpol_still_work[k] += prod_ai*icf4;
463                 gpi             = gpi+prod*icf4;
464                 
465                 /* Save ai->aj and aj->ai chain rule terms */
466                 fr->dadx[n++]   = prod*icf6;
467                 fr->dadx[n++]   = prod_ai*icf6;   
468                 
469                 /* 27 flops, plus one cos(x) - estimate at 20 flops  => 47 */
470
471             }
472         }
473         
474         /* Main part, no exclusions */
475         for(j=nj1; j<nj2; j++)
476         {       
477             k = j%natoms;
478
479             /* load j atom coordinates */
480             jx                = x[3*k];
481             jy                = x[3*k+1];
482             jz                = x[3*k+2];
483             
484             /* Calculate distance */
485             dx                = ix - jx;      
486             dy                = iy - jy;      
487             dz                = iz - jz;      
488             rsq               = dx*dx+dy*dy+dz*dz;
489             
490             /* Calculate 1/r and 1/r2 */
491             rinv              = gmx_invsqrt(rsq);
492             irsq  = rinv*rinv;
493             idr4  = irsq*irsq;
494             idr6  = idr4*irsq;
495             
496             raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
497             
498             rvdw  = rai + raj;
499             
500             ratio = rsq / (rvdw * rvdw);
501             vaj   = born->vsolv[k];
502             
503             if(ratio>STILL_P5INV) 
504             {
505                 ccf=1.0;
506                 dccf=0.0;
507             }
508             else
509             {
510                 theta = ratio*STILL_PIP5;
511                 cosq  = cos(theta);
512                 term  = 0.5*(1.0-cosq);
513                 ccf   = term*term;
514                 sinq  = 1.0 - cosq*cosq;
515                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
516             }
517             
518             prod          = STILL_P4*vaj;
519             icf4          = ccf*idr4;
520             icf6          = (4*ccf-dccf)*idr6;
521             
522             born->gpol_still_work[k] += prod_ai*icf4;
523             gpi             = gpi+prod*icf4;
524             
525             /* Save ai->aj and aj->ai chain rule terms */
526             fr->dadx[n++]   = prod*icf6;
527             fr->dadx[n++]   = prod_ai*icf6;
528         }
529         born->gpol_still_work[i]+=gpi;
530         }    
531     
532     /* Parallel summations */
533         if(PARTDECOMP(cr))
534         {
535                 gmx_sum(natoms, born->gpol_still_work, cr);
536         }
537         
538         /* Calculate the radii */
539         for(i=0;i<natoms;i++)
540         {
541                 if(born->use[i] != 0)
542                 {
543                         gpi  = born->gpol[i]+born->gpol_still_work[i];
544                         gpi2 = gpi * gpi;
545                         born->bRad[i]   = factor*gmx_invsqrt(gpi2);
546                         fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
547                 }
548         }
549         
550         return 0;
551 }
552
553
554
555 int
556 genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
557                                     t_mdatoms *            mdatoms,
558                                     gmx_genborn_t *        born,
559                                     int                    gb_algorithm,
560                                     gmx_localtop_t *       top,
561                                     real *                 x,
562                                     t_commrec *            cr,
563                                     void *                 work)
564 {
565         gmx_allvsallgb2_data_t *aadata;
566         int        natoms;
567         int        ni0,ni1;
568         int        nj0,nj1,nj2;
569         int        i,j,k,n;
570     int *      mask;
571     
572     real       ix,iy,iz;
573     real       jx,jy,jz;
574     real       dx,dy,dz;
575     real       rsq,rinv;
576     real       prod,raj;
577     real       rai,doffset,rai_inv,rai_inv2,sk_ai,sk2_ai,sum_ai;
578     real       dr,sk,lij,dlij,lij2,lij3,uij2,uij3,diff2,uij,log_term;
579     real       lij_inv,sk2,sk2_rinv,tmp,t1,t2,t3,raj_inv,sum_ai2,sum_ai3,tsum;
580     real       tchain;
581     real       dadxi,dadxj;
582     real       rad,min_rad;
583     
584     natoms              = mdatoms->nr;
585         ni0                 = mdatoms->start;
586         ni1                 = mdatoms->start+mdatoms->homenr;
587
588     n = 0;
589     prod = 0;
590     raj = 0;
591     doffset = born->gb_doffset;
592
593     aadata = *((gmx_allvsallgb2_data_t **)work);
594     
595         if(aadata==NULL)
596         {
597                 genborn_allvsall_setup(&aadata,top->idef.il,mdatoms->nr,
598                                TRUE,TRUE,TRUE);
599         *((gmx_allvsallgb2_data_t **)work) = aadata;
600         }
601     
602     for(i=0;i<born->nr;i++)
603     {
604         born->gpol_hct_work[i] = 0;
605     }
606     
607         for(i=ni0; i<ni1; i++)
608         {
609                 /* We assume shifts are NOT used for all-vs-all interactions */
610                 
611                 /* Load i atom data */
612         ix                = x[3*i];
613         iy                = x[3*i+1];
614         iz                = x[3*i+2];
615         
616                 rai      = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
617                 rai_inv  = 1.0/rai;
618                 
619                 sk_ai    = born->param[i];
620                 sk2_ai   = sk_ai*sk_ai;
621
622         sum_ai   = 0;
623         
624                 /* Load limits for loop over neighbors */
625                 nj0              = aadata->jindex_gb[3*i];
626                 nj1              = aadata->jindex_gb[3*i+1];
627                 nj2              = aadata->jindex_gb[3*i+2];
628         
629         mask             = aadata->exclusion_mask_gb[i];
630                
631         /* Prologue part, including exclusion mask */
632         for(j=nj0; j<nj1; j++,mask++)
633         {          
634             if(*mask!=0)
635             {
636                 k = j%natoms;
637                 
638                 /* load j atom coordinates */
639                 jx                = x[3*k];
640                 jy                = x[3*k+1];
641                 jz                = x[3*k+2];
642                 
643                 /* Calculate distance */
644                 dx                = ix - jx;      
645                 dy                = iy - jy;      
646                 dz                = iz - jz;      
647                 rsq               = dx*dx+dy*dy+dz*dz;
648                 
649                 /* Calculate 1/r and 1/r2 */
650                 rinv              = gmx_invsqrt(rsq);
651                 dr                = rsq*rinv;
652                 
653                 /* sk is precalculated in init_gb() */
654                 sk    = born->param[k];
655                 raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset; 
656                 
657                 /* aj -> ai interaction */
658                                
659                 
660                 if(rai < dr+sk)
661                 {
662                     lij       = 1.0/(dr-sk); 
663                     dlij      = 1.0; 
664                     
665                     if(rai>dr-sk)
666                     {
667                         lij  = rai_inv; 
668                         dlij = 0.0;
669                     }
670                                          
671                     uij      = 1.0/(dr+sk);  
672                     lij2     = lij  * lij;
673                     lij3     = lij2 * lij;
674                     uij2     = uij  * uij;
675                     uij3     = uij2 * uij;
676
677                     diff2    = uij2-lij2; 
678                     
679                     lij_inv  = gmx_invsqrt(lij2); 
680                     sk2      = sk*sk;
681                     sk2_rinv = sk2*rinv;        
682                     prod     = 0.25*sk2_rinv;
683                     
684                     log_term = log(uij*lij_inv); 
685                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
686                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
687                     
688                     if(rai < sk-dr)
689                     {
690                         tmp = tmp + 2.0 * (rai_inv-lij); 
691                     }
692                     
693                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);   
694                     t2      = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr); 
695                     
696                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
697                                         
698                     dadxi = (dlij*t1+t2+t3)*rinv; 
699                     
700                     sum_ai += 0.5*tmp;
701                 }
702                 else
703                 {
704                     dadxi = 0.0;
705                 }
706                                 
707                 /* ai -> aj interaction */
708                 if(raj < dr + sk_ai)
709                 {
710                     lij     = 1.0/(dr-sk_ai);
711                     dlij    = 1.0;
712                     raj_inv = 1.0/raj;
713                     
714                     if(raj>dr-sk_ai)
715                     {
716                         lij = raj_inv;
717                         dlij = 0.0;
718                     }
719                     
720                     lij2     = lij  * lij;
721                     lij3     = lij2 * lij;
722                     
723                     uij      = 1.0/(dr+sk_ai);
724                     uij2     = uij  * uij;
725                     uij3     = uij2 * uij;
726                     
727                     diff2    = uij2-lij2;
728                     
729                     lij_inv  = gmx_invsqrt(lij2);
730                     sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
731                     sk2_rinv = sk2*rinv;
732                     prod     = 0.25 * sk2_rinv;
733                     
734                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
735                     log_term = log(uij*lij_inv);
736                     
737                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
738                     
739                     if(raj<sk_ai-dr)
740                     {
741                         tmp     = tmp + 2.0 * (raj_inv-lij);
742                     }
743                     
744                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
745                     t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
746                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
747                     
748                     dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule  */
749                     
750                     born->gpol_hct_work[k] += 0.5*tmp;
751                 }
752                 else 
753                 {
754                     dadxj = 0.0;
755                 }
756                 fr->dadx[n++] = dadxi;
757                 fr->dadx[n++] = dadxj;
758                 
759             }
760         }
761         
762         /* Main part, no exclusions */
763         for(j=nj1; j<nj2; j++)
764         {       
765             k = j%natoms;
766             
767             /* load j atom coordinates */
768             jx                = x[3*k];
769             jy                = x[3*k+1];
770             jz                = x[3*k+2];
771             
772             /* Calculate distance */
773             dx                = ix - jx;      
774             dy                = iy - jy;      
775             dz                = iz - jz;      
776             rsq               = dx*dx+dy*dy+dz*dz;
777             
778             /* Calculate 1/r and 1/r2 */
779             rinv              = gmx_invsqrt(rsq);
780             dr                = rsq*rinv;
781             
782             /* sk is precalculated in init_gb() */
783             sk    = born->param[k];
784             raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset; 
785             
786             /* aj -> ai interaction */
787             if(rai < dr+sk)
788             {
789                 lij       = 1.0/(dr-sk);
790                 dlij      = 1.0; 
791                 
792                 if(rai>dr-sk)
793                 {
794                     lij  = rai_inv;
795                     dlij = 0.0;
796                 }
797                 
798                 uij      = 1.0/(dr+sk);
799                 lij2     = lij  * lij;
800                 lij3     = lij2 * lij;
801                 uij2     = uij  * uij;
802                 uij3     = uij2 * uij;
803                 
804                 diff2    = uij2-lij2;
805                 
806                 lij_inv  = gmx_invsqrt(lij2);
807                 sk2      = sk*sk;
808                 sk2_rinv = sk2*rinv;    
809                 prod     = 0.25*sk2_rinv;
810                 
811                 log_term = log(uij*lij_inv);
812                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
813                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
814                 
815                 if(rai < sk-dr)
816                 {
817                     tmp = tmp + 2.0 * (rai_inv-lij);
818                 }
819                 
820                 /* duij    = 1.0; */
821                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); 
822                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); 
823                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; 
824                 
825                 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
826                 
827                 sum_ai += 0.5*tmp;
828             }
829             else 
830             {
831                 dadxi = 0.0;
832             }
833
834             /* ai -> aj interaction */
835             if(raj < dr + sk_ai)
836             {
837                 lij     = 1.0/(dr-sk_ai);
838                 dlij    = 1.0;
839                 raj_inv = 1.0/raj;
840                 
841                 if(raj>dr-sk_ai)
842                 {
843                     lij = raj_inv;
844                     dlij = 0.0;
845                 }
846                 
847                 lij2     = lij  * lij;
848                 lij3     = lij2 * lij;
849                 
850                 uij      = 1.0/(dr+sk_ai);
851                 uij2     = uij  * uij;
852                 uij3     = uij2 * uij;
853                 
854                 diff2    = uij2-lij2;
855                 
856                 lij_inv  = gmx_invsqrt(lij2);
857                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
858                 sk2_rinv = sk2*rinv;
859                 prod     = 0.25 * sk2_rinv;
860                 
861                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
862                 log_term = log(uij*lij_inv);
863                 
864                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
865                 
866                 if(raj<sk_ai-dr)
867                 {
868                     tmp     = tmp + 2.0 * (raj_inv-lij);
869                 }
870                 
871                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
872                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
873                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
874                 
875                 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
876                 
877                 born->gpol_hct_work[k] += 0.5*tmp;
878             }
879             else 
880             {
881                 dadxj = 0.0;
882             }
883             fr->dadx[n++] = dadxi;
884             fr->dadx[n++] = dadxj;     
885         }
886         born->gpol_hct_work[i]+=sum_ai;
887         }    
888     
889     /* Parallel summations */
890         if(PARTDECOMP(cr))
891         {
892                 gmx_sum(natoms, born->gpol_hct_work, cr);
893         }
894         
895     if(gb_algorithm==egbHCT)
896     {
897         /* HCT */
898         for(i=0;i<natoms;i++)
899         {
900             if(born->use[i] != 0)
901             {
902                 rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset; 
903                 sum_ai  = 1.0/rai - born->gpol_hct_work[i];
904                 min_rad = rai + born->gb_doffset;
905                 rad     = 1.0/sum_ai; 
906                 
907                 born->bRad[i]   = rad > min_rad ? rad : min_rad;
908                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
909             }
910         }
911         
912     }
913     else
914     {
915         /* OBC */
916         /* Calculate the radii */
917         for(i=0;i<natoms;i++)
918         {
919             if(born->use[i] != 0)
920             {
921                 rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
922                 rai_inv2   = 1.0/rai;
923                 rai        = rai-doffset; 
924                 rai_inv    = 1.0/rai;
925                 sum_ai     = rai * born->gpol_hct_work[i];
926                 sum_ai2    = sum_ai  * sum_ai;
927                 sum_ai3    = sum_ai2 * sum_ai;
928                 
929                 tsum    = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
930                 born->bRad[i] = rai_inv - tsum*rai_inv2;
931                 born->bRad[i] = 1.0 / born->bRad[i];
932                 
933                 fr->invsqrta[i]=gmx_invsqrt(born->bRad[i]);
934                 
935                 tchain  = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
936                 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
937             }
938         }
939     }   
940     return 0;
941 }
942
943
944
945
946
947 int
948 genborn_allvsall_calc_chainrule(t_forcerec *           fr,
949                                 t_mdatoms *            mdatoms,
950                                 gmx_genborn_t *        born,
951                                 real *                 x,
952                                 real *                 f,
953                                 int                    gb_algorithm,
954                                 void *                 work)
955 {
956         gmx_allvsallgb2_data_t *aadata;
957         int        natoms;
958         int        ni0,ni1;
959         int        nj0,nj1,nj2;
960         int        i,j,k,n;
961     int        idx;
962     int *      mask;
963     
964     real       ix,iy,iz;
965     real       fix,fiy,fiz;
966     real       jx,jy,jz;
967     real       dx,dy,dz;
968     real       tx,ty,tz;
969     real       rbai,rbaj,fgb,fgb_ai,rbi;
970     real *     rb;
971     real *     dadx;
972     
973     natoms              = mdatoms->nr;
974         ni0                 = mdatoms->start;
975         ni1                 = mdatoms->start+mdatoms->homenr;
976     dadx                = fr->dadx;
977     
978     aadata = (gmx_allvsallgb2_data_t *)work;
979
980     n = 0;
981     rb = born->work;
982     
983         /* Loop to get the proper form for the Born radius term */
984         if(gb_algorithm==egbSTILL) 
985         {
986                 for(i=0;i<natoms;i++)
987                 {
988                         rbi   = born->bRad[i];
989                         rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
990                 }
991         }
992         else if(gb_algorithm==egbHCT) 
993         {
994                 for(i=0;i<natoms;i++)
995                 {
996                         rbi   = born->bRad[i];
997                         rb[i] = rbi * rbi * fr->dvda[i];
998                 }
999         }
1000         else if(gb_algorithm==egbOBC) 
1001         {
1002                 for(idx=0;idx<natoms;idx++)
1003                 {
1004                         rbi   = born->bRad[idx];
1005                         rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1006                 }
1007         }
1008
1009     for(i=ni0; i<ni1; i++)
1010         {
1011                 /* We assume shifts are NOT used for all-vs-all interactions */
1012                 
1013                 /* Load i atom data */
1014         ix                = x[3*i];
1015         iy                = x[3*i+1];
1016         iz                = x[3*i+2];
1017
1018         fix               = 0;
1019         fiy               = 0;
1020         fiz               = 0;
1021         
1022         rbai              = rb[i];
1023         
1024                 /* Load limits for loop over neighbors */
1025                 nj0              = aadata->jindex_gb[3*i];
1026                 nj1              = aadata->jindex_gb[3*i+1];
1027                 nj2              = aadata->jindex_gb[3*i+2];
1028         
1029         mask             = aadata->exclusion_mask_gb[i];
1030                 
1031         /* Prologue part, including exclusion mask */
1032         for(j=nj0; j<nj1; j++,mask++)
1033         {          
1034             if(*mask!=0)
1035             {
1036                 k = j%natoms;
1037                 
1038                 /* load j atom coordinates */
1039                 jx                = x[3*k];
1040                 jy                = x[3*k+1];
1041                 jz                = x[3*k+2];
1042                 
1043                 /* Calculate distance */
1044                 dx                = ix - jx;      
1045                 dy                = iy - jy;      
1046                 dz                = iz - jz;   
1047                 
1048                 rbaj              = rb[k];
1049                 
1050                 fgb     = rbai*dadx[n++]; 
1051                 fgb_ai  = rbaj*dadx[n++];
1052                 
1053                 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1054                 fgb     = fgb + fgb_ai;
1055                 
1056                 tx      = fgb * dx;
1057                 ty      = fgb * dy;
1058                 tz      = fgb * dz;
1059                 
1060                 fix     = fix + tx;
1061                 fiy     = fiy + ty;
1062                 fiz     = fiz + tz;
1063                 
1064                 /* Update force on atom aj */
1065                 f[3*k]   = f[3*k] - tx;
1066                 f[3*k+1] = f[3*k+1] - ty;
1067                 f[3*k+2] = f[3*k+2] - tz;
1068             }
1069         }
1070         
1071         /* Main part, no exclusions */
1072         for(j=nj1; j<nj2; j++)
1073         {       
1074             k = j%natoms;
1075             
1076             /* load j atom coordinates */
1077             jx                = x[3*k];
1078             jy                = x[3*k+1];
1079             jz                = x[3*k+2];
1080             
1081             /* Calculate distance */
1082             dx                = ix - jx;      
1083             dy                = iy - jy;      
1084             dz                = iz - jz;   
1085             
1086             rbaj              = rb[k];
1087             
1088             fgb     = rbai*dadx[n++]; 
1089             fgb_ai  = rbaj*dadx[n++];
1090
1091             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1092             fgb     = fgb + fgb_ai;
1093             
1094             tx      = fgb * dx;
1095             ty      = fgb * dy;
1096             tz      = fgb * dz;
1097             
1098             fix     = fix + tx;
1099             fiy     = fiy + ty;
1100             fiz     = fiz + tz;
1101             
1102             /* Update force on atom aj */
1103             f[3*k]   = f[3*k] - tx;
1104             f[3*k+1] = f[3*k+1] - ty;
1105             f[3*k+2] = f[3*k+2] - tz;
1106         }
1107                 /* Update force and shift forces on atom ai */
1108                 f[3*i]   = f[3*i] + fix;
1109                 f[3*i+1] = f[3*i+1] + fiy;
1110                 f[3*i+2] = f[3*i+2] + fiz;
1111         }    
1112         
1113         return 0;
1114 }
1115
1116
1117