Update copyright statements and change license to LGPL
[alexxy/gromacs.git] / src / mdlib / genborn_allvsall.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2012, by the GROMACS development team, led by
6  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
7  * others, as listed in the AUTHORS file in the top-level source
8  * directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41 #include "types/simple.h"
42
43 #include "vec.h"
44 #include "smalloc.h"
45
46 #include "partdec.h"
47 #include "network.h"
48 #include "physics.h"
49 #include "genborn.h"
50 #include "genborn_allvsall.h"
51
52
53 typedef struct 
54 {
55     int *      jindex_gb;
56     int **     exclusion_mask_gb;
57
58 gmx_allvsallgb2_data_t;
59
60 static int 
61 calc_maxoffset(int i,int natoms)
62 {
63     int maxoffset;
64     
65     if ((natoms % 2) == 1)
66     {
67         /* Odd number of atoms, easy */
68         maxoffset = natoms/2;
69     }
70     else if ((natoms % 4) == 0)
71     {
72         /* Multiple of four is hard */
73         if (i < natoms/2)
74         {
75             if ((i % 2) == 0)
76             {
77                 maxoffset=natoms/2;
78             }
79             else
80             {
81                 maxoffset=natoms/2-1;
82             }
83         }
84         else
85         {
86             if ((i % 2) == 1)
87             {
88                 maxoffset=natoms/2;
89             }
90             else
91             {
92                 maxoffset=natoms/2-1;
93             }
94         }
95     }
96     else
97     {
98         /* natoms/2 = odd */
99         if ((i % 2) == 0)
100         {
101             maxoffset=natoms/2;
102         }
103         else
104         {
105             maxoffset=natoms/2-1;
106         }
107     }
108     
109     return maxoffset;
110 }
111
112 static void
113 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t *   aadata,
114                                 t_ilist *                  ilist,
115                                 int                        natoms,
116                                 gmx_bool                       bInclude12,
117                                 gmx_bool                       bInclude13,
118                                 gmx_bool                       bInclude14)
119 {
120     int i,j,k,tp;
121     int a1,a2;
122     int nj0,nj1;
123     int max_offset;
124     int max_excl_offset;
125     int nj;
126     
127     /* This routine can appear to be a bit complex, but it is mostly book-keeping.
128      * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
129      * whether they should interact or not. 
130      *
131      * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
132      * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
133      * we create a jindex array with three elements per i atom: the starting point, the point to
134      * which we need to check exclusions, and the end point.
135      * This way we only have to allocate a short exclusion mask per i atom.
136      */
137     
138     /* Allocate memory for jindex arrays */
139     snew(aadata->jindex_gb,3*natoms);
140     
141     /* Pointer to lists with exclusion masks */
142     snew(aadata->exclusion_mask_gb,natoms);
143     
144     for(i=0;i<natoms;i++)
145     {
146         /* Start */
147         aadata->jindex_gb[3*i]       = i+1;        
148         max_offset = calc_maxoffset(i,natoms);
149
150         /* first check the max range of atoms to EXCLUDE */
151         max_excl_offset = 0;
152         if(!bInclude12)
153         {
154             for(j=0;j<ilist[F_GB12].nr;j+=3)
155             {
156                 a1 = ilist[F_GB12].iatoms[j+1];
157                 a2 = ilist[F_GB12].iatoms[j+2];
158                 
159                 if(a1==i)
160                 {
161                     k = a2-a1;
162                 }
163                 else if(a2==i)
164                 {
165                     k = a1+natoms-a2;
166                 }
167                 else 
168                 {
169                     continue;
170                 }
171                 if(k>0 && k<=max_offset)
172                 {
173                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
174                 }
175             }
176         }
177         if(!bInclude13)
178         {
179             for(j=0;j<ilist[F_GB13].nr;j+=3)
180             {
181                 a1 = ilist[F_GB13].iatoms[j+1];
182                 a2 = ilist[F_GB13].iatoms[j+2];
183                 
184                 
185                 if(a1==i)
186                 {
187                     k = a2-a1;
188                 }
189                 else if(a2==i)
190                 {
191                     k = a1+natoms-a2;
192                 }
193                 else 
194                 {
195                     continue;
196                 }
197                 if(k>0 && k<=max_offset)
198                 {
199                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
200                 }
201             }
202         }
203         if(!bInclude14)
204         {
205             for(j=0;j<ilist[F_GB14].nr;j+=3)
206             {
207                 a1 = ilist[F_GB14].iatoms[j+1];
208                 a2 = ilist[F_GB14].iatoms[j+2];
209                 
210                 
211                 if(a1==i)
212                 {
213                     k = a2-a1;
214                 }
215                 else if(a2==i)
216                 {
217                     k = a1+natoms-a2;
218                 }
219                 else 
220                 {
221                     continue;
222                 }
223                 if(k>0 && k<=max_offset)
224                 {
225                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
226                 }
227             }
228         }
229         max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
230
231         aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;        
232         
233         snew(aadata->exclusion_mask_gb[i],max_excl_offset);
234         
235         /* Include everything by default */
236         for(j=0;j<max_excl_offset;j++)
237         {
238             /* Use all-ones to mark interactions that should be present, compatible with SSE */
239             aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
240         }
241         /* Go through exclusions again */
242         if(!bInclude12)
243         {
244             for(j=0;j<ilist[F_GB12].nr;j+=3)
245             {
246                 a1 = ilist[F_GB12].iatoms[j+1];
247                 a2 = ilist[F_GB12].iatoms[j+2];
248                 
249                 if(a1==i)
250                 {
251                     k = a2-a1;
252                 }
253                 else if(a2==i)
254                 {
255                     k = a1+natoms-a2;
256                 }
257                 else 
258                 {
259                     continue;
260                 }
261                 if(k>0 && k<=max_offset)
262                 {
263                     aadata->exclusion_mask_gb[i][k-1] = 0;
264                 }
265             }
266         }
267         if(!bInclude13)
268         {
269             for(j=0;j<ilist[F_GB13].nr;j+=3)
270             {
271                 a1 = ilist[F_GB13].iatoms[j+1];
272                 a2 = ilist[F_GB13].iatoms[j+2];
273                
274                 if(a1==i)
275                 {
276                     k = a2-a1;
277                 }
278                 else if(a2==i)
279                 {
280                     k = a1+natoms-a2;
281                 }
282                 else 
283                 {
284                     continue;
285                 }
286                 if(k>0 && k<=max_offset)
287                 {
288                     aadata->exclusion_mask_gb[i][k-1] = 0;
289                 }
290             }
291         }
292         if(!bInclude14)
293         {
294             for(j=0;j<ilist[F_GB14].nr;j+=3)
295             {
296                 a1 = ilist[F_GB14].iatoms[j+1];
297                 a2 = ilist[F_GB14].iatoms[j+2];
298
299                 if(a1==i)
300                 {
301                     k = a2-a1;
302                 }
303                 else if(a2==i)
304                 {
305                     k = a1+natoms-a2;
306                 }
307                 else 
308                 {
309                     continue;
310                 }
311                 if(k>0 && k<=max_offset)
312                 {
313                     aadata->exclusion_mask_gb[i][k-1] = 0;
314                 }
315             }
316         }
317         
318         /* End */
319         
320         /* End */
321         aadata->jindex_gb[3*i+2] = i+1+max_offset;        
322     }
323 }
324
325
326 static void
327 genborn_allvsall_setup(gmx_allvsallgb2_data_t **  p_aadata,
328                        t_ilist *                  ilist,
329                        int                        natoms,
330                        gmx_bool                       bInclude12,
331                        gmx_bool                       bInclude13,
332                        gmx_bool                       bInclude14)
333 {
334         int i,j,idx;
335         gmx_allvsallgb2_data_t *aadata;
336     real *p;
337     
338         snew(aadata,1);
339         *p_aadata = aadata;
340         
341     setup_gb_exclusions_and_indices(aadata,ilist,natoms,bInclude12,bInclude13,bInclude14);
342 }
343
344
345
346 int
347 genborn_allvsall_calc_still_radii(t_forcerec *           fr,
348                                   t_mdatoms *            mdatoms,
349                                   gmx_genborn_t *        born,
350                                   gmx_localtop_t *       top,
351                                   real *                 x,
352                                   t_commrec *            cr,
353                                   void *                 work)
354 {
355         gmx_allvsallgb2_data_t *aadata;
356         int        natoms;
357         int        ni0,ni1;
358         int        nj0,nj1,nj2;
359         int        i,j,k,n;
360     int *      mask;
361     
362     real       ix,iy,iz;
363     real       jx,jy,jz;
364     real       dx,dy,dz;
365     real       rsq,rinv;
366     real       gpi,rai,vai;
367     real       prod_ai;
368     real       irsq,idr4,idr6;
369     real       raj,rvdw,ratio;
370     real       vaj,ccf,dccf,theta,cosq;
371     real       term,prod,icf4,icf6,gpi2,factor,sinq;
372     
373     natoms              = mdatoms->nr;
374         ni0                 = mdatoms->start;
375         ni1                 = mdatoms->start+mdatoms->homenr;
376     factor  = 0.5*ONE_4PI_EPS0;
377     n = 0;
378     
379     aadata = *((gmx_allvsallgb2_data_t **)work);
380     
381         if(aadata==NULL)
382         {
383                 genborn_allvsall_setup(&aadata,top->idef.il,mdatoms->nr,
384                                FALSE,FALSE,TRUE);
385         *((gmx_allvsallgb2_data_t **)work) = aadata;
386         }
387     
388     
389     for(i=0;i<born->nr;i++)
390     {
391         born->gpol_still_work[i] = 0;
392     }
393     
394     
395         for(i=ni0; i<ni1; i++)
396         {
397                 /* We assume shifts are NOT used for all-vs-all interactions */
398
399                 /* Load i atom data */
400         ix                = x[3*i];
401         iy                = x[3*i+1];
402         iz                = x[3*i+2];
403         
404         gpi               = 0.0;
405
406         rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]];
407                 vai     = born->vsolv[i];
408                 prod_ai = STILL_P4*vai;
409         
410                 /* Load limits for loop over neighbors */
411                 nj0              = aadata->jindex_gb[3*i];
412                 nj1              = aadata->jindex_gb[3*i+1];
413                 nj2              = aadata->jindex_gb[3*i+2];
414         
415         mask             = aadata->exclusion_mask_gb[i];
416
417         /* Prologue part, including exclusion mask */
418         for(j=nj0; j<nj1; j++,mask++)
419         {          
420             if(*mask!=0)
421             {
422                 k = j%natoms;
423
424                 /* load j atom coordinates */
425                 jx                = x[3*k];
426                 jy                = x[3*k+1];
427                 jz                = x[3*k+2];
428                 
429                 /* Calculate distance */
430                 dx                = ix - jx;      
431                 dy                = iy - jy;      
432                 dz                = iz - jz;      
433                 rsq               = dx*dx+dy*dy+dz*dz;
434                 
435                 /* Calculate 1/r and 1/r2 */
436                 rinv              = gmx_invsqrt(rsq);
437                 irsq  = rinv*rinv;
438                 idr4  = irsq*irsq;
439                 idr6  = idr4*irsq;
440
441                 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
442                 
443                 rvdw  = rai + raj;
444                 
445                 ratio = rsq / (rvdw * rvdw);
446                 vaj   = born->vsolv[k];
447                 
448                 
449                 if(ratio>STILL_P5INV) 
450                 {
451                     ccf=1.0;
452                     dccf=0.0;
453                 }
454                 else
455                 {
456                     theta = ratio*STILL_PIP5;
457                     cosq  = cos(theta);
458                     term  = 0.5*(1.0-cosq);
459                     ccf   = term*term;
460                     sinq  = 1.0 - cosq*cosq;
461                     dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
462                 }
463                 
464                 prod          = STILL_P4*vaj;
465                 icf4          = ccf*idr4;
466                 icf6          = (4*ccf-dccf)*idr6;
467                 
468                 born->gpol_still_work[k] += prod_ai*icf4;
469                 gpi             = gpi+prod*icf4;
470                 
471                 /* Save ai->aj and aj->ai chain rule terms */
472                 fr->dadx[n++]   = prod*icf6;
473                 fr->dadx[n++]   = prod_ai*icf6;   
474                 
475                 /* 27 flops, plus one cos(x) - estimate at 20 flops  => 47 */
476
477             }
478         }
479         
480         /* Main part, no exclusions */
481         for(j=nj1; j<nj2; j++)
482         {       
483             k = j%natoms;
484
485             /* load j atom coordinates */
486             jx                = x[3*k];
487             jy                = x[3*k+1];
488             jz                = x[3*k+2];
489             
490             /* Calculate distance */
491             dx                = ix - jx;      
492             dy                = iy - jy;      
493             dz                = iz - jz;      
494             rsq               = dx*dx+dy*dy+dz*dz;
495             
496             /* Calculate 1/r and 1/r2 */
497             rinv              = gmx_invsqrt(rsq);
498             irsq  = rinv*rinv;
499             idr4  = irsq*irsq;
500             idr6  = idr4*irsq;
501             
502             raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
503             
504             rvdw  = rai + raj;
505             
506             ratio = rsq / (rvdw * rvdw);
507             vaj   = born->vsolv[k];
508             
509             if(ratio>STILL_P5INV) 
510             {
511                 ccf=1.0;
512                 dccf=0.0;
513             }
514             else
515             {
516                 theta = ratio*STILL_PIP5;
517                 cosq  = cos(theta);
518                 term  = 0.5*(1.0-cosq);
519                 ccf   = term*term;
520                 sinq  = 1.0 - cosq*cosq;
521                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
522             }
523             
524             prod          = STILL_P4*vaj;
525             icf4          = ccf*idr4;
526             icf6          = (4*ccf-dccf)*idr6;
527             
528             born->gpol_still_work[k] += prod_ai*icf4;
529             gpi             = gpi+prod*icf4;
530             
531             /* Save ai->aj and aj->ai chain rule terms */
532             fr->dadx[n++]   = prod*icf6;
533             fr->dadx[n++]   = prod_ai*icf6;
534         }
535         born->gpol_still_work[i]+=gpi;
536         }    
537     
538     /* Parallel summations */
539         if(PARTDECOMP(cr))
540         {
541                 gmx_sum(natoms, born->gpol_still_work, cr);
542         }
543         
544         /* Calculate the radii */
545         for(i=0;i<natoms;i++)
546         {
547                 if(born->use[i] != 0)
548                 {
549                         gpi  = born->gpol[i]+born->gpol_still_work[i];
550                         gpi2 = gpi * gpi;
551                         born->bRad[i]   = factor*gmx_invsqrt(gpi2);
552                         fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
553                 }
554         }
555         
556         return 0;
557 }
558
559
560
561 int
562 genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
563                                     t_mdatoms *            mdatoms,
564                                     gmx_genborn_t *        born,
565                                     int                    gb_algorithm,
566                                     gmx_localtop_t *       top,
567                                     real *                 x,
568                                     t_commrec *            cr,
569                                     void *                 work)
570 {
571         gmx_allvsallgb2_data_t *aadata;
572         int        natoms;
573         int        ni0,ni1;
574         int        nj0,nj1,nj2;
575         int        i,j,k,n;
576     int *      mask;
577     
578     real       ix,iy,iz;
579     real       jx,jy,jz;
580     real       dx,dy,dz;
581     real       rsq,rinv;
582     real       prod,raj;
583     real       rai,doffset,rai_inv,rai_inv2,sk_ai,sk2_ai,sum_ai;
584     real       dr,sk,lij,dlij,lij2,lij3,uij2,uij3,diff2,uij,log_term;
585     real       lij_inv,sk2,sk2_rinv,tmp,t1,t2,t3,raj_inv,sum_ai2,sum_ai3,tsum;
586     real       tchain;
587     real       dadxi,dadxj;
588     real       rad,min_rad;
589     
590     natoms              = mdatoms->nr;
591         ni0                 = mdatoms->start;
592         ni1                 = mdatoms->start+mdatoms->homenr;
593
594     n = 0;
595     prod = 0;
596     raj = 0;
597     doffset = born->gb_doffset;
598
599     aadata = *((gmx_allvsallgb2_data_t **)work);
600     
601         if(aadata==NULL)
602         {
603                 genborn_allvsall_setup(&aadata,top->idef.il,mdatoms->nr,
604                                TRUE,TRUE,TRUE);
605         *((gmx_allvsallgb2_data_t **)work) = aadata;
606         }
607     
608     for(i=0;i<born->nr;i++)
609     {
610         born->gpol_hct_work[i] = 0;
611     }
612     
613         for(i=ni0; i<ni1; i++)
614         {
615                 /* We assume shifts are NOT used for all-vs-all interactions */
616                 
617                 /* Load i atom data */
618         ix                = x[3*i];
619         iy                = x[3*i+1];
620         iz                = x[3*i+2];
621         
622                 rai      = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
623                 rai_inv  = 1.0/rai;
624                 
625                 sk_ai    = born->param[i];
626                 sk2_ai   = sk_ai*sk_ai;
627
628         sum_ai   = 0;
629         
630                 /* Load limits for loop over neighbors */
631                 nj0              = aadata->jindex_gb[3*i];
632                 nj1              = aadata->jindex_gb[3*i+1];
633                 nj2              = aadata->jindex_gb[3*i+2];
634         
635         mask             = aadata->exclusion_mask_gb[i];
636                
637         /* Prologue part, including exclusion mask */
638         for(j=nj0; j<nj1; j++,mask++)
639         {          
640             if(*mask!=0)
641             {
642                 k = j%natoms;
643                 
644                 /* load j atom coordinates */
645                 jx                = x[3*k];
646                 jy                = x[3*k+1];
647                 jz                = x[3*k+2];
648                 
649                 /* Calculate distance */
650                 dx                = ix - jx;      
651                 dy                = iy - jy;      
652                 dz                = iz - jz;      
653                 rsq               = dx*dx+dy*dy+dz*dz;
654                 
655                 /* Calculate 1/r and 1/r2 */
656                 rinv              = gmx_invsqrt(rsq);
657                 dr                = rsq*rinv;
658                 
659                 /* sk is precalculated in init_gb() */
660                 sk    = born->param[k];
661                 raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset; 
662                 
663                 /* aj -> ai interaction */
664                                
665                 
666                 if(rai < dr+sk)
667                 {
668                     lij       = 1.0/(dr-sk); 
669                     dlij      = 1.0; 
670                     
671                     if(rai>dr-sk)
672                     {
673                         lij  = rai_inv; 
674                         dlij = 0.0;
675                     }
676                                          
677                     uij      = 1.0/(dr+sk);  
678                     lij2     = lij  * lij;
679                     lij3     = lij2 * lij;
680                     uij2     = uij  * uij;
681                     uij3     = uij2 * uij;
682
683                     diff2    = uij2-lij2; 
684                     
685                     lij_inv  = gmx_invsqrt(lij2); 
686                     sk2      = sk*sk;
687                     sk2_rinv = sk2*rinv;        
688                     prod     = 0.25*sk2_rinv;
689                     
690                     log_term = log(uij*lij_inv); 
691                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
692                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
693                     
694                     if(rai < sk-dr)
695                     {
696                         tmp = tmp + 2.0 * (rai_inv-lij); 
697                     }
698                     
699                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);   
700                     t2      = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr); 
701                     
702                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
703                                         
704                     dadxi = (dlij*t1+t2+t3)*rinv; 
705                     
706                     sum_ai += 0.5*tmp;
707                 }
708                 else
709                 {
710                     dadxi = 0.0;
711                 }
712                                 
713                 /* ai -> aj interaction */
714                 if(raj < dr + sk_ai)
715                 {
716                     lij     = 1.0/(dr-sk_ai);
717                     dlij    = 1.0;
718                     raj_inv = 1.0/raj;
719                     
720                     if(raj>dr-sk_ai)
721                     {
722                         lij = raj_inv;
723                         dlij = 0.0;
724                     }
725                     
726                     lij2     = lij  * lij;
727                     lij3     = lij2 * lij;
728                     
729                     uij      = 1.0/(dr+sk_ai);
730                     uij2     = uij  * uij;
731                     uij3     = uij2 * uij;
732                     
733                     diff2    = uij2-lij2;
734                     
735                     lij_inv  = gmx_invsqrt(lij2);
736                     sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
737                     sk2_rinv = sk2*rinv;
738                     prod     = 0.25 * sk2_rinv;
739                     
740                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
741                     log_term = log(uij*lij_inv);
742                     
743                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
744                     
745                     if(raj<sk_ai-dr)
746                     {
747                         tmp     = tmp + 2.0 * (raj_inv-lij);
748                     }
749                     
750                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
751                     t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
752                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
753                     
754                     dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule  */
755                     
756                     born->gpol_hct_work[k] += 0.5*tmp;
757                 }
758                 else 
759                 {
760                     dadxj = 0.0;
761                 }
762                 fr->dadx[n++] = dadxi;
763                 fr->dadx[n++] = dadxj;
764                 
765             }
766         }
767         
768         /* Main part, no exclusions */
769         for(j=nj1; j<nj2; j++)
770         {       
771             k = j%natoms;
772             
773             /* load j atom coordinates */
774             jx                = x[3*k];
775             jy                = x[3*k+1];
776             jz                = x[3*k+2];
777             
778             /* Calculate distance */
779             dx                = ix - jx;      
780             dy                = iy - jy;      
781             dz                = iz - jz;      
782             rsq               = dx*dx+dy*dy+dz*dz;
783             
784             /* Calculate 1/r and 1/r2 */
785             rinv              = gmx_invsqrt(rsq);
786             dr                = rsq*rinv;
787             
788             /* sk is precalculated in init_gb() */
789             sk    = born->param[k];
790             raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset; 
791             
792             /* aj -> ai interaction */
793             if(rai < dr+sk)
794             {
795                 lij       = 1.0/(dr-sk);
796                 dlij      = 1.0; 
797                 
798                 if(rai>dr-sk)
799                 {
800                     lij  = rai_inv;
801                     dlij = 0.0;
802                 }
803                 
804                 uij      = 1.0/(dr+sk);
805                 lij2     = lij  * lij;
806                 lij3     = lij2 * lij;
807                 uij2     = uij  * uij;
808                 uij3     = uij2 * uij;
809                 
810                 diff2    = uij2-lij2;
811                 
812                 lij_inv  = gmx_invsqrt(lij2);
813                 sk2      = sk*sk;
814                 sk2_rinv = sk2*rinv;    
815                 prod     = 0.25*sk2_rinv;
816                 
817                 log_term = log(uij*lij_inv);
818                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
819                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
820                 
821                 if(rai < sk-dr)
822                 {
823                     tmp = tmp + 2.0 * (rai_inv-lij);
824                 }
825                 
826                 /* duij    = 1.0; */
827                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); 
828                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); 
829                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; 
830                 
831                 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
832                 
833                 sum_ai += 0.5*tmp;
834             }
835             else 
836             {
837                 dadxi = 0.0;
838             }
839
840             /* ai -> aj interaction */
841             if(raj < dr + sk_ai)
842             {
843                 lij     = 1.0/(dr-sk_ai);
844                 dlij    = 1.0;
845                 raj_inv = 1.0/raj;
846                 
847                 if(raj>dr-sk_ai)
848                 {
849                     lij = raj_inv;
850                     dlij = 0.0;
851                 }
852                 
853                 lij2     = lij  * lij;
854                 lij3     = lij2 * lij;
855                 
856                 uij      = 1.0/(dr+sk_ai);
857                 uij2     = uij  * uij;
858                 uij3     = uij2 * uij;
859                 
860                 diff2    = uij2-lij2;
861                 
862                 lij_inv  = gmx_invsqrt(lij2);
863                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
864                 sk2_rinv = sk2*rinv;
865                 prod     = 0.25 * sk2_rinv;
866                 
867                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
868                 log_term = log(uij*lij_inv);
869                 
870                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
871                 
872                 if(raj<sk_ai-dr)
873                 {
874                     tmp     = tmp + 2.0 * (raj_inv-lij);
875                 }
876                 
877                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
878                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
879                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
880                 
881                 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
882                 
883                 born->gpol_hct_work[k] += 0.5*tmp;
884             }
885             else 
886             {
887                 dadxj = 0.0;
888             }
889             fr->dadx[n++] = dadxi;
890             fr->dadx[n++] = dadxj;     
891         }
892         born->gpol_hct_work[i]+=sum_ai;
893         }    
894     
895     /* Parallel summations */
896         if(PARTDECOMP(cr))
897         {
898                 gmx_sum(natoms, born->gpol_hct_work, cr);
899         }
900         
901     if(gb_algorithm==egbHCT)
902     {
903         /* HCT */
904         for(i=0;i<natoms;i++)
905         {
906             if(born->use[i] != 0)
907             {
908                 rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset; 
909                 sum_ai  = 1.0/rai - born->gpol_hct_work[i];
910                 min_rad = rai + born->gb_doffset;
911                 rad     = 1.0/sum_ai; 
912                 
913                 born->bRad[i]   = rad > min_rad ? rad : min_rad;
914                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
915             }
916         }
917         
918     }
919     else
920     {
921         /* OBC */
922         /* Calculate the radii */
923         for(i=0;i<natoms;i++)
924         {
925             if(born->use[i] != 0)
926             {
927                 rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
928                 rai_inv2   = 1.0/rai;
929                 rai        = rai-doffset; 
930                 rai_inv    = 1.0/rai;
931                 sum_ai     = rai * born->gpol_hct_work[i];
932                 sum_ai2    = sum_ai  * sum_ai;
933                 sum_ai3    = sum_ai2 * sum_ai;
934                 
935                 tsum    = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
936                 born->bRad[i] = rai_inv - tsum*rai_inv2;
937                 born->bRad[i] = 1.0 / born->bRad[i];
938                 
939                 fr->invsqrta[i]=gmx_invsqrt(born->bRad[i]);
940                 
941                 tchain  = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
942                 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
943             }
944         }
945     }   
946     return 0;
947 }
948
949
950
951
952
953 int
954 genborn_allvsall_calc_chainrule(t_forcerec *           fr,
955                                 t_mdatoms *            mdatoms,
956                                 gmx_genborn_t *        born,
957                                 real *                 x,
958                                 real *                 f,
959                                 int                    gb_algorithm,
960                                 void *                 work)
961 {
962         gmx_allvsallgb2_data_t *aadata;
963         int        natoms;
964         int        ni0,ni1;
965         int        nj0,nj1,nj2;
966         int        i,j,k,n;
967     int        idx;
968     int *      mask;
969     
970     real       ix,iy,iz;
971     real       fix,fiy,fiz;
972     real       jx,jy,jz;
973     real       dx,dy,dz;
974     real       tx,ty,tz;
975     real       rbai,rbaj,fgb,fgb_ai,rbi;
976     real *     rb;
977     real *     dadx;
978     
979     natoms              = mdatoms->nr;
980         ni0                 = mdatoms->start;
981         ni1                 = mdatoms->start+mdatoms->homenr;
982     dadx                = fr->dadx;
983     
984     aadata = (gmx_allvsallgb2_data_t *)work;
985
986     n = 0;
987     rb = born->work;
988     
989         /* Loop to get the proper form for the Born radius term */
990         if(gb_algorithm==egbSTILL) 
991         {
992                 for(i=0;i<natoms;i++)
993                 {
994                         rbi   = born->bRad[i];
995                         rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
996                 }
997         }
998         else if(gb_algorithm==egbHCT) 
999         {
1000                 for(i=0;i<natoms;i++)
1001                 {
1002                         rbi   = born->bRad[i];
1003                         rb[i] = rbi * rbi * fr->dvda[i];
1004                 }
1005         }
1006         else if(gb_algorithm==egbOBC) 
1007         {
1008                 for(idx=0;idx<natoms;idx++)
1009                 {
1010                         rbi   = born->bRad[idx];
1011                         rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1012                 }
1013         }
1014
1015     for(i=ni0; i<ni1; i++)
1016         {
1017                 /* We assume shifts are NOT used for all-vs-all interactions */
1018                 
1019                 /* Load i atom data */
1020         ix                = x[3*i];
1021         iy                = x[3*i+1];
1022         iz                = x[3*i+2];
1023
1024         fix               = 0;
1025         fiy               = 0;
1026         fiz               = 0;
1027         
1028         rbai              = rb[i];
1029         
1030                 /* Load limits for loop over neighbors */
1031                 nj0              = aadata->jindex_gb[3*i];
1032                 nj1              = aadata->jindex_gb[3*i+1];
1033                 nj2              = aadata->jindex_gb[3*i+2];
1034         
1035         mask             = aadata->exclusion_mask_gb[i];
1036                 
1037         /* Prologue part, including exclusion mask */
1038         for(j=nj0; j<nj1; j++,mask++)
1039         {          
1040             if(*mask!=0)
1041             {
1042                 k = j%natoms;
1043                 
1044                 /* load j atom coordinates */
1045                 jx                = x[3*k];
1046                 jy                = x[3*k+1];
1047                 jz                = x[3*k+2];
1048                 
1049                 /* Calculate distance */
1050                 dx                = ix - jx;      
1051                 dy                = iy - jy;      
1052                 dz                = iz - jz;   
1053                 
1054                 rbaj              = rb[k];
1055                 
1056                 fgb     = rbai*dadx[n++]; 
1057                 fgb_ai  = rbaj*dadx[n++];
1058                 
1059                 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1060                 fgb     = fgb + fgb_ai;
1061                 
1062                 tx      = fgb * dx;
1063                 ty      = fgb * dy;
1064                 tz      = fgb * dz;
1065                 
1066                 fix     = fix + tx;
1067                 fiy     = fiy + ty;
1068                 fiz     = fiz + tz;
1069                 
1070                 /* Update force on atom aj */
1071                 f[3*k]   = f[3*k] - tx;
1072                 f[3*k+1] = f[3*k+1] - ty;
1073                 f[3*k+2] = f[3*k+2] - tz;
1074             }
1075         }
1076         
1077         /* Main part, no exclusions */
1078         for(j=nj1; j<nj2; j++)
1079         {       
1080             k = j%natoms;
1081             
1082             /* load j atom coordinates */
1083             jx                = x[3*k];
1084             jy                = x[3*k+1];
1085             jz                = x[3*k+2];
1086             
1087             /* Calculate distance */
1088             dx                = ix - jx;      
1089             dy                = iy - jy;      
1090             dz                = iz - jz;   
1091             
1092             rbaj              = rb[k];
1093             
1094             fgb     = rbai*dadx[n++]; 
1095             fgb_ai  = rbaj*dadx[n++];
1096
1097             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1098             fgb     = fgb + fgb_ai;
1099             
1100             tx      = fgb * dx;
1101             ty      = fgb * dy;
1102             tz      = fgb * dz;
1103             
1104             fix     = fix + tx;
1105             fiy     = fiy + ty;
1106             fiz     = fiz + tz;
1107             
1108             /* Update force on atom aj */
1109             f[3*k]   = f[3*k] - tx;
1110             f[3*k+1] = f[3*k+1] - ty;
1111             f[3*k+2] = f[3*k+2] - tz;
1112         }
1113                 /* Update force and shift forces on atom ai */
1114                 f[3*i]   = f[3*i] + fix;
1115                 f[3*i+1] = f[3*i+1] + fiy;
1116                 f[3*i+2] = f[3*i+2] + fiz;
1117         }    
1118         
1119         return 0;
1120 }
1121
1122
1123