File: | gromacs/mdlib/genborn_allvsall.c |
Location: | line 589, column 5 |
Description: | Value stored to 'prod' is never read |
1 | /* |
2 | * This file is part of the GROMACS molecular simulation package. |
3 | * |
4 | * Copyright (c) 1991-2000, University of Groningen, The Netherlands. |
5 | * Copyright (c) 2001-2009, The GROMACS Development Team. |
6 | * Copyright (c) 2010,2014, by the GROMACS development team, led by |
7 | * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, |
8 | * and including many others, as listed in the AUTHORS file in the |
9 | * top-level source directory and at http://www.gromacs.org. |
10 | * |
11 | * GROMACS is free software; you can redistribute it and/or |
12 | * modify it under the terms of the GNU Lesser General Public License |
13 | * as published by the Free Software Foundation; either version 2.1 |
14 | * of the License, or (at your option) any later version. |
15 | * |
16 | * GROMACS is distributed in the hope that it will be useful, |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | * Lesser General Public License for more details. |
20 | * |
21 | * You should have received a copy of the GNU Lesser General Public |
22 | * License along with GROMACS; if not, see |
23 | * http://www.gnu.org/licenses, or write to the Free Software Foundation, |
24 | * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
25 | * |
26 | * If you want to redistribute modifications to GROMACS, please |
27 | * consider that scientific software is very special. Version |
28 | * control is crucial - bugs must be traceable. We will be happy to |
29 | * consider code for inclusion in the official distribution, but |
30 | * derived work must not be called official GROMACS. Details are found |
31 | * in the README & COPYING files - if they are missing, get the |
32 | * official version at http://www.gromacs.org. |
33 | * |
34 | * To help us fund GROMACS development, we humbly ask that you cite |
35 | * the research papers on the package. Check out http://www.gromacs.org. |
36 | */ |
37 | #ifdef HAVE_CONFIG_H1 |
38 | #include <config.h> |
39 | #endif |
40 | |
41 | #include <math.h> |
42 | #include "types/simple.h" |
43 | |
44 | #include "gromacs/math/vec.h" |
45 | #include "gromacs/utility/smalloc.h" |
46 | |
47 | #include "network.h" |
48 | #include "physics.h" |
49 | #include "genborn.h" |
50 | #include "genborn_allvsall.h" |
51 | |
52 | |
53 | typedef struct |
54 | { |
55 | int * jindex_gb; |
56 | int ** exclusion_mask_gb; |
57 | } |
58 | gmx_allvsallgb2_data_t; |
59 | |
60 | static int |
61 | calc_maxoffset(int i, int natoms) |
62 | { |
63 | int maxoffset; |
64 | |
65 | if ((natoms % 2) == 1) |
66 | { |
67 | /* Odd number of atoms, easy */ |
68 | maxoffset = natoms/2; |
69 | } |
70 | else if ((natoms % 4) == 0) |
71 | { |
72 | /* Multiple of four is hard */ |
73 | if (i < natoms/2) |
74 | { |
75 | if ((i % 2) == 0) |
76 | { |
77 | maxoffset = natoms/2; |
78 | } |
79 | else |
80 | { |
81 | maxoffset = natoms/2-1; |
82 | } |
83 | } |
84 | else |
85 | { |
86 | if ((i % 2) == 1) |
87 | { |
88 | maxoffset = natoms/2; |
89 | } |
90 | else |
91 | { |
92 | maxoffset = natoms/2-1; |
93 | } |
94 | } |
95 | } |
96 | else |
97 | { |
98 | /* natoms/2 = odd */ |
99 | if ((i % 2) == 0) |
100 | { |
101 | maxoffset = natoms/2; |
102 | } |
103 | else |
104 | { |
105 | maxoffset = natoms/2-1; |
106 | } |
107 | } |
108 | |
109 | return maxoffset; |
110 | } |
111 | |
112 | static void |
113 | setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata, |
114 | t_ilist * ilist, |
115 | int natoms, |
116 | gmx_bool bInclude12, |
117 | gmx_bool bInclude13, |
118 | gmx_bool bInclude14) |
119 | { |
120 | int i, j, k, tp; |
121 | int a1, a2; |
122 | int nj0, nj1; |
123 | int max_offset; |
124 | int max_excl_offset; |
125 | int nj; |
126 | |
127 | /* This routine can appear to be a bit complex, but it is mostly book-keeping. |
128 | * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates |
129 | * whether they should interact or not. |
130 | * |
131 | * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction |
132 | * should be present, otherwise 0. Since exclusions typically only occur when i & j are close, |
133 | * we create a jindex array with three elements per i atom: the starting point, the point to |
134 | * which we need to check exclusions, and the end point. |
135 | * This way we only have to allocate a short exclusion mask per i atom. |
136 | */ |
137 | |
138 | /* Allocate memory for jindex arrays */ |
139 | snew(aadata->jindex_gb, 3*natoms)(aadata->jindex_gb) = save_calloc("aadata->jindex_gb", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn_allvsall.c" , 139, (3*natoms), sizeof(*(aadata->jindex_gb))); |
140 | |
141 | /* Pointer to lists with exclusion masks */ |
142 | snew(aadata->exclusion_mask_gb, natoms)(aadata->exclusion_mask_gb) = save_calloc("aadata->exclusion_mask_gb" , "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn_allvsall.c" , 142, (natoms), sizeof(*(aadata->exclusion_mask_gb))); |
143 | |
144 | for (i = 0; i < natoms; i++) |
145 | { |
146 | /* Start */ |
147 | aadata->jindex_gb[3*i] = i+1; |
148 | max_offset = calc_maxoffset(i, natoms); |
149 | |
150 | /* first check the max range of atoms to EXCLUDE */ |
151 | max_excl_offset = 0; |
152 | if (!bInclude12) |
153 | { |
154 | for (j = 0; j < ilist[F_GB12].nr; j += 3) |
155 | { |
156 | a1 = ilist[F_GB12].iatoms[j+1]; |
157 | a2 = ilist[F_GB12].iatoms[j+2]; |
158 | |
159 | if (a1 == i) |
160 | { |
161 | k = a2-a1; |
162 | } |
163 | else if (a2 == i) |
164 | { |
165 | k = a1+natoms-a2; |
166 | } |
167 | else |
168 | { |
169 | continue; |
170 | } |
171 | if (k > 0 && k <= max_offset) |
172 | { |
173 | max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset; |
174 | } |
175 | } |
176 | } |
177 | if (!bInclude13) |
178 | { |
179 | for (j = 0; j < ilist[F_GB13].nr; j += 3) |
180 | { |
181 | a1 = ilist[F_GB13].iatoms[j+1]; |
182 | a2 = ilist[F_GB13].iatoms[j+2]; |
183 | |
184 | |
185 | if (a1 == i) |
186 | { |
187 | k = a2-a1; |
188 | } |
189 | else if (a2 == i) |
190 | { |
191 | k = a1+natoms-a2; |
192 | } |
193 | else |
194 | { |
195 | continue; |
196 | } |
197 | if (k > 0 && k <= max_offset) |
198 | { |
199 | max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset; |
200 | } |
201 | } |
202 | } |
203 | if (!bInclude14) |
204 | { |
205 | for (j = 0; j < ilist[F_GB14].nr; j += 3) |
206 | { |
207 | a1 = ilist[F_GB14].iatoms[j+1]; |
208 | a2 = ilist[F_GB14].iatoms[j+2]; |
209 | |
210 | |
211 | if (a1 == i) |
212 | { |
213 | k = a2-a1; |
214 | } |
215 | else if (a2 == i) |
216 | { |
217 | k = a1+natoms-a2; |
218 | } |
219 | else |
220 | { |
221 | continue; |
222 | } |
223 | if (k > 0 && k <= max_offset) |
224 | { |
225 | max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset; |
226 | } |
227 | } |
228 | } |
229 | max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset; |
230 | |
231 | aadata->jindex_gb[3*i+1] = i+1+max_excl_offset; |
232 | |
233 | snew(aadata->exclusion_mask_gb[i], max_excl_offset)(aadata->exclusion_mask_gb[i]) = save_calloc("aadata->exclusion_mask_gb[i]" , "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn_allvsall.c" , 233, (max_excl_offset), sizeof(*(aadata->exclusion_mask_gb [i]))); |
234 | |
235 | /* Include everything by default */ |
236 | for (j = 0; j < max_excl_offset; j++) |
237 | { |
238 | /* Use all-ones to mark interactions that should be present, compatible with SSE */ |
239 | aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF; |
240 | } |
241 | /* Go through exclusions again */ |
242 | if (!bInclude12) |
243 | { |
244 | for (j = 0; j < ilist[F_GB12].nr; j += 3) |
245 | { |
246 | a1 = ilist[F_GB12].iatoms[j+1]; |
247 | a2 = ilist[F_GB12].iatoms[j+2]; |
248 | |
249 | if (a1 == i) |
250 | { |
251 | k = a2-a1; |
252 | } |
253 | else if (a2 == i) |
254 | { |
255 | k = a1+natoms-a2; |
256 | } |
257 | else |
258 | { |
259 | continue; |
260 | } |
261 | if (k > 0 && k <= max_offset) |
262 | { |
263 | aadata->exclusion_mask_gb[i][k-1] = 0; |
264 | } |
265 | } |
266 | } |
267 | if (!bInclude13) |
268 | { |
269 | for (j = 0; j < ilist[F_GB13].nr; j += 3) |
270 | { |
271 | a1 = ilist[F_GB13].iatoms[j+1]; |
272 | a2 = ilist[F_GB13].iatoms[j+2]; |
273 | |
274 | if (a1 == i) |
275 | { |
276 | k = a2-a1; |
277 | } |
278 | else if (a2 == i) |
279 | { |
280 | k = a1+natoms-a2; |
281 | } |
282 | else |
283 | { |
284 | continue; |
285 | } |
286 | if (k > 0 && k <= max_offset) |
287 | { |
288 | aadata->exclusion_mask_gb[i][k-1] = 0; |
289 | } |
290 | } |
291 | } |
292 | if (!bInclude14) |
293 | { |
294 | for (j = 0; j < ilist[F_GB14].nr; j += 3) |
295 | { |
296 | a1 = ilist[F_GB14].iatoms[j+1]; |
297 | a2 = ilist[F_GB14].iatoms[j+2]; |
298 | |
299 | if (a1 == i) |
300 | { |
301 | k = a2-a1; |
302 | } |
303 | else if (a2 == i) |
304 | { |
305 | k = a1+natoms-a2; |
306 | } |
307 | else |
308 | { |
309 | continue; |
310 | } |
311 | if (k > 0 && k <= max_offset) |
312 | { |
313 | aadata->exclusion_mask_gb[i][k-1] = 0; |
314 | } |
315 | } |
316 | } |
317 | |
318 | /* End */ |
319 | |
320 | /* End */ |
321 | aadata->jindex_gb[3*i+2] = i+1+max_offset; |
322 | } |
323 | } |
324 | |
325 | |
326 | static void |
327 | genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata, |
328 | t_ilist * ilist, |
329 | int natoms, |
330 | gmx_bool bInclude12, |
331 | gmx_bool bInclude13, |
332 | gmx_bool bInclude14) |
333 | { |
334 | int i, j, idx; |
335 | gmx_allvsallgb2_data_t *aadata; |
336 | real *p; |
337 | |
338 | snew(aadata, 1)(aadata) = save_calloc("aadata", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn_allvsall.c" , 338, (1), sizeof(*(aadata))); |
339 | *p_aadata = aadata; |
340 | |
341 | setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14); |
342 | } |
343 | |
344 | |
345 | |
346 | int |
347 | genborn_allvsall_calc_still_radii(t_forcerec * fr, |
348 | t_mdatoms * mdatoms, |
349 | gmx_genborn_t * born, |
350 | gmx_localtop_t * top, |
351 | real * x, |
352 | void * work) |
353 | { |
354 | gmx_allvsallgb2_data_t *aadata; |
355 | int natoms; |
356 | int ni0, ni1; |
357 | int nj0, nj1, nj2; |
358 | int i, j, k, n; |
359 | int * mask; |
360 | |
361 | real ix, iy, iz; |
362 | real jx, jy, jz; |
363 | real dx, dy, dz; |
364 | real rsq, rinv; |
365 | real gpi, rai, vai; |
366 | real prod_ai; |
367 | real irsq, idr4, idr6; |
368 | real raj, rvdw, ratio; |
369 | real vaj, ccf, dccf, theta, cosq; |
370 | real term, prod, icf4, icf6, gpi2, factor, sinq; |
371 | |
372 | natoms = mdatoms->nr; |
373 | ni0 = 0; |
374 | ni1 = mdatoms->homenr; |
375 | factor = 0.5*ONE_4PI_EPS0((332.0636930*(4.184))*0.1); |
376 | n = 0; |
377 | |
378 | aadata = *((gmx_allvsallgb2_data_t **)work); |
379 | |
380 | if (aadata == NULL((void*)0)) |
381 | { |
382 | genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr, |
383 | FALSE0, FALSE0, TRUE1); |
384 | *((gmx_allvsallgb2_data_t **)work) = aadata; |
385 | } |
386 | |
387 | |
388 | for (i = 0; i < born->nr; i++) |
389 | { |
390 | born->gpol_still_work[i] = 0; |
391 | } |
392 | |
393 | |
394 | for (i = ni0; i < ni1; i++) |
395 | { |
396 | /* We assume shifts are NOT used for all-vs-all interactions */ |
397 | |
398 | /* Load i atom data */ |
399 | ix = x[3*i]; |
400 | iy = x[3*i+1]; |
401 | iz = x[3*i+2]; |
402 | |
403 | gpi = 0.0; |
404 | |
405 | rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]; |
406 | vai = born->vsolv[i]; |
407 | prod_ai = STILL_P415.236*0.1*(4.184)*vai; |
408 | |
409 | /* Load limits for loop over neighbors */ |
410 | nj0 = aadata->jindex_gb[3*i]; |
411 | nj1 = aadata->jindex_gb[3*i+1]; |
412 | nj2 = aadata->jindex_gb[3*i+2]; |
413 | |
414 | mask = aadata->exclusion_mask_gb[i]; |
415 | |
416 | /* Prologue part, including exclusion mask */ |
417 | for (j = nj0; j < nj1; j++, mask++) |
418 | { |
419 | if (*mask != 0) |
420 | { |
421 | k = j%natoms; |
422 | |
423 | /* load j atom coordinates */ |
424 | jx = x[3*k]; |
425 | jy = x[3*k+1]; |
426 | jz = x[3*k+2]; |
427 | |
428 | /* Calculate distance */ |
429 | dx = ix - jx; |
430 | dy = iy - jy; |
431 | dz = iz - jz; |
432 | rsq = dx*dx+dy*dy+dz*dz; |
433 | |
434 | /* Calculate 1/r and 1/r2 */ |
435 | rinv = gmx_invsqrt(rsq)gmx_software_invsqrt(rsq); |
436 | irsq = rinv*rinv; |
437 | idr4 = irsq*irsq; |
438 | idr6 = idr4*irsq; |
439 | |
440 | raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]; |
441 | |
442 | rvdw = rai + raj; |
443 | |
444 | ratio = rsq / (rvdw * rvdw); |
445 | vaj = born->vsolv[k]; |
446 | |
447 | |
448 | if (ratio > STILL_P5INV(1.0/1.254)) |
449 | { |
450 | ccf = 1.0; |
451 | dccf = 0.0; |
452 | } |
453 | else |
454 | { |
455 | theta = ratio*STILL_PIP5(3.14159265358979323846*1.254); |
456 | cosq = cos(theta); |
457 | term = 0.5*(1.0-cosq); |
458 | ccf = term*term; |
459 | sinq = 1.0 - cosq*cosq; |
460 | dccf = 2.0*term*sinq*gmx_invsqrt(sinq)gmx_software_invsqrt(sinq)*theta; |
461 | } |
462 | |
463 | prod = STILL_P415.236*0.1*(4.184)*vaj; |
464 | icf4 = ccf*idr4; |
465 | icf6 = (4*ccf-dccf)*idr6; |
466 | |
467 | born->gpol_still_work[k] += prod_ai*icf4; |
468 | gpi = gpi+prod*icf4; |
469 | |
470 | /* Save ai->aj and aj->ai chain rule terms */ |
471 | fr->dadx[n++] = prod*icf6; |
472 | fr->dadx[n++] = prod_ai*icf6; |
473 | |
474 | /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */ |
475 | |
476 | } |
477 | } |
478 | |
479 | /* Main part, no exclusions */ |
480 | for (j = nj1; j < nj2; j++) |
481 | { |
482 | k = j%natoms; |
483 | |
484 | /* load j atom coordinates */ |
485 | jx = x[3*k]; |
486 | jy = x[3*k+1]; |
487 | jz = x[3*k+2]; |
488 | |
489 | /* Calculate distance */ |
490 | dx = ix - jx; |
491 | dy = iy - jy; |
492 | dz = iz - jz; |
493 | rsq = dx*dx+dy*dy+dz*dz; |
494 | |
495 | /* Calculate 1/r and 1/r2 */ |
496 | rinv = gmx_invsqrt(rsq)gmx_software_invsqrt(rsq); |
497 | irsq = rinv*rinv; |
498 | idr4 = irsq*irsq; |
499 | idr6 = idr4*irsq; |
500 | |
501 | raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]; |
502 | |
503 | rvdw = rai + raj; |
504 | |
505 | ratio = rsq / (rvdw * rvdw); |
506 | vaj = born->vsolv[k]; |
507 | |
508 | if (ratio > STILL_P5INV(1.0/1.254)) |
509 | { |
510 | ccf = 1.0; |
511 | dccf = 0.0; |
512 | } |
513 | else |
514 | { |
515 | theta = ratio*STILL_PIP5(3.14159265358979323846*1.254); |
516 | cosq = cos(theta); |
517 | term = 0.5*(1.0-cosq); |
518 | ccf = term*term; |
519 | sinq = 1.0 - cosq*cosq; |
520 | dccf = 2.0*term*sinq*gmx_invsqrt(sinq)gmx_software_invsqrt(sinq)*theta; |
521 | } |
522 | |
523 | prod = STILL_P415.236*0.1*(4.184)*vaj; |
524 | icf4 = ccf*idr4; |
525 | icf6 = (4*ccf-dccf)*idr6; |
526 | |
527 | born->gpol_still_work[k] += prod_ai*icf4; |
528 | gpi = gpi+prod*icf4; |
529 | |
530 | /* Save ai->aj and aj->ai chain rule terms */ |
531 | fr->dadx[n++] = prod*icf6; |
532 | fr->dadx[n++] = prod_ai*icf6; |
533 | } |
534 | born->gpol_still_work[i] += gpi; |
535 | } |
536 | |
537 | /* Parallel summations would go here if ever implemented with DD */ |
538 | |
539 | /* Calculate the radii */ |
540 | for (i = 0; i < natoms; i++) |
541 | { |
542 | if (born->use[i] != 0) |
543 | { |
544 | gpi = born->gpol[i]+born->gpol_still_work[i]; |
545 | gpi2 = gpi * gpi; |
546 | born->bRad[i] = factor*gmx_invsqrt(gpi2)gmx_software_invsqrt(gpi2); |
547 | fr->invsqrta[i] = gmx_invsqrt(born->bRad[i])gmx_software_invsqrt(born->bRad[i]); |
548 | } |
549 | } |
550 | |
551 | return 0; |
552 | } |
553 | |
554 | |
555 | |
556 | int |
557 | genborn_allvsall_calc_hct_obc_radii(t_forcerec * fr, |
558 | t_mdatoms * mdatoms, |
559 | gmx_genborn_t * born, |
560 | int gb_algorithm, |
561 | gmx_localtop_t * top, |
562 | real * x, |
563 | void * work) |
564 | { |
565 | gmx_allvsallgb2_data_t *aadata; |
566 | int natoms; |
567 | int ni0, ni1; |
568 | int nj0, nj1, nj2; |
569 | int i, j, k, n; |
570 | int * mask; |
571 | |
572 | real ix, iy, iz; |
573 | real jx, jy, jz; |
574 | real dx, dy, dz; |
575 | real rsq, rinv; |
576 | real prod, raj; |
577 | real rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai; |
578 | real dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term; |
579 | real lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum; |
580 | real tchain; |
581 | real dadxi, dadxj; |
582 | real rad, min_rad; |
583 | |
584 | natoms = mdatoms->nr; |
585 | ni0 = 0; |
586 | ni1 = mdatoms->homenr; |
587 | |
588 | n = 0; |
589 | prod = 0; |
Value stored to 'prod' is never read | |
590 | raj = 0; |
591 | doffset = born->gb_doffset; |
592 | |
593 | aadata = *((gmx_allvsallgb2_data_t **)work); |
594 | |
595 | if (aadata == NULL((void*)0)) |
596 | { |
597 | genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr, |
598 | TRUE1, TRUE1, TRUE1); |
599 | *((gmx_allvsallgb2_data_t **)work) = aadata; |
600 | } |
601 | |
602 | for (i = 0; i < born->nr; i++) |
603 | { |
604 | born->gpol_hct_work[i] = 0; |
605 | } |
606 | |
607 | for (i = ni0; i < ni1; i++) |
608 | { |
609 | /* We assume shifts are NOT used for all-vs-all interactions */ |
610 | |
611 | /* Load i atom data */ |
612 | ix = x[3*i]; |
613 | iy = x[3*i+1]; |
614 | iz = x[3*i+2]; |
615 | |
616 | rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset; |
617 | rai_inv = 1.0/rai; |
618 | |
619 | sk_ai = born->param[i]; |
620 | sk2_ai = sk_ai*sk_ai; |
621 | |
622 | sum_ai = 0; |
623 | |
624 | /* Load limits for loop over neighbors */ |
625 | nj0 = aadata->jindex_gb[3*i]; |
626 | nj1 = aadata->jindex_gb[3*i+1]; |
627 | nj2 = aadata->jindex_gb[3*i+2]; |
628 | |
629 | mask = aadata->exclusion_mask_gb[i]; |
630 | |
631 | /* Prologue part, including exclusion mask */ |
632 | for (j = nj0; j < nj1; j++, mask++) |
633 | { |
634 | if (*mask != 0) |
635 | { |
636 | k = j%natoms; |
637 | |
638 | /* load j atom coordinates */ |
639 | jx = x[3*k]; |
640 | jy = x[3*k+1]; |
641 | jz = x[3*k+2]; |
642 | |
643 | /* Calculate distance */ |
644 | dx = ix - jx; |
645 | dy = iy - jy; |
646 | dz = iz - jz; |
647 | rsq = dx*dx+dy*dy+dz*dz; |
648 | |
649 | /* Calculate 1/r and 1/r2 */ |
650 | rinv = gmx_invsqrt(rsq)gmx_software_invsqrt(rsq); |
651 | dr = rsq*rinv; |
652 | |
653 | /* sk is precalculated in init_gb() */ |
654 | sk = born->param[k]; |
655 | raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset; |
656 | |
657 | /* aj -> ai interaction */ |
658 | |
659 | |
660 | if (rai < dr+sk) |
661 | { |
662 | lij = 1.0/(dr-sk); |
663 | dlij = 1.0; |
664 | |
665 | if (rai > dr-sk) |
666 | { |
667 | lij = rai_inv; |
668 | dlij = 0.0; |
669 | } |
670 | |
671 | uij = 1.0/(dr+sk); |
672 | lij2 = lij * lij; |
673 | lij3 = lij2 * lij; |
674 | uij2 = uij * uij; |
675 | uij3 = uij2 * uij; |
676 | |
677 | diff2 = uij2-lij2; |
678 | |
679 | lij_inv = gmx_invsqrt(lij2)gmx_software_invsqrt(lij2); |
680 | sk2 = sk*sk; |
681 | sk2_rinv = sk2*rinv; |
682 | prod = 0.25*sk2_rinv; |
683 | |
684 | log_term = log(uij*lij_inv); |
685 | /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */ |
686 | tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); |
687 | |
688 | if (rai < sk-dr) |
689 | { |
690 | tmp = tmp + 2.0 * (rai_inv-lij); |
691 | } |
692 | |
693 | t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); |
694 | t2 = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr); |
695 | |
696 | t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; |
697 | |
698 | dadxi = (dlij*t1+t2+t3)*rinv; |
699 | |
700 | sum_ai += 0.5*tmp; |
701 | } |
702 | else |
703 | { |
704 | dadxi = 0.0; |
705 | } |
706 | |
707 | /* ai -> aj interaction */ |
708 | if (raj < dr + sk_ai) |
709 | { |
710 | lij = 1.0/(dr-sk_ai); |
711 | dlij = 1.0; |
712 | raj_inv = 1.0/raj; |
713 | |
714 | if (raj > dr-sk_ai) |
715 | { |
716 | lij = raj_inv; |
717 | dlij = 0.0; |
718 | } |
719 | |
720 | lij2 = lij * lij; |
721 | lij3 = lij2 * lij; |
722 | |
723 | uij = 1.0/(dr+sk_ai); |
724 | uij2 = uij * uij; |
725 | uij3 = uij2 * uij; |
726 | |
727 | diff2 = uij2-lij2; |
728 | |
729 | lij_inv = gmx_invsqrt(lij2)gmx_software_invsqrt(lij2); |
730 | sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */ |
731 | sk2_rinv = sk2*rinv; |
732 | prod = 0.25 * sk2_rinv; |
733 | |
734 | /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */ |
735 | log_term = log(uij*lij_inv); |
736 | |
737 | tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); |
738 | |
739 | if (raj < sk_ai-dr) |
740 | { |
741 | tmp = tmp + 2.0 * (raj_inv-lij); |
742 | } |
743 | |
744 | t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); |
745 | t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); |
746 | t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; |
747 | |
748 | dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ |
749 | |
750 | born->gpol_hct_work[k] += 0.5*tmp; |
751 | } |
752 | else |
753 | { |
754 | dadxj = 0.0; |
755 | } |
756 | fr->dadx[n++] = dadxi; |
757 | fr->dadx[n++] = dadxj; |
758 | |
759 | } |
760 | } |
761 | |
762 | /* Main part, no exclusions */ |
763 | for (j = nj1; j < nj2; j++) |
764 | { |
765 | k = j%natoms; |
766 | |
767 | /* load j atom coordinates */ |
768 | jx = x[3*k]; |
769 | jy = x[3*k+1]; |
770 | jz = x[3*k+2]; |
771 | |
772 | /* Calculate distance */ |
773 | dx = ix - jx; |
774 | dy = iy - jy; |
775 | dz = iz - jz; |
776 | rsq = dx*dx+dy*dy+dz*dz; |
777 | |
778 | /* Calculate 1/r and 1/r2 */ |
779 | rinv = gmx_invsqrt(rsq)gmx_software_invsqrt(rsq); |
780 | dr = rsq*rinv; |
781 | |
782 | /* sk is precalculated in init_gb() */ |
783 | sk = born->param[k]; |
784 | raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset; |
785 | |
786 | /* aj -> ai interaction */ |
787 | if (rai < dr+sk) |
788 | { |
789 | lij = 1.0/(dr-sk); |
790 | dlij = 1.0; |
791 | |
792 | if (rai > dr-sk) |
793 | { |
794 | lij = rai_inv; |
795 | dlij = 0.0; |
796 | } |
797 | |
798 | uij = 1.0/(dr+sk); |
799 | lij2 = lij * lij; |
800 | lij3 = lij2 * lij; |
801 | uij2 = uij * uij; |
802 | uij3 = uij2 * uij; |
803 | |
804 | diff2 = uij2-lij2; |
805 | |
806 | lij_inv = gmx_invsqrt(lij2)gmx_software_invsqrt(lij2); |
807 | sk2 = sk*sk; |
808 | sk2_rinv = sk2*rinv; |
809 | prod = 0.25*sk2_rinv; |
810 | |
811 | log_term = log(uij*lij_inv); |
812 | /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */ |
813 | tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); |
814 | |
815 | if (rai < sk-dr) |
816 | { |
817 | tmp = tmp + 2.0 * (rai_inv-lij); |
818 | } |
819 | |
820 | /* duij = 1.0; */ |
821 | t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); |
822 | t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); |
823 | t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; |
824 | |
825 | dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ |
826 | |
827 | sum_ai += 0.5*tmp; |
828 | } |
829 | else |
830 | { |
831 | dadxi = 0.0; |
832 | } |
833 | |
834 | /* ai -> aj interaction */ |
835 | if (raj < dr + sk_ai) |
836 | { |
837 | lij = 1.0/(dr-sk_ai); |
838 | dlij = 1.0; |
839 | raj_inv = 1.0/raj; |
840 | |
841 | if (raj > dr-sk_ai) |
842 | { |
843 | lij = raj_inv; |
844 | dlij = 0.0; |
845 | } |
846 | |
847 | lij2 = lij * lij; |
848 | lij3 = lij2 * lij; |
849 | |
850 | uij = 1.0/(dr+sk_ai); |
851 | uij2 = uij * uij; |
852 | uij3 = uij2 * uij; |
853 | |
854 | diff2 = uij2-lij2; |
855 | |
856 | lij_inv = gmx_invsqrt(lij2)gmx_software_invsqrt(lij2); |
857 | sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */ |
858 | sk2_rinv = sk2*rinv; |
859 | prod = 0.25 * sk2_rinv; |
860 | |
861 | /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */ |
862 | log_term = log(uij*lij_inv); |
863 | |
864 | tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); |
865 | |
866 | if (raj < sk_ai-dr) |
867 | { |
868 | tmp = tmp + 2.0 * (raj_inv-lij); |
869 | } |
870 | |
871 | t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); |
872 | t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); |
873 | t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; |
874 | |
875 | dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ |
876 | |
877 | born->gpol_hct_work[k] += 0.5*tmp; |
878 | } |
879 | else |
880 | { |
881 | dadxj = 0.0; |
882 | } |
883 | fr->dadx[n++] = dadxi; |
884 | fr->dadx[n++] = dadxj; |
885 | } |
886 | born->gpol_hct_work[i] += sum_ai; |
887 | } |
888 | |
889 | /* Parallel summations would go here if ever implemented with DD */ |
890 | |
891 | if (gb_algorithm == egbHCT) |
892 | { |
893 | /* HCT */ |
894 | for (i = 0; i < natoms; i++) |
895 | { |
896 | if (born->use[i] != 0) |
897 | { |
898 | rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset; |
899 | sum_ai = 1.0/rai - born->gpol_hct_work[i]; |
900 | min_rad = rai + born->gb_doffset; |
901 | rad = 1.0/sum_ai; |
902 | |
903 | born->bRad[i] = rad > min_rad ? rad : min_rad; |
904 | fr->invsqrta[i] = gmx_invsqrt(born->bRad[i])gmx_software_invsqrt(born->bRad[i]); |
905 | } |
906 | } |
907 | |
908 | } |
909 | else |
910 | { |
911 | /* OBC */ |
912 | /* Calculate the radii */ |
913 | for (i = 0; i < natoms; i++) |
914 | { |
915 | if (born->use[i] != 0) |
916 | { |
917 | rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]; |
918 | rai_inv2 = 1.0/rai; |
919 | rai = rai-doffset; |
920 | rai_inv = 1.0/rai; |
921 | sum_ai = rai * born->gpol_hct_work[i]; |
922 | sum_ai2 = sum_ai * sum_ai; |
923 | sum_ai3 = sum_ai2 * sum_ai; |
924 | |
925 | tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3); |
926 | born->bRad[i] = rai_inv - tsum*rai_inv2; |
927 | born->bRad[i] = 1.0 / born->bRad[i]; |
928 | |
929 | fr->invsqrta[i] = gmx_invsqrt(born->bRad[i])gmx_software_invsqrt(born->bRad[i]); |
930 | |
931 | tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2); |
932 | born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2; |
933 | } |
934 | } |
935 | } |
936 | return 0; |
937 | } |
938 | |
939 | |
940 | |
941 | |
942 | |
943 | int |
944 | genborn_allvsall_calc_chainrule(t_forcerec * fr, |
945 | t_mdatoms * mdatoms, |
946 | gmx_genborn_t * born, |
947 | real * x, |
948 | real * f, |
949 | int gb_algorithm, |
950 | void * work) |
951 | { |
952 | gmx_allvsallgb2_data_t *aadata; |
953 | int natoms; |
954 | int ni0, ni1; |
955 | int nj0, nj1, nj2; |
956 | int i, j, k, n; |
957 | int idx; |
958 | int * mask; |
959 | |
960 | real ix, iy, iz; |
961 | real fix, fiy, fiz; |
962 | real jx, jy, jz; |
963 | real dx, dy, dz; |
964 | real tx, ty, tz; |
965 | real rbai, rbaj, fgb, fgb_ai, rbi; |
966 | real * rb; |
967 | real * dadx; |
968 | |
969 | natoms = mdatoms->nr; |
970 | ni0 = 0; |
971 | ni1 = mdatoms->homenr; |
972 | dadx = fr->dadx; |
973 | |
974 | aadata = (gmx_allvsallgb2_data_t *)work; |
975 | |
976 | n = 0; |
977 | rb = born->work; |
978 | |
979 | /* Loop to get the proper form for the Born radius term */ |
980 | if (gb_algorithm == egbSTILL) |
981 | { |
982 | for (i = 0; i < natoms; i++) |
983 | { |
984 | rbi = born->bRad[i]; |
985 | rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0((332.0636930*(4.184))*0.1); |
986 | } |
987 | } |
988 | else if (gb_algorithm == egbHCT) |
989 | { |
990 | for (i = 0; i < natoms; i++) |
991 | { |
992 | rbi = born->bRad[i]; |
993 | rb[i] = rbi * rbi * fr->dvda[i]; |
994 | } |
995 | } |
996 | else if (gb_algorithm == egbOBC) |
997 | { |
998 | for (idx = 0; idx < natoms; idx++) |
999 | { |
1000 | rbi = born->bRad[idx]; |
1001 | rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx]; |
1002 | } |
1003 | } |
1004 | |
1005 | for (i = ni0; i < ni1; i++) |
1006 | { |
1007 | /* We assume shifts are NOT used for all-vs-all interactions */ |
1008 | |
1009 | /* Load i atom data */ |
1010 | ix = x[3*i]; |
1011 | iy = x[3*i+1]; |
1012 | iz = x[3*i+2]; |
1013 | |
1014 | fix = 0; |
1015 | fiy = 0; |
1016 | fiz = 0; |
1017 | |
1018 | rbai = rb[i]; |
1019 | |
1020 | /* Load limits for loop over neighbors */ |
1021 | nj0 = aadata->jindex_gb[3*i]; |
1022 | nj1 = aadata->jindex_gb[3*i+1]; |
1023 | nj2 = aadata->jindex_gb[3*i+2]; |
1024 | |
1025 | mask = aadata->exclusion_mask_gb[i]; |
1026 | |
1027 | /* Prologue part, including exclusion mask */ |
1028 | for (j = nj0; j < nj1; j++, mask++) |
1029 | { |
1030 | if (*mask != 0) |
1031 | { |
1032 | k = j%natoms; |
1033 | |
1034 | /* load j atom coordinates */ |
1035 | jx = x[3*k]; |
1036 | jy = x[3*k+1]; |
1037 | jz = x[3*k+2]; |
1038 | |
1039 | /* Calculate distance */ |
1040 | dx = ix - jx; |
1041 | dy = iy - jy; |
1042 | dz = iz - jz; |
1043 | |
1044 | rbaj = rb[k]; |
1045 | |
1046 | fgb = rbai*dadx[n++]; |
1047 | fgb_ai = rbaj*dadx[n++]; |
1048 | |
1049 | /* Total force between ai and aj is the sum of ai->aj and aj->ai */ |
1050 | fgb = fgb + fgb_ai; |
1051 | |
1052 | tx = fgb * dx; |
1053 | ty = fgb * dy; |
1054 | tz = fgb * dz; |
1055 | |
1056 | fix = fix + tx; |
1057 | fiy = fiy + ty; |
1058 | fiz = fiz + tz; |
1059 | |
1060 | /* Update force on atom aj */ |
1061 | f[3*k] = f[3*k] - tx; |
1062 | f[3*k+1] = f[3*k+1] - ty; |
1063 | f[3*k+2] = f[3*k+2] - tz; |
1064 | } |
1065 | } |
1066 | |
1067 | /* Main part, no exclusions */ |
1068 | for (j = nj1; j < nj2; j++) |
1069 | { |
1070 | k = j%natoms; |
1071 | |
1072 | /* load j atom coordinates */ |
1073 | jx = x[3*k]; |
1074 | jy = x[3*k+1]; |
1075 | jz = x[3*k+2]; |
1076 | |
1077 | /* Calculate distance */ |
1078 | dx = ix - jx; |
1079 | dy = iy - jy; |
1080 | dz = iz - jz; |
1081 | |
1082 | rbaj = rb[k]; |
1083 | |
1084 | fgb = rbai*dadx[n++]; |
1085 | fgb_ai = rbaj*dadx[n++]; |
1086 | |
1087 | /* Total force between ai and aj is the sum of ai->aj and aj->ai */ |
1088 | fgb = fgb + fgb_ai; |
1089 | |
1090 | tx = fgb * dx; |
1091 | ty = fgb * dy; |
1092 | tz = fgb * dz; |
1093 | |
1094 | fix = fix + tx; |
1095 | fiy = fiy + ty; |
1096 | fiz = fiz + tz; |
1097 | |
1098 | /* Update force on atom aj */ |
1099 | f[3*k] = f[3*k] - tx; |
1100 | f[3*k+1] = f[3*k+1] - ty; |
1101 | f[3*k+2] = f[3*k+2] - tz; |
1102 | } |
1103 | /* Update force and shift forces on atom ai */ |
1104 | f[3*i] = f[3*i] + fix; |
1105 | f[3*i+1] = f[3*i+1] + fiy; |
1106 | f[3*i+2] = f[3*i+2] + fiz; |
1107 | } |
1108 | |
1109 | return 0; |
1110 | } |