File: | gromacs/mdlib/genborn.c |
Location: | line 735, column 5 |
Description: | Value stored to 'prod' is never read |
1 | /* |
2 | * This file is part of the GROMACS molecular simulation package. |
3 | * |
4 | * Copyright (c) 1991-2000, University of Groningen, The Netherlands. |
5 | * Copyright (c) 2001-2008, The GROMACS development team. |
6 | * Copyright (c) 2013,2014, by the GROMACS development team, led by |
7 | * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, |
8 | * and including many others, as listed in the AUTHORS file in the |
9 | * top-level source directory and at http://www.gromacs.org. |
10 | * |
11 | * GROMACS is free software; you can redistribute it and/or |
12 | * modify it under the terms of the GNU Lesser General Public License |
13 | * as published by the Free Software Foundation; either version 2.1 |
14 | * of the License, or (at your option) any later version. |
15 | * |
16 | * GROMACS is distributed in the hope that it will be useful, |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | * Lesser General Public License for more details. |
20 | * |
21 | * You should have received a copy of the GNU Lesser General Public |
22 | * License along with GROMACS; if not, see |
23 | * http://www.gnu.org/licenses, or write to the Free Software Foundation, |
24 | * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
25 | * |
26 | * If you want to redistribute modifications to GROMACS, please |
27 | * consider that scientific software is very special. Version |
28 | * control is crucial - bugs must be traceable. We will be happy to |
29 | * consider code for inclusion in the official distribution, but |
30 | * derived work must not be called official GROMACS. Details are found |
31 | * in the README & COPYING files - if they are missing, get the |
32 | * official version at http://www.gromacs.org. |
33 | * |
34 | * To help us fund GROMACS development, we humbly ask that you cite |
35 | * the research papers on the package. Check out http://www.gromacs.org. |
36 | */ |
37 | |
38 | #ifdef HAVE_CONFIG_H1 |
39 | #include <config.h> |
40 | #endif |
41 | |
42 | #include <math.h> |
43 | #include <string.h> |
44 | |
45 | #include "typedefs.h" |
46 | #include "types/commrec.h" |
47 | #include "gromacs/utility/smalloc.h" |
48 | #include "genborn.h" |
49 | #include "gromacs/math/vec.h" |
50 | #include "gromacs/fileio/pdbio.h" |
51 | #include "names.h" |
52 | #include "physics.h" |
53 | #include "domdec.h" |
54 | #include "network.h" |
55 | #include "gromacs/utility/fatalerror.h" |
56 | #include "mtop_util.h" |
57 | #include "pbc.h" |
58 | #include "nrnb.h" |
59 | #include "bondf.h" |
60 | |
61 | #include "gromacs/utility/gmxmpi.h" |
62 | |
63 | #ifdef GMX_SIMD_X86_SSE2_OR_HIGHER |
64 | # ifdef GMX_DOUBLE |
65 | # include "genborn_sse2_double.h" |
66 | # include "genborn_allvsall_sse2_double.h" |
67 | # else |
68 | # include "genborn_sse2_single.h" |
69 | # include "genborn_allvsall_sse2_single.h" |
70 | # endif /* GMX_DOUBLE */ |
71 | #endif /* SSE or AVX present */ |
72 | |
73 | #include "genborn_allvsall.h" |
74 | |
75 | /*#define DISABLE_SSE*/ |
76 | |
77 | typedef struct { |
78 | int shift; |
79 | int naj; |
80 | int *aj; |
81 | int aj_nalloc; |
82 | } gbtmpnbl_t; |
83 | |
84 | typedef struct gbtmpnbls { |
85 | int nlist; |
86 | gbtmpnbl_t *list; |
87 | int list_nalloc; |
88 | } t_gbtmpnbls; |
89 | |
90 | /* This function is exactly the same as the one in bondfree.c. The reason |
91 | * it is copied here is that the bonded gb-interactions are evaluated |
92 | * not in calc_bonds, but rather in calc_gb_forces |
93 | */ |
94 | static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx) |
95 | { |
96 | if (pbc) |
97 | { |
98 | return pbc_dx_aiuc(pbc, xi, xj, dx); |
99 | } |
100 | else |
101 | { |
102 | rvec_sub(xi, xj, dx); |
103 | return CENTRAL(((2*1 +1)*(2*1 +1)*(2*2 +1))/2); |
104 | } |
105 | } |
106 | |
107 | static int init_gb_nblist(int natoms, t_nblist *nl) |
108 | { |
109 | nl->maxnri = natoms*4; |
110 | nl->maxnrj = 0; |
111 | nl->nri = 0; |
112 | nl->nrj = 0; |
113 | nl->iinr = NULL((void*)0); |
114 | nl->gid = NULL((void*)0); |
115 | nl->shift = NULL((void*)0); |
116 | nl->jindex = NULL((void*)0); |
117 | nl->jjnr = NULL((void*)0); |
118 | /*nl->nltype = nltype;*/ |
119 | |
120 | srenew(nl->iinr, nl->maxnri)(nl->iinr) = save_realloc("nl->iinr", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 120, (nl->iinr), (nl->maxnri), sizeof(*(nl->iinr)) ); |
121 | srenew(nl->gid, nl->maxnri)(nl->gid) = save_realloc("nl->gid", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 121, (nl->gid), (nl->maxnri), sizeof(*(nl->gid))); |
122 | srenew(nl->shift, nl->maxnri)(nl->shift) = save_realloc("nl->shift", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 122, (nl->shift), (nl->maxnri), sizeof(*(nl->shift ))); |
123 | srenew(nl->jindex, nl->maxnri+1)(nl->jindex) = save_realloc("nl->jindex", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 123, (nl->jindex), (nl->maxnri+1), sizeof(*(nl->jindex ))); |
124 | |
125 | nl->jindex[0] = 0; |
126 | |
127 | return 0; |
128 | } |
129 | |
130 | |
131 | static int init_gb_still(const t_atomtypes *atype, t_idef *idef, t_atoms *atoms, |
132 | gmx_genborn_t *born, int natoms) |
133 | { |
134 | |
135 | int i, j, i1, i2, k, m, nbond, nang, ia, ib, ic, id, nb, idx, idx2, at; |
136 | int iam, ibm; |
137 | int at0, at1; |
138 | real length, angle; |
139 | real r, ri, rj, ri2, ri3, rj2, r2, r3, r4, rk, ratio, term, h, doffset; |
140 | real p1, p2, p3, factor, cosine, rab, rbc; |
141 | |
142 | real *vsol; |
143 | real *gp; |
144 | |
145 | snew(vsol, natoms)(vsol) = save_calloc("vsol", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 145, (natoms), sizeof(*(vsol))); |
146 | snew(gp, natoms)(gp) = save_calloc("gp", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 146, (natoms), sizeof(*(gp))); |
147 | snew(born->gpol_still_work, natoms+3)(born->gpol_still_work) = save_calloc("born->gpol_still_work" , "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c", 147, (natoms+3), sizeof(*(born->gpol_still_work))); |
148 | |
149 | at0 = 0; |
150 | at1 = natoms; |
151 | |
152 | doffset = born->gb_doffset; |
153 | |
154 | for (i = 0; i < natoms; i++) |
155 | { |
156 | born->gpol_globalindex[i] = born->vsolv_globalindex[i] = |
157 | born->gb_radius_globalindex[i] = 0; |
158 | } |
159 | |
160 | /* Compute atomic solvation volumes for Still method */ |
161 | for (i = 0; i < natoms; i++) |
162 | { |
163 | ri = atype->gb_radius[atoms->atom[i].type]; |
164 | born->gb_radius_globalindex[i] = ri; |
165 | r3 = ri*ri*ri; |
166 | born->vsolv_globalindex[i] = (4*M_PI3.14159265358979323846/3)*r3; |
167 | } |
168 | |
169 | for (j = 0; j < idef->il[F_GB12].nr; j += 3) |
170 | { |
171 | m = idef->il[F_GB12].iatoms[j]; |
172 | ia = idef->il[F_GB12].iatoms[j+1]; |
173 | ib = idef->il[F_GB12].iatoms[j+2]; |
174 | |
175 | r = 1.01*idef->iparams[m].gb.st; |
176 | |
177 | ri = atype->gb_radius[atoms->atom[ia].type]; |
178 | rj = atype->gb_radius[atoms->atom[ib].type]; |
179 | |
180 | ri2 = ri*ri; |
181 | ri3 = ri2*ri; |
182 | rj2 = rj*rj; |
183 | |
184 | ratio = (rj2-ri2-r*r)/(2*ri*r); |
185 | h = ri*(1+ratio); |
186 | term = (M_PI3.14159265358979323846/3.0)*h*h*(3.0*ri-h); |
187 | |
188 | born->vsolv_globalindex[ia] -= term; |
189 | |
190 | ratio = (ri2-rj2-r*r)/(2*rj*r); |
191 | h = rj*(1+ratio); |
192 | term = (M_PI3.14159265358979323846/3.0)*h*h*(3.0*rj-h); |
193 | |
194 | born->vsolv_globalindex[ib] -= term; |
195 | } |
196 | |
197 | /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still |
198 | method */ |
199 | /* Self */ |
200 | for (j = 0; j < natoms; j++) |
201 | { |
202 | if (born->use_globalindex[j] == 1) |
203 | { |
204 | born->gpol_globalindex[j] = -0.5*ONE_4PI_EPS0((332.0636930*(4.184))*0.1)/ |
205 | (atype->gb_radius[atoms->atom[j].type]-doffset+STILL_P10.073*0.1); |
206 | } |
207 | } |
208 | |
209 | /* 1-2 */ |
210 | for (j = 0; j < idef->il[F_GB12].nr; j += 3) |
211 | { |
212 | m = idef->il[F_GB12].iatoms[j]; |
213 | ia = idef->il[F_GB12].iatoms[j+1]; |
214 | ib = idef->il[F_GB12].iatoms[j+2]; |
215 | |
216 | r = idef->iparams[m].gb.st; |
217 | |
218 | r4 = r*r*r*r; |
219 | |
220 | born->gpol_globalindex[ia] = born->gpol_globalindex[ia]+ |
221 | STILL_P20.921*0.1*(4.184)*born->vsolv_globalindex[ib]/r4; |
222 | born->gpol_globalindex[ib] = born->gpol_globalindex[ib]+ |
223 | STILL_P20.921*0.1*(4.184)*born->vsolv_globalindex[ia]/r4; |
224 | } |
225 | |
226 | /* 1-3 */ |
227 | for (j = 0; j < idef->il[F_GB13].nr; j += 3) |
228 | { |
229 | m = idef->il[F_GB13].iatoms[j]; |
230 | ia = idef->il[F_GB13].iatoms[j+1]; |
231 | ib = idef->il[F_GB13].iatoms[j+2]; |
232 | |
233 | r = idef->iparams[m].gb.st; |
234 | r4 = r*r*r*r; |
235 | |
236 | born->gpol_globalindex[ia] = born->gpol_globalindex[ia]+ |
237 | STILL_P36.211*0.1*(4.184)*born->vsolv_globalindex[ib]/r4; |
238 | born->gpol_globalindex[ib] = born->gpol_globalindex[ib]+ |
239 | STILL_P36.211*0.1*(4.184)*born->vsolv_globalindex[ia]/r4; |
240 | } |
241 | |
242 | sfree(vsol)save_free("vsol", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 242, (vsol)); |
243 | sfree(gp)save_free("gp", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 243, (gp)); |
244 | |
245 | return 0; |
246 | } |
247 | |
248 | /* Initialize all GB datastructs and compute polarization energies */ |
249 | int init_gb(gmx_genborn_t **p_born, |
250 | t_forcerec *fr, const t_inputrec *ir, |
251 | const gmx_mtop_t *mtop, int gb_algorithm) |
252 | { |
253 | int i, j, m, ai, aj, jj, natoms, nalloc; |
254 | real rai, sk, p, doffset; |
255 | |
256 | t_atoms atoms; |
257 | gmx_genborn_t *born; |
258 | gmx_localtop_t *localtop; |
259 | |
260 | natoms = mtop->natoms; |
261 | |
262 | atoms = gmx_mtop_global_atoms(mtop); |
263 | localtop = gmx_mtop_generate_local_top(mtop, ir); |
264 | |
265 | snew(born, 1)(born) = save_calloc("born", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 265, (1), sizeof(*(born))); |
266 | *p_born = born; |
267 | |
268 | born->nr = natoms; |
269 | |
270 | snew(born->drobc, natoms)(born->drobc) = save_calloc("born->drobc", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 270, (natoms), sizeof(*(born->drobc))); |
271 | snew(born->bRad, natoms)(born->bRad) = save_calloc("born->bRad", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 271, (natoms), sizeof(*(born->bRad))); |
272 | |
273 | /* Allocate memory for the global data arrays */ |
274 | snew(born->param_globalindex, natoms+3)(born->param_globalindex) = save_calloc("born->param_globalindex" , "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c", 274, (natoms+3), sizeof(*(born->param_globalindex))); |
275 | snew(born->gpol_globalindex, natoms+3)(born->gpol_globalindex) = save_calloc("born->gpol_globalindex" , "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c", 275, (natoms+3), sizeof(*(born->gpol_globalindex))); |
276 | snew(born->vsolv_globalindex, natoms+3)(born->vsolv_globalindex) = save_calloc("born->vsolv_globalindex" , "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c", 276, (natoms+3), sizeof(*(born->vsolv_globalindex))); |
277 | snew(born->gb_radius_globalindex, natoms+3)(born->gb_radius_globalindex) = save_calloc("born->gb_radius_globalindex" , "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c", 277, (natoms+3), sizeof(*(born->gb_radius_globalindex))); |
278 | snew(born->use_globalindex, natoms+3)(born->use_globalindex) = save_calloc("born->use_globalindex" , "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c", 278, (natoms+3), sizeof(*(born->use_globalindex))); |
279 | |
280 | snew(fr->invsqrta, natoms)(fr->invsqrta) = save_calloc("fr->invsqrta", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 280, (natoms), sizeof(*(fr->invsqrta))); |
281 | snew(fr->dvda, natoms)(fr->dvda) = save_calloc("fr->dvda", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 281, (natoms), sizeof(*(fr->dvda))); |
282 | |
283 | fr->dadx = NULL((void*)0); |
284 | fr->dadx_rawptr = NULL((void*)0); |
285 | fr->nalloc_dadx = 0; |
286 | born->gpol_still_work = NULL((void*)0); |
287 | born->gpol_hct_work = NULL((void*)0); |
288 | |
289 | /* snew(born->asurf,natoms); */ |
290 | /* snew(born->dasurf,natoms); */ |
291 | |
292 | /* Initialize the gb neighbourlist */ |
293 | init_gb_nblist(natoms, &(fr->gblist)); |
294 | |
295 | /* Do the Vsites exclusions (if any) */ |
296 | for (i = 0; i < natoms; i++) |
297 | { |
298 | jj = atoms.atom[i].type; |
299 | if (mtop->atomtypes.gb_radius[atoms.atom[i].type] > 0) |
300 | { |
301 | born->use_globalindex[i] = 1; |
302 | } |
303 | else |
304 | { |
305 | born->use_globalindex[i] = 0; |
306 | } |
307 | |
308 | /* If we have a Vsite, put vs_globalindex[i]=0 */ |
309 | if (C6 (fr->nbfp, fr->ntype, jj, jj)(fr->nbfp)[2*((fr->ntype)*(jj)+(jj))] == 0 && |
310 | C12(fr->nbfp, fr->ntype, jj, jj)(fr->nbfp)[2*((fr->ntype)*(jj)+(jj))+1] == 0 && |
311 | atoms.atom[i].q == 0) |
312 | { |
313 | born->use_globalindex[i] = 0; |
314 | } |
315 | } |
316 | |
317 | /* Copy algorithm parameters from inputrecord to local structure */ |
318 | born->obc_alpha = ir->gb_obc_alpha; |
319 | born->obc_beta = ir->gb_obc_beta; |
320 | born->obc_gamma = ir->gb_obc_gamma; |
321 | born->gb_doffset = ir->gb_dielectric_offset; |
322 | born->gb_epsilon_solvent = ir->gb_epsilon_solvent; |
323 | born->epsilon_r = ir->epsilon_r; |
324 | |
325 | doffset = born->gb_doffset; |
326 | |
327 | /* Set the surface tension */ |
328 | born->sa_surface_tension = ir->sa_surface_tension; |
329 | |
330 | /* If Still model, initialise the polarisation energies */ |
331 | if (gb_algorithm == egbSTILL) |
332 | { |
333 | init_gb_still(&(mtop->atomtypes), &(localtop->idef), &atoms, |
334 | born, natoms); |
335 | } |
336 | |
337 | |
338 | /* If HCT/OBC, precalculate the sk*atype->S_hct factors */ |
339 | else if (gb_algorithm == egbHCT || gb_algorithm == egbOBC) |
340 | { |
341 | |
342 | snew(born->gpol_hct_work, natoms+3)(born->gpol_hct_work) = save_calloc("born->gpol_hct_work" , "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c", 342, (natoms+3), sizeof(*(born->gpol_hct_work))); |
343 | |
344 | for (i = 0; i < natoms; i++) |
345 | { |
346 | if (born->use_globalindex[i] == 1) |
347 | { |
348 | rai = mtop->atomtypes.gb_radius[atoms.atom[i].type]-doffset; |
349 | sk = rai * mtop->atomtypes.S_hct[atoms.atom[i].type]; |
350 | born->param_globalindex[i] = sk; |
351 | born->gb_radius_globalindex[i] = rai; |
352 | } |
353 | else |
354 | { |
355 | born->param_globalindex[i] = 0; |
356 | born->gb_radius_globalindex[i] = 0; |
357 | } |
358 | } |
359 | } |
360 | |
361 | /* Allocate memory for work arrays for temporary use */ |
362 | snew(born->work, natoms+4)(born->work) = save_calloc("born->work", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 362, (natoms+4), sizeof(*(born->work))); |
363 | snew(born->count, natoms)(born->count) = save_calloc("born->count", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 363, (natoms), sizeof(*(born->count))); |
364 | snew(born->nblist_work, natoms)(born->nblist_work) = save_calloc("born->nblist_work", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 364, (natoms), sizeof(*(born->nblist_work))); |
365 | |
366 | /* Domain decomposition specific stuff */ |
367 | born->nalloc = 0; |
368 | |
369 | return 0; |
370 | } |
371 | |
372 | |
373 | |
374 | static int |
375 | calc_gb_rad_still(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top, |
376 | rvec x[], t_nblist *nl, |
377 | gmx_genborn_t *born, t_mdatoms *md) |
378 | { |
379 | int i, k, n, nj0, nj1, ai, aj, type; |
380 | int shift; |
381 | real shX, shY, shZ; |
382 | real gpi, dr, dr2, dr4, idr4, rvdw, ratio, ccf, theta, term, rai, raj; |
383 | real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11; |
384 | real rinv, idr2, idr6, vaj, dccf, cosq, sinq, prod, gpi2; |
385 | real factor; |
386 | real vai, prod_ai, icf4, icf6; |
387 | |
388 | factor = 0.5*ONE_4PI_EPS0((332.0636930*(4.184))*0.1); |
389 | n = 0; |
390 | |
391 | for (i = 0; i < born->nr; i++) |
392 | { |
393 | born->gpol_still_work[i] = 0; |
394 | } |
395 | |
396 | for (i = 0; i < nl->nri; i++) |
397 | { |
398 | ai = nl->iinr[i]; |
399 | |
400 | nj0 = nl->jindex[i]; |
401 | nj1 = nl->jindex[i+1]; |
402 | |
403 | /* Load shifts for this list */ |
404 | shift = nl->shift[i]; |
405 | shX = fr->shift_vec[shift][0]; |
406 | shY = fr->shift_vec[shift][1]; |
407 | shZ = fr->shift_vec[shift][2]; |
408 | |
409 | gpi = 0; |
410 | |
411 | rai = top->atomtypes.gb_radius[md->typeA[ai]]; |
412 | vai = born->vsolv[ai]; |
413 | prod_ai = STILL_P415.236*0.1*(4.184)*vai; |
414 | |
415 | /* Load atom i coordinates, add shift vectors */ |
416 | ix1 = shX + x[ai][0]; |
417 | iy1 = shY + x[ai][1]; |
418 | iz1 = shZ + x[ai][2]; |
419 | |
420 | for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++) |
421 | { |
422 | aj = nl->jjnr[k]; |
423 | jx1 = x[aj][0]; |
424 | jy1 = x[aj][1]; |
425 | jz1 = x[aj][2]; |
426 | |
427 | dx11 = ix1-jx1; |
428 | dy11 = iy1-jy1; |
429 | dz11 = iz1-jz1; |
430 | |
431 | dr2 = dx11*dx11+dy11*dy11+dz11*dz11; |
432 | rinv = gmx_invsqrt(dr2)gmx_software_invsqrt(dr2); |
433 | idr2 = rinv*rinv; |
434 | idr4 = idr2*idr2; |
435 | idr6 = idr4*idr2; |
436 | |
437 | raj = top->atomtypes.gb_radius[md->typeA[aj]]; |
438 | |
439 | rvdw = rai + raj; |
440 | |
441 | ratio = dr2 / (rvdw * rvdw); |
442 | vaj = born->vsolv[aj]; |
443 | |
444 | if (ratio > STILL_P5INV(1.0/1.254)) |
445 | { |
446 | ccf = 1.0; |
447 | dccf = 0.0; |
448 | } |
449 | else |
450 | { |
451 | theta = ratio*STILL_PIP5(3.14159265358979323846*1.254); |
452 | cosq = cos(theta); |
453 | term = 0.5*(1.0-cosq); |
454 | ccf = term*term; |
455 | sinq = 1.0 - cosq*cosq; |
456 | dccf = 2.0*term*sinq*gmx_invsqrt(sinq)gmx_software_invsqrt(sinq)*theta; |
457 | } |
458 | |
459 | prod = STILL_P415.236*0.1*(4.184)*vaj; |
460 | icf4 = ccf*idr4; |
461 | icf6 = (4*ccf-dccf)*idr6; |
462 | born->gpol_still_work[aj] += prod_ai*icf4; |
463 | gpi = gpi+prod*icf4; |
464 | |
465 | /* Save ai->aj and aj->ai chain rule terms */ |
466 | fr->dadx[n++] = prod*icf6; |
467 | fr->dadx[n++] = prod_ai*icf6; |
468 | } |
469 | born->gpol_still_work[ai] += gpi; |
470 | } |
471 | |
472 | /* Parallel summations */ |
473 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
474 | { |
475 | dd_atom_sum_real(cr->dd, born->gpol_still_work); |
476 | } |
477 | |
478 | /* Calculate the radii */ |
479 | for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */ |
480 | { |
481 | if (born->use[i] != 0) |
482 | { |
483 | gpi = born->gpol[i]+born->gpol_still_work[i]; |
484 | gpi2 = gpi * gpi; |
485 | born->bRad[i] = factor*gmx_invsqrt(gpi2)gmx_software_invsqrt(gpi2); |
486 | fr->invsqrta[i] = gmx_invsqrt(born->bRad[i])gmx_software_invsqrt(born->bRad[i]); |
487 | } |
488 | } |
489 | |
490 | /* Extra communication required for DD */ |
491 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
492 | { |
493 | dd_atom_spread_real(cr->dd, born->bRad); |
494 | dd_atom_spread_real(cr->dd, fr->invsqrta); |
495 | } |
496 | |
497 | return 0; |
498 | |
499 | } |
500 | |
501 | |
502 | static int |
503 | calc_gb_rad_hct(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top, |
504 | rvec x[], t_nblist *nl, |
505 | gmx_genborn_t *born, t_mdatoms *md) |
506 | { |
507 | int i, k, n, ai, aj, nj0, nj1, at0, at1; |
508 | int shift; |
509 | real shX, shY, shZ; |
510 | real rai, raj, gpi, dr2, dr, sk, sk_ai, sk2, sk2_ai, lij, uij, diff2, tmp, sum_ai; |
511 | real rad, min_rad, rinv, rai_inv; |
512 | real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11; |
513 | real lij2, uij2, lij3, uij3, t1, t2, t3; |
514 | real lij_inv, dlij, duij, sk2_rinv, prod, log_term; |
515 | real doffset, raj_inv, dadx_val; |
516 | real *gb_radius; |
517 | |
518 | doffset = born->gb_doffset; |
519 | gb_radius = born->gb_radius; |
520 | |
521 | for (i = 0; i < born->nr; i++) |
522 | { |
523 | born->gpol_hct_work[i] = 0; |
524 | } |
525 | |
526 | /* Keep the compiler happy */ |
527 | n = 0; |
528 | prod = 0; |
529 | |
530 | for (i = 0; i < nl->nri; i++) |
531 | { |
532 | ai = nl->iinr[i]; |
533 | |
534 | nj0 = nl->jindex[i]; |
535 | nj1 = nl->jindex[i+1]; |
536 | |
537 | /* Load shifts for this list */ |
538 | shift = nl->shift[i]; |
539 | shX = fr->shift_vec[shift][0]; |
540 | shY = fr->shift_vec[shift][1]; |
541 | shZ = fr->shift_vec[shift][2]; |
542 | |
543 | rai = gb_radius[ai]; |
544 | rai_inv = 1.0/rai; |
545 | |
546 | sk_ai = born->param[ai]; |
547 | sk2_ai = sk_ai*sk_ai; |
548 | |
549 | /* Load atom i coordinates, add shift vectors */ |
550 | ix1 = shX + x[ai][0]; |
551 | iy1 = shY + x[ai][1]; |
552 | iz1 = shZ + x[ai][2]; |
553 | |
554 | sum_ai = 0; |
555 | |
556 | for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++) |
557 | { |
558 | aj = nl->jjnr[k]; |
559 | |
560 | jx1 = x[aj][0]; |
561 | jy1 = x[aj][1]; |
562 | jz1 = x[aj][2]; |
563 | |
564 | dx11 = ix1 - jx1; |
565 | dy11 = iy1 - jy1; |
566 | dz11 = iz1 - jz1; |
567 | |
568 | dr2 = dx11*dx11+dy11*dy11+dz11*dz11; |
569 | rinv = gmx_invsqrt(dr2)gmx_software_invsqrt(dr2); |
570 | dr = rinv*dr2; |
571 | |
572 | sk = born->param[aj]; |
573 | raj = gb_radius[aj]; |
574 | |
575 | /* aj -> ai interaction */ |
576 | if (rai < dr+sk) |
577 | { |
578 | lij = 1.0/(dr-sk); |
579 | dlij = 1.0; |
580 | |
581 | if (rai > dr-sk) |
582 | { |
583 | lij = rai_inv; |
584 | dlij = 0.0; |
585 | } |
586 | |
587 | lij2 = lij*lij; |
588 | lij3 = lij2*lij; |
589 | |
590 | uij = 1.0/(dr+sk); |
591 | uij2 = uij*uij; |
592 | uij3 = uij2*uij; |
593 | |
594 | diff2 = uij2-lij2; |
595 | |
596 | lij_inv = gmx_invsqrt(lij2)gmx_software_invsqrt(lij2); |
597 | sk2 = sk*sk; |
598 | sk2_rinv = sk2*rinv; |
599 | prod = 0.25*sk2_rinv; |
600 | |
601 | log_term = log(uij*lij_inv); |
602 | |
603 | tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + |
604 | prod*(-diff2); |
605 | |
606 | if (rai < sk-dr) |
607 | { |
608 | tmp = tmp + 2.0 * (rai_inv-lij); |
609 | } |
610 | |
611 | t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); |
612 | t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); |
613 | t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; |
614 | |
615 | dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ |
616 | /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ |
617 | /* rb2 is moved to chainrule */ |
618 | |
619 | sum_ai += 0.5*tmp; |
620 | } |
621 | else |
622 | { |
623 | dadx_val = 0.0; |
624 | } |
625 | fr->dadx[n++] = dadx_val; |
626 | |
627 | |
628 | /* ai -> aj interaction */ |
629 | if (raj < dr + sk_ai) |
630 | { |
631 | lij = 1.0/(dr-sk_ai); |
632 | dlij = 1.0; |
633 | raj_inv = 1.0/raj; |
634 | |
635 | if (raj > dr-sk_ai) |
636 | { |
637 | lij = raj_inv; |
638 | dlij = 0.0; |
639 | } |
640 | |
641 | lij2 = lij * lij; |
642 | lij3 = lij2 * lij; |
643 | |
644 | uij = 1.0/(dr+sk_ai); |
645 | uij2 = uij * uij; |
646 | uij3 = uij2 * uij; |
647 | |
648 | diff2 = uij2-lij2; |
649 | |
650 | lij_inv = gmx_invsqrt(lij2)gmx_software_invsqrt(lij2); |
651 | sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */ |
652 | sk2_rinv = sk2*rinv; |
653 | prod = 0.25 * sk2_rinv; |
654 | |
655 | /* log_term = table_log(uij*lij_inv,born->log_table, |
656 | LOG_TABLE_ACCURACY); */ |
657 | log_term = log(uij*lij_inv); |
658 | |
659 | tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + |
660 | prod*(-diff2); |
661 | |
662 | if (raj < sk_ai-dr) |
663 | { |
664 | tmp = tmp + 2.0 * (raj_inv-lij); |
665 | } |
666 | |
667 | /* duij = 1.0 */ |
668 | t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); |
669 | t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); |
670 | t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; |
671 | |
672 | dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ |
673 | /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule */ |
674 | |
675 | born->gpol_hct_work[aj] += 0.5*tmp; |
676 | } |
677 | else |
678 | { |
679 | dadx_val = 0.0; |
680 | } |
681 | fr->dadx[n++] = dadx_val; |
682 | } |
683 | |
684 | born->gpol_hct_work[ai] += sum_ai; |
685 | } |
686 | |
687 | /* Parallel summations */ |
688 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
689 | { |
690 | dd_atom_sum_real(cr->dd, born->gpol_hct_work); |
691 | } |
692 | |
693 | for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */ |
694 | { |
695 | if (born->use[i] != 0) |
696 | { |
697 | rai = top->atomtypes.gb_radius[md->typeA[i]]-doffset; |
698 | sum_ai = 1.0/rai - born->gpol_hct_work[i]; |
699 | min_rad = rai + doffset; |
700 | rad = 1.0/sum_ai; |
701 | |
702 | born->bRad[i] = rad > min_rad ? rad : min_rad; |
703 | fr->invsqrta[i] = gmx_invsqrt(born->bRad[i])gmx_software_invsqrt(born->bRad[i]); |
704 | } |
705 | } |
706 | |
707 | /* Extra communication required for DD */ |
708 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
709 | { |
710 | dd_atom_spread_real(cr->dd, born->bRad); |
711 | dd_atom_spread_real(cr->dd, fr->invsqrta); |
712 | } |
713 | |
714 | |
715 | return 0; |
716 | } |
717 | |
718 | static int |
719 | calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, gmx_localtop_t *top, |
720 | rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md) |
721 | { |
722 | int i, k, ai, aj, nj0, nj1, n, at0, at1; |
723 | int shift; |
724 | real shX, shY, shZ; |
725 | real rai, raj, gpi, dr2, dr, sk, sk2, lij, uij, diff2, tmp, sum_ai; |
726 | real rad, min_rad, sum_ai2, sum_ai3, tsum, tchain, rinv, rai_inv, lij_inv, rai_inv2; |
727 | real log_term, prod, sk2_rinv, sk_ai, sk2_ai; |
728 | real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11; |
729 | real lij2, uij2, lij3, uij3, dlij, duij, t1, t2, t3; |
730 | real doffset, raj_inv, dadx_val; |
731 | real *gb_radius; |
732 | |
733 | /* Keep the compiler happy */ |
734 | n = 0; |
735 | prod = 0; |
Value stored to 'prod' is never read | |
736 | raj = 0; |
737 | |
738 | doffset = born->gb_doffset; |
739 | gb_radius = born->gb_radius; |
740 | |
741 | for (i = 0; i < born->nr; i++) |
742 | { |
743 | born->gpol_hct_work[i] = 0; |
744 | } |
745 | |
746 | for (i = 0; i < nl->nri; i++) |
747 | { |
748 | ai = nl->iinr[i]; |
749 | |
750 | nj0 = nl->jindex[i]; |
751 | nj1 = nl->jindex[i+1]; |
752 | |
753 | /* Load shifts for this list */ |
754 | shift = nl->shift[i]; |
755 | shX = fr->shift_vec[shift][0]; |
756 | shY = fr->shift_vec[shift][1]; |
757 | shZ = fr->shift_vec[shift][2]; |
758 | |
759 | rai = gb_radius[ai]; |
760 | rai_inv = 1.0/rai; |
761 | |
762 | sk_ai = born->param[ai]; |
763 | sk2_ai = sk_ai*sk_ai; |
764 | |
765 | /* Load atom i coordinates, add shift vectors */ |
766 | ix1 = shX + x[ai][0]; |
767 | iy1 = shY + x[ai][1]; |
768 | iz1 = shZ + x[ai][2]; |
769 | |
770 | sum_ai = 0; |
771 | |
772 | for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++) |
773 | { |
774 | aj = nl->jjnr[k]; |
775 | |
776 | jx1 = x[aj][0]; |
777 | jy1 = x[aj][1]; |
778 | jz1 = x[aj][2]; |
779 | |
780 | dx11 = ix1 - jx1; |
781 | dy11 = iy1 - jy1; |
782 | dz11 = iz1 - jz1; |
783 | |
784 | dr2 = dx11*dx11+dy11*dy11+dz11*dz11; |
785 | rinv = gmx_invsqrt(dr2)gmx_software_invsqrt(dr2); |
786 | dr = dr2*rinv; |
787 | |
788 | /* sk is precalculated in init_gb() */ |
789 | sk = born->param[aj]; |
790 | raj = gb_radius[aj]; |
791 | |
792 | /* aj -> ai interaction */ |
793 | if (rai < dr+sk) |
794 | { |
795 | lij = 1.0/(dr-sk); |
796 | dlij = 1.0; |
797 | |
798 | if (rai > dr-sk) |
799 | { |
800 | lij = rai_inv; |
801 | dlij = 0.0; |
802 | } |
803 | |
804 | uij = 1.0/(dr+sk); |
805 | lij2 = lij * lij; |
806 | lij3 = lij2 * lij; |
807 | uij2 = uij * uij; |
808 | uij3 = uij2 * uij; |
809 | |
810 | diff2 = uij2-lij2; |
811 | |
812 | lij_inv = gmx_invsqrt(lij2)gmx_software_invsqrt(lij2); |
813 | sk2 = sk*sk; |
814 | sk2_rinv = sk2*rinv; |
815 | prod = 0.25*sk2_rinv; |
816 | |
817 | log_term = log(uij*lij_inv); |
818 | |
819 | tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); |
820 | |
821 | if (rai < sk-dr) |
822 | { |
823 | tmp = tmp + 2.0 * (rai_inv-lij); |
824 | } |
825 | |
826 | /* duij = 1.0; */ |
827 | t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); |
828 | t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); |
829 | t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; |
830 | |
831 | dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ |
832 | |
833 | sum_ai += 0.5*tmp; |
834 | } |
835 | else |
836 | { |
837 | dadx_val = 0.0; |
838 | } |
839 | fr->dadx[n++] = dadx_val; |
840 | |
841 | /* ai -> aj interaction */ |
842 | if (raj < dr + sk_ai) |
843 | { |
844 | lij = 1.0/(dr-sk_ai); |
845 | dlij = 1.0; |
846 | raj_inv = 1.0/raj; |
847 | |
848 | if (raj > dr-sk_ai) |
849 | { |
850 | lij = raj_inv; |
851 | dlij = 0.0; |
852 | } |
853 | |
854 | lij2 = lij * lij; |
855 | lij3 = lij2 * lij; |
856 | |
857 | uij = 1.0/(dr+sk_ai); |
858 | uij2 = uij * uij; |
859 | uij3 = uij2 * uij; |
860 | |
861 | diff2 = uij2-lij2; |
862 | |
863 | lij_inv = gmx_invsqrt(lij2)gmx_software_invsqrt(lij2); |
864 | sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */ |
865 | sk2_rinv = sk2*rinv; |
866 | prod = 0.25 * sk2_rinv; |
867 | |
868 | /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */ |
869 | log_term = log(uij*lij_inv); |
870 | |
871 | tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2); |
872 | |
873 | if (raj < sk_ai-dr) |
874 | { |
875 | tmp = tmp + 2.0 * (raj_inv-lij); |
876 | } |
877 | |
878 | t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); |
879 | t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); |
880 | t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; |
881 | |
882 | dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */ |
883 | |
884 | born->gpol_hct_work[aj] += 0.5*tmp; |
885 | |
886 | } |
887 | else |
888 | { |
889 | dadx_val = 0.0; |
890 | } |
891 | fr->dadx[n++] = dadx_val; |
892 | |
893 | } |
894 | born->gpol_hct_work[ai] += sum_ai; |
895 | |
896 | } |
897 | |
898 | /* Parallel summations */ |
899 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
900 | { |
901 | dd_atom_sum_real(cr->dd, born->gpol_hct_work); |
902 | } |
903 | |
904 | for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */ |
905 | { |
906 | if (born->use[i] != 0) |
907 | { |
908 | rai = top->atomtypes.gb_radius[md->typeA[i]]; |
909 | rai_inv2 = 1.0/rai; |
910 | rai = rai-doffset; |
911 | rai_inv = 1.0/rai; |
912 | sum_ai = rai * born->gpol_hct_work[i]; |
913 | sum_ai2 = sum_ai * sum_ai; |
914 | sum_ai3 = sum_ai2 * sum_ai; |
915 | |
916 | tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3); |
917 | born->bRad[i] = rai_inv - tsum*rai_inv2; |
918 | born->bRad[i] = 1.0 / born->bRad[i]; |
919 | |
920 | fr->invsqrta[i] = gmx_invsqrt(born->bRad[i])gmx_software_invsqrt(born->bRad[i]); |
921 | |
922 | tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2); |
923 | born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2; |
924 | } |
925 | } |
926 | |
927 | /* Extra (local) communication required for DD */ |
928 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
929 | { |
930 | dd_atom_spread_real(cr->dd, born->bRad); |
931 | dd_atom_spread_real(cr->dd, fr->invsqrta); |
932 | dd_atom_spread_real(cr->dd, born->drobc); |
933 | } |
934 | |
935 | return 0; |
936 | |
937 | } |
938 | |
939 | |
940 | |
941 | int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *top, |
942 | rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, t_nrnb *nrnb) |
943 | { |
944 | real *p; |
945 | int cnt; |
946 | int ndadx; |
947 | |
948 | if (fr->bAllvsAll && fr->dadx == NULL((void*)0)) |
949 | { |
950 | /* We might need up to 8 atoms of padding before and after, |
951 | * and another 4 units to guarantee SSE alignment. |
952 | */ |
953 | fr->nalloc_dadx = 2*(md->homenr+12)*(md->nr/2+1+12); |
954 | snew(fr->dadx_rawptr, fr->nalloc_dadx)(fr->dadx_rawptr) = save_calloc("fr->dadx_rawptr", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 954, (fr->nalloc_dadx), sizeof(*(fr->dadx_rawptr))); |
955 | fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15))); |
956 | } |
957 | else |
958 | { |
959 | /* In the SSE-enabled gb-loops, when writing to dadx, we |
960 | * always write 2*4 elements at a time, even in the case with only |
961 | * 1-3 j particles, where we only really need to write 2*(1-3) |
962 | * elements. This is because we want dadx to be aligned to a 16- |
963 | * byte boundary, and being able to use _mm_store/load_ps |
964 | */ |
965 | ndadx = 2 * (nl->nrj + 3*nl->nri); |
966 | |
967 | /* First, reallocate the dadx array, we need 3 extra for SSE */ |
968 | if (ndadx + 3 > fr->nalloc_dadx) |
969 | { |
970 | fr->nalloc_dadx = over_alloc_large(ndadx)(int)(1.19*(ndadx) + 1000) + 3; |
971 | srenew(fr->dadx_rawptr, fr->nalloc_dadx)(fr->dadx_rawptr) = save_realloc("fr->dadx_rawptr", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 971, (fr->dadx_rawptr), (fr->nalloc_dadx), sizeof(*(fr ->dadx_rawptr))); |
972 | fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15))); |
973 | } |
974 | } |
975 | |
976 | if (fr->bAllvsAll) |
977 | { |
978 | cnt = md->homenr*(md->nr/2+1); |
979 | |
980 | if (ir->gb_algorithm == egbSTILL) |
981 | { |
982 | #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER) |
983 | if (fr->use_simd_kernels) |
984 | { |
985 | # ifdef GMX_DOUBLE |
986 | genborn_allvsall_calc_still_radii_sse2_double(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb); |
987 | # else |
988 | genborn_allvsall_calc_still_radii_sse2_single(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb); |
989 | # endif |
990 | } |
991 | else |
992 | { |
993 | genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb); |
994 | } |
995 | #else |
996 | genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], &fr->AllvsAll_workgb); |
997 | #endif |
998 | /* 13 flops in outer loop, 47 flops in inner loop */ |
999 | inc_nrnb(nrnb, eNR_BORN_AVA_RADII_STILL, md->homenr*13+cnt*47)(nrnb)->n[eNR_BORN_AVA_RADII_STILL] += md->homenr*13+cnt *47; |
1000 | } |
1001 | else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC) |
1002 | { |
1003 | #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER) |
1004 | if (fr->use_simd_kernels) |
1005 | { |
1006 | # ifdef GMX_DOUBLE |
1007 | genborn_allvsall_calc_hct_obc_radii_sse2_double(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb); |
1008 | # else |
1009 | genborn_allvsall_calc_hct_obc_radii_sse2_single(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb); |
1010 | # endif |
1011 | } |
1012 | else |
1013 | { |
1014 | genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb); |
1015 | } |
1016 | #else |
1017 | genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], &fr->AllvsAll_workgb); |
1018 | #endif |
1019 | /* 24 flops in outer loop, 183 in inner */ |
1020 | inc_nrnb(nrnb, eNR_BORN_AVA_RADII_HCT_OBC, md->homenr*24+cnt*183)(nrnb)->n[eNR_BORN_AVA_RADII_HCT_OBC] += md->homenr*24+ cnt*183; |
1021 | } |
1022 | else |
1023 | { |
1024 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1024, "Bad gb algorithm for all-vs-all interactions"); |
1025 | } |
1026 | return 0; |
1027 | } |
1028 | |
1029 | /* Switch for determining which algorithm to use for Born radii calculation */ |
1030 | #ifdef GMX_DOUBLE |
1031 | |
1032 | #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER) |
1033 | /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */ |
1034 | switch (ir->gb_algorithm) |
1035 | { |
1036 | case egbSTILL: |
1037 | if (fr->use_simd_kernels) |
1038 | { |
1039 | calc_gb_rad_still_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born); |
1040 | } |
1041 | else |
1042 | { |
1043 | calc_gb_rad_still(cr, fr, top, x, nl, born, md); |
1044 | } |
1045 | break; |
1046 | case egbHCT: |
1047 | if (fr->use_simd_kernels) |
1048 | { |
1049 | calc_gb_rad_hct_obc_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born, md, ir->gb_algorithm); |
1050 | } |
1051 | else |
1052 | { |
1053 | calc_gb_rad_hct(cr, fr, top, x, nl, born, md); |
1054 | } |
1055 | break; |
1056 | case egbOBC: |
1057 | if (fr->use_simd_kernels) |
1058 | { |
1059 | calc_gb_rad_hct_obc_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born, md, ir->gb_algorithm); |
1060 | } |
1061 | else |
1062 | { |
1063 | calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md); |
1064 | } |
1065 | break; |
1066 | |
1067 | default: |
1068 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1068, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d", ir->gb_algorithm); |
1069 | } |
1070 | #else |
1071 | switch (ir->gb_algorithm) |
1072 | { |
1073 | case egbSTILL: |
1074 | calc_gb_rad_still(cr, fr, top, x, nl, born, md); |
1075 | break; |
1076 | case egbHCT: |
1077 | calc_gb_rad_hct(cr, fr, top, x, nl, born, md); |
1078 | break; |
1079 | case egbOBC: |
1080 | calc_gb_rad_obc(cr, fr, top, x, nl, born, md); |
1081 | break; |
1082 | |
1083 | default: |
1084 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1084, "Unknown double precision algorithm for Born radii calculation: %d", ir->gb_algorithm); |
1085 | } |
1086 | |
1087 | #endif |
1088 | |
1089 | #else |
1090 | |
1091 | #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER) |
1092 | /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */ |
1093 | switch (ir->gb_algorithm) |
1094 | { |
1095 | case egbSTILL: |
1096 | if (fr->use_simd_kernels) |
1097 | { |
1098 | calc_gb_rad_still_sse2_single(cr, fr, born->nr, top, x[0], nl, born); |
1099 | } |
1100 | else |
1101 | { |
1102 | calc_gb_rad_still(cr, fr, top, x, nl, born, md); |
1103 | } |
1104 | break; |
1105 | case egbHCT: |
1106 | if (fr->use_simd_kernels) |
1107 | { |
1108 | calc_gb_rad_hct_obc_sse2_single(cr, fr, born->nr, top, x[0], nl, born, md, ir->gb_algorithm); |
1109 | } |
1110 | else |
1111 | { |
1112 | calc_gb_rad_hct(cr, fr, top, x, nl, born, md); |
1113 | } |
1114 | break; |
1115 | |
1116 | case egbOBC: |
1117 | if (fr->use_simd_kernels) |
1118 | { |
1119 | calc_gb_rad_hct_obc_sse2_single(cr, fr, born->nr, top, x[0], nl, born, md, ir->gb_algorithm); |
1120 | } |
1121 | else |
1122 | { |
1123 | calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md); |
1124 | } |
1125 | break; |
1126 | |
1127 | default: |
1128 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1128, "Unknown sse-enabled algorithm for Born radii calculation: %d", ir->gb_algorithm); |
1129 | } |
1130 | |
1131 | #else |
1132 | switch (ir->gb_algorithm) |
1133 | { |
1134 | case egbSTILL: |
1135 | calc_gb_rad_still(cr, fr, top, x, nl, born, md); |
1136 | break; |
1137 | case egbHCT: |
1138 | calc_gb_rad_hct(cr, fr, top, x, nl, born, md); |
1139 | break; |
1140 | case egbOBC: |
1141 | calc_gb_rad_obc(cr, fr, top, x, nl, born, md); |
1142 | break; |
1143 | |
1144 | default: |
1145 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1145, "Unknown algorithm for Born radii calculation: %d", ir->gb_algorithm); |
1146 | } |
1147 | |
1148 | #endif /* Single precision sse */ |
1149 | |
1150 | #endif /* Double or single precision */ |
1151 | |
1152 | if (fr->bAllvsAll == FALSE0) |
1153 | { |
1154 | switch (ir->gb_algorithm) |
1155 | { |
1156 | case egbSTILL: |
1157 | /* 17 flops per outer loop iteration, 47 flops per inner loop */ |
1158 | inc_nrnb(nrnb, eNR_BORN_RADII_STILL, nl->nri*17+nl->nrj*47)(nrnb)->n[eNR_BORN_RADII_STILL] += nl->nri*17+nl->nrj *47; |
1159 | break; |
1160 | case egbHCT: |
1161 | case egbOBC: |
1162 | /* 61 (assuming 10 for tanh) flops for outer loop iteration, 183 flops per inner loop */ |
1163 | inc_nrnb(nrnb, eNR_BORN_RADII_HCT_OBC, nl->nri*61+nl->nrj*183)(nrnb)->n[eNR_BORN_RADII_HCT_OBC] += nl->nri*61+nl-> nrj*183; |
1164 | break; |
1165 | |
1166 | default: |
1167 | break; |
1168 | } |
1169 | } |
1170 | |
1171 | return 0; |
1172 | } |
1173 | |
1174 | |
1175 | |
1176 | real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtabscale, |
1177 | real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r, |
1178 | real gb_epsilon_solvent, real facel, const t_pbc *pbc, const t_graph *graph) |
1179 | { |
1180 | int i, j, n0, m, nnn, type, ai, aj; |
1181 | int ki; |
1182 | |
1183 | real isai, isaj; |
1184 | real r, rsq11; |
1185 | real rinv11, iq; |
1186 | real isaprod, qq, gbscale, gbtabscale, Y, F, Geps, Heps2, Fp, VV, FF, rt, eps, eps2; |
1187 | real vgb, fgb, vcoul, fijC, dvdatmp, fscal, dvdaj; |
1188 | real vctot; |
1189 | |
1190 | rvec dx; |
1191 | ivec dt; |
1192 | |
1193 | t_iatom *forceatoms; |
1194 | |
1195 | /* Scale the electrostatics by gb_epsilon_solvent */ |
1196 | facel = facel * ((1.0/epsilon_r) - 1.0/gb_epsilon_solvent); |
1197 | |
1198 | gbtabscale = *p_gbtabscale; |
1199 | vctot = 0.0; |
1200 | |
1201 | for (j = F_GB12; j <= F_GB14; j++) |
1202 | { |
1203 | forceatoms = idef->il[j].iatoms; |
1204 | |
1205 | for (i = 0; i < idef->il[j].nr; ) |
1206 | { |
1207 | /* To avoid reading in the interaction type, we just increment i to pass over |
1208 | * the types in the forceatoms array, this saves some memory accesses |
1209 | */ |
1210 | i++; |
1211 | ai = forceatoms[i++]; |
1212 | aj = forceatoms[i++]; |
1213 | |
1214 | ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); |
1215 | rsq11 = iprod(dx, dx); |
1216 | |
1217 | isai = invsqrta[ai]; |
1218 | iq = (-1)*facel*charge[ai]; |
1219 | |
1220 | rinv11 = gmx_invsqrt(rsq11)gmx_software_invsqrt(rsq11); |
1221 | isaj = invsqrta[aj]; |
1222 | isaprod = isai*isaj; |
1223 | qq = isaprod*iq*charge[aj]; |
1224 | gbscale = isaprod*gbtabscale; |
1225 | r = rsq11*rinv11; |
1226 | rt = r*gbscale; |
1227 | n0 = rt; |
1228 | eps = rt-n0; |
1229 | eps2 = eps*eps; |
1230 | nnn = 4*n0; |
1231 | Y = GBtab[nnn]; |
1232 | F = GBtab[nnn+1]; |
1233 | Geps = eps*GBtab[nnn+2]; |
1234 | Heps2 = eps2*GBtab[nnn+3]; |
1235 | Fp = F+Geps+Heps2; |
1236 | VV = Y+eps*Fp; |
1237 | FF = Fp+Geps+2.0*Heps2; |
1238 | vgb = qq*VV; |
1239 | fijC = qq*FF*gbscale; |
1240 | dvdatmp = -(vgb+fijC*r)*0.5; |
1241 | dvda[aj] = dvda[aj] + dvdatmp*isaj*isaj; |
1242 | dvda[ai] = dvda[ai] + dvdatmp*isai*isai; |
1243 | vctot = vctot + vgb; |
1244 | fgb = -(fijC)*rinv11; |
1245 | |
1246 | if (graph) |
1247 | { |
1248 | ivec_sub(SHIFT_IVEC(graph, ai)((graph)->ishift[ai]), SHIFT_IVEC(graph, aj)((graph)->ishift[aj]), dt); |
1249 | ki = IVEC2IS(dt)(((2*2 +1)*((2*1 +1)*(((dt)[2])+1)+((dt)[1])+1)+((dt)[0])+2)); |
1250 | } |
1251 | |
1252 | for (m = 0; (m < DIM3); m++) /* 15 */ |
1253 | { |
1254 | fscal = fgb*dx[m]; |
1255 | f[ai][m] += fscal; |
1256 | f[aj][m] -= fscal; |
1257 | fshift[ki][m] += fscal; |
1258 | fshift[CENTRAL(((2*1 +1)*(2*1 +1)*(2*2 +1))/2)][m] -= fscal; |
1259 | } |
1260 | } |
1261 | } |
1262 | |
1263 | return vctot; |
1264 | } |
1265 | |
1266 | real calc_gb_selfcorrections(t_commrec *cr, int natoms, |
1267 | real *charge, gmx_genborn_t *born, real *dvda, double facel) |
1268 | { |
1269 | int i, ai, at0, at1; |
1270 | real rai, e, derb, q, q2, fi, rai_inv, vtot; |
1271 | |
1272 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
1273 | { |
1274 | at0 = 0; |
1275 | at1 = cr->dd->nat_home; |
1276 | } |
1277 | else |
1278 | { |
1279 | at0 = 0; |
1280 | at1 = natoms; |
1281 | |
1282 | } |
1283 | |
1284 | /* Scale the electrostatics by gb_epsilon_solvent */ |
1285 | facel = facel * ((1.0/born->epsilon_r) - 1.0/born->gb_epsilon_solvent); |
1286 | |
1287 | vtot = 0.0; |
1288 | |
1289 | /* Apply self corrections */ |
1290 | for (i = at0; i < at1; i++) |
1291 | { |
1292 | ai = i; |
1293 | |
1294 | if (born->use[ai] == 1) |
1295 | { |
1296 | rai = born->bRad[ai]; |
1297 | rai_inv = 1.0/rai; |
1298 | q = charge[ai]; |
1299 | q2 = q*q; |
1300 | fi = facel*q2; |
1301 | e = fi*rai_inv; |
1302 | derb = 0.5*e*rai_inv*rai_inv; |
1303 | dvda[ai] += derb*rai; |
1304 | vtot -= 0.5*e; |
1305 | } |
1306 | } |
1307 | |
1308 | return vtot; |
1309 | |
1310 | } |
1311 | |
1312 | real calc_gb_nonpolar(t_commrec *cr, t_forcerec *fr, int natoms, gmx_genborn_t *born, gmx_localtop_t *top, |
1313 | real *dvda, t_mdatoms *md) |
1314 | { |
1315 | int ai, i, at0, at1; |
1316 | real e, es, rai, rbi, term, probe, tmp, factor; |
1317 | real rbi_inv, rbi_inv2; |
1318 | |
1319 | /* To keep the compiler happy */ |
1320 | factor = 0; |
1321 | |
1322 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
1323 | { |
1324 | at0 = 0; |
1325 | at1 = cr->dd->nat_home; |
1326 | } |
1327 | else |
1328 | { |
1329 | at0 = 0; |
1330 | at1 = natoms; |
1331 | } |
1332 | |
1333 | /* factor is the surface tension */ |
1334 | factor = born->sa_surface_tension; |
1335 | /* |
1336 | |
1337 | // The surface tension factor is 0.0049 for Still model, 0.0054 for HCT/OBC |
1338 | if(gb_algorithm==egbSTILL) |
1339 | { |
1340 | factor=0.0049*100*CAL2JOULE; |
1341 | } |
1342 | else |
1343 | { |
1344 | factor=0.0054*100*CAL2JOULE; |
1345 | } |
1346 | */ |
1347 | /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */ |
1348 | |
1349 | es = 0; |
1350 | probe = 0.14; |
1351 | term = M_PI3.14159265358979323846*4; |
1352 | |
1353 | for (i = at0; i < at1; i++) |
1354 | { |
1355 | ai = i; |
1356 | |
1357 | if (born->use[ai] == 1) |
1358 | { |
1359 | rai = top->atomtypes.gb_radius[md->typeA[ai]]; |
1360 | rbi_inv = fr->invsqrta[ai]; |
1361 | rbi_inv2 = rbi_inv * rbi_inv; |
1362 | tmp = (rai*rbi_inv2)*(rai*rbi_inv2); |
1363 | tmp = tmp*tmp*tmp; |
1364 | e = factor*term*(rai+probe)*(rai+probe)*tmp; |
1365 | dvda[ai] = dvda[ai] - 6*e*rbi_inv2; |
1366 | es = es + e; |
1367 | } |
1368 | } |
1369 | |
1370 | return es; |
1371 | } |
1372 | |
1373 | |
1374 | |
1375 | real calc_gb_chainrule(int natoms, t_nblist *nl, real *dadx, real *dvda, rvec x[], rvec t[], rvec fshift[], |
1376 | rvec shift_vec[], int gb_algorithm, gmx_genborn_t *born) |
1377 | { |
1378 | int i, k, n, ai, aj, nj0, nj1, n0, n1; |
1379 | int shift; |
1380 | real shX, shY, shZ; |
1381 | real fgb, fij, rb2, rbi, fix1, fiy1, fiz1; |
1382 | real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11, rsq11; |
1383 | real rinv11, tx, ty, tz, rbai, rbaj, fgb_ai; |
1384 | real *rb; |
1385 | volatile int idx; |
1386 | |
1387 | n = 0; |
1388 | rb = born->work; |
1389 | |
1390 | n0 = 0; |
1391 | n1 = natoms; |
1392 | |
1393 | if (gb_algorithm == egbSTILL) |
1394 | { |
1395 | for (i = n0; i < n1; i++) |
1396 | { |
1397 | rbi = born->bRad[i]; |
1398 | rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0((332.0636930*(4.184))*0.1); |
1399 | } |
1400 | } |
1401 | else if (gb_algorithm == egbHCT) |
1402 | { |
1403 | for (i = n0; i < n1; i++) |
1404 | { |
1405 | rbi = born->bRad[i]; |
1406 | rb[i] = rbi * rbi * dvda[i]; |
1407 | } |
1408 | } |
1409 | else if (gb_algorithm == egbOBC) |
1410 | { |
1411 | for (i = n0; i < n1; i++) |
1412 | { |
1413 | rbi = born->bRad[i]; |
1414 | rb[i] = rbi * rbi * born->drobc[i] * dvda[i]; |
1415 | } |
1416 | } |
1417 | |
1418 | for (i = 0; i < nl->nri; i++) |
1419 | { |
1420 | ai = nl->iinr[i]; |
1421 | |
1422 | nj0 = nl->jindex[i]; |
1423 | nj1 = nl->jindex[i+1]; |
1424 | |
1425 | /* Load shifts for this list */ |
1426 | shift = nl->shift[i]; |
1427 | shX = shift_vec[shift][0]; |
1428 | shY = shift_vec[shift][1]; |
1429 | shZ = shift_vec[shift][2]; |
1430 | |
1431 | /* Load atom i coordinates, add shift vectors */ |
1432 | ix1 = shX + x[ai][0]; |
1433 | iy1 = shY + x[ai][1]; |
1434 | iz1 = shZ + x[ai][2]; |
1435 | |
1436 | fix1 = 0; |
1437 | fiy1 = 0; |
1438 | fiz1 = 0; |
1439 | |
1440 | rbai = rb[ai]; |
1441 | |
1442 | for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++) |
1443 | { |
1444 | aj = nl->jjnr[k]; |
1445 | |
1446 | jx1 = x[aj][0]; |
1447 | jy1 = x[aj][1]; |
1448 | jz1 = x[aj][2]; |
1449 | |
1450 | dx11 = ix1 - jx1; |
1451 | dy11 = iy1 - jy1; |
1452 | dz11 = iz1 - jz1; |
1453 | |
1454 | rbaj = rb[aj]; |
1455 | |
1456 | fgb = rbai*dadx[n++]; |
1457 | fgb_ai = rbaj*dadx[n++]; |
1458 | |
1459 | /* Total force between ai and aj is the sum of ai->aj and aj->ai */ |
1460 | fgb = fgb + fgb_ai; |
1461 | |
1462 | tx = fgb * dx11; |
1463 | ty = fgb * dy11; |
1464 | tz = fgb * dz11; |
1465 | |
1466 | fix1 = fix1 + tx; |
1467 | fiy1 = fiy1 + ty; |
1468 | fiz1 = fiz1 + tz; |
1469 | |
1470 | /* Update force on atom aj */ |
1471 | t[aj][0] = t[aj][0] - tx; |
1472 | t[aj][1] = t[aj][1] - ty; |
1473 | t[aj][2] = t[aj][2] - tz; |
1474 | } |
1475 | |
1476 | /* Update force and shift forces on atom ai */ |
1477 | t[ai][0] = t[ai][0] + fix1; |
1478 | t[ai][1] = t[ai][1] + fiy1; |
1479 | t[ai][2] = t[ai][2] + fiz1; |
1480 | |
1481 | fshift[shift][0] = fshift[shift][0] + fix1; |
1482 | fshift[shift][1] = fshift[shift][1] + fiy1; |
1483 | fshift[shift][2] = fshift[shift][2] + fiz1; |
1484 | |
1485 | } |
1486 | |
1487 | return 0; |
1488 | } |
1489 | |
1490 | |
1491 | void |
1492 | calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t *top, |
1493 | rvec x[], rvec f[], t_forcerec *fr, t_idef *idef, int gb_algorithm, int sa_algorithm, t_nrnb *nrnb, |
1494 | const t_pbc *pbc, const t_graph *graph, gmx_enerdata_t *enerd) |
1495 | { |
1496 | real v = 0; |
1497 | int cnt; |
1498 | int i; |
1499 | |
1500 | /* PBC or not? */ |
1501 | const t_pbc *pbc_null; |
1502 | |
1503 | if (fr->bMolPBC) |
1504 | { |
1505 | pbc_null = pbc; |
1506 | } |
1507 | else |
1508 | { |
1509 | pbc_null = NULL((void*)0); |
1510 | } |
1511 | |
1512 | if (sa_algorithm == esaAPPROX) |
1513 | { |
1514 | /* Do a simple ACE type approximation for the non-polar solvation */ |
1515 | enerd->term[F_NPSOLVATION] += calc_gb_nonpolar(cr, fr, born->nr, born, top, fr->dvda, md); |
1516 | } |
1517 | |
1518 | /* Calculate the bonded GB-interactions using either table or analytical formula */ |
1519 | enerd->term[F_GBPOL] += gb_bonds_tab(x, f, fr->fshift, md->chargeA, &(fr->gbtabscale), |
1520 | fr->invsqrta, fr->dvda, fr->gbtab.data, idef, born->epsilon_r, born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph); |
1521 | |
1522 | /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */ |
1523 | enerd->term[F_GBPOL] += calc_gb_selfcorrections(cr, born->nr, md->chargeA, born, fr->dvda, fr->epsfac); |
1524 | |
1525 | /* If parallel, sum the derivative of the potential w.r.t the born radii */ |
1526 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
1527 | { |
1528 | dd_atom_sum_real(cr->dd, fr->dvda); |
1529 | dd_atom_spread_real(cr->dd, fr->dvda); |
1530 | } |
1531 | |
1532 | if (fr->bAllvsAll) |
1533 | { |
1534 | #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER) |
1535 | if (fr->use_simd_kernels) |
1536 | { |
1537 | # ifdef GMX_DOUBLE |
1538 | genborn_allvsall_calc_chainrule_sse2_double(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb); |
1539 | # else |
1540 | genborn_allvsall_calc_chainrule_sse2_single(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb); |
1541 | # endif |
1542 | } |
1543 | else |
1544 | { |
1545 | genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb); |
1546 | } |
1547 | #else |
1548 | genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb); |
1549 | #endif |
1550 | cnt = md->homenr*(md->nr/2+1); |
1551 | /* 9 flops for outer loop, 15 for inner */ |
1552 | inc_nrnb(nrnb, eNR_BORN_AVA_CHAINRULE, md->homenr*9+cnt*15)(nrnb)->n[eNR_BORN_AVA_CHAINRULE] += md->homenr*9+cnt*15; |
1553 | return; |
1554 | } |
1555 | |
1556 | #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER) |
1557 | if (fr->use_simd_kernels) |
1558 | { |
1559 | # ifdef GMX_DOUBLE |
1560 | calc_gb_chainrule_sse2_double(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, x[0], |
1561 | f[0], fr->fshift[0], fr->shift_vec[0], gb_algorithm, born, md); |
1562 | # else |
1563 | calc_gb_chainrule_sse2_single(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, x[0], |
1564 | f[0], fr->fshift[0], fr->shift_vec[0], gb_algorithm, born, md); |
1565 | # endif |
1566 | } |
1567 | else |
1568 | { |
1569 | calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, |
1570 | x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md); |
1571 | } |
1572 | #else |
1573 | calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, |
1574 | x, f, fr->fshift, fr->shift_vec, gb_algorithm, born); |
1575 | #endif |
1576 | |
1577 | if (!fr->bAllvsAll) |
1578 | { |
1579 | /* 9 flops for outer loop, 15 for inner */ |
1580 | inc_nrnb(nrnb, eNR_BORN_CHAINRULE, fr->gblist.nri*9+fr->gblist.nrj*15)(nrnb)->n[eNR_BORN_CHAINRULE] += fr->gblist.nri*9+fr-> gblist.nrj*15; |
1581 | } |
1582 | } |
1583 | |
1584 | static void add_j_to_gblist(gbtmpnbl_t *list, int aj) |
1585 | { |
1586 | if (list->naj >= list->aj_nalloc) |
1587 | { |
1588 | list->aj_nalloc = over_alloc_large(list->naj+1)(int)(1.19*(list->naj+1) + 1000); |
1589 | srenew(list->aj, list->aj_nalloc)(list->aj) = save_realloc("list->aj", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1589, (list->aj), (list->aj_nalloc), sizeof(*(list-> aj))); |
1590 | } |
1591 | |
1592 | list->aj[list->naj++] = aj; |
1593 | } |
1594 | |
1595 | static gbtmpnbl_t *find_gbtmplist(struct gbtmpnbls *lists, int shift) |
1596 | { |
1597 | int ind, i; |
1598 | |
1599 | /* Search the list with the same shift, if there is one */ |
1600 | ind = 0; |
1601 | while (ind < lists->nlist && shift != lists->list[ind].shift) |
1602 | { |
1603 | ind++; |
1604 | } |
1605 | if (ind == lists->nlist) |
1606 | { |
1607 | if (lists->nlist == lists->list_nalloc) |
1608 | { |
1609 | lists->list_nalloc++; |
1610 | srenew(lists->list, lists->list_nalloc)(lists->list) = save_realloc("lists->list", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1610, (lists->list), (lists->list_nalloc), sizeof(*(lists ->list))); |
1611 | for (i = lists->nlist; i < lists->list_nalloc; i++) |
1612 | { |
1613 | lists->list[i].aj = NULL((void*)0); |
1614 | lists->list[i].aj_nalloc = 0; |
1615 | } |
1616 | |
1617 | } |
1618 | |
1619 | lists->list[lists->nlist].shift = shift; |
1620 | lists->list[lists->nlist].naj = 0; |
1621 | lists->nlist++; |
1622 | } |
1623 | |
1624 | return &lists->list[ind]; |
1625 | } |
1626 | |
1627 | static void add_bondeds_to_gblist(t_ilist *il, |
1628 | gmx_bool bMolPBC, t_pbc *pbc, t_graph *g, rvec *x, |
1629 | struct gbtmpnbls *nls) |
1630 | { |
1631 | int ind, j, ai, aj, shift, found; |
1632 | rvec dx; |
1633 | ivec dt; |
1634 | gbtmpnbl_t *list; |
1635 | |
1636 | shift = CENTRAL(((2*1 +1)*(2*1 +1)*(2*2 +1))/2); |
1637 | for (ind = 0; ind < il->nr; ind += 3) |
1638 | { |
1639 | ai = il->iatoms[ind+1]; |
1640 | aj = il->iatoms[ind+2]; |
1641 | |
1642 | shift = CENTRAL(((2*1 +1)*(2*1 +1)*(2*2 +1))/2); |
1643 | if (g != NULL((void*)0)) |
1644 | { |
1645 | rvec_sub(x[ai], x[aj], dx); |
1646 | ivec_sub(SHIFT_IVEC(g, ai)((g)->ishift[ai]), SHIFT_IVEC(g, aj)((g)->ishift[aj]), dt); |
1647 | shift = IVEC2IS(dt)(((2*2 +1)*((2*1 +1)*(((dt)[2])+1)+((dt)[1])+1)+((dt)[0])+2)); |
1648 | } |
1649 | else if (bMolPBC) |
1650 | { |
1651 | shift = pbc_dx_aiuc(pbc, x[ai], x[aj], dx); |
1652 | } |
1653 | |
1654 | /* Find the list for this shift or create one */ |
1655 | list = find_gbtmplist(&nls[ai], shift); |
1656 | |
1657 | found = 0; |
1658 | |
1659 | /* So that we do not add the same bond twice. |
1660 | * This happens with some constraints between 1-3 atoms |
1661 | * that are in the bond-list but should not be in the GB nb-list */ |
1662 | for (j = 0; j < list->naj; j++) |
1663 | { |
1664 | if (list->aj[j] == aj) |
1665 | { |
1666 | found = 1; |
1667 | } |
1668 | } |
1669 | |
1670 | if (found == 0) |
1671 | { |
1672 | if (ai == aj) |
1673 | { |
1674 | gmx_incons("ai == aj")_gmx_error("incons", "ai == aj", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1674); |
1675 | } |
1676 | |
1677 | add_j_to_gblist(list, aj); |
1678 | } |
1679 | } |
1680 | } |
1681 | |
1682 | static int |
1683 | compare_int (const void * a, const void * b) |
1684 | { |
1685 | return ( *(int*)a - *(int*)b ); |
1686 | } |
1687 | |
1688 | |
1689 | |
1690 | int make_gb_nblist(t_commrec *cr, int gb_algorithm, |
1691 | rvec x[], matrix box, |
1692 | t_forcerec *fr, t_idef *idef, t_graph *graph, gmx_genborn_t *born) |
1693 | { |
1694 | int i, l, ii, j, k, n, nj0, nj1, ai, aj, at0, at1, found, shift, s; |
1695 | int apa; |
1696 | t_nblist *nblist; |
1697 | t_pbc pbc; |
1698 | |
1699 | struct gbtmpnbls *nls; |
1700 | gbtmpnbl_t *list = NULL((void*)0); |
1701 | |
1702 | set_pbc(&pbc, fr->ePBC, box); |
1703 | nls = born->nblist_work; |
1704 | |
1705 | for (i = 0; i < born->nr; i++) |
1706 | { |
1707 | nls[i].nlist = 0; |
1708 | } |
1709 | |
1710 | if (fr->bMolPBC) |
1711 | { |
1712 | set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE1, box); |
1713 | } |
1714 | |
1715 | switch (gb_algorithm) |
1716 | { |
1717 | case egbHCT: |
1718 | case egbOBC: |
1719 | /* Loop over 1-2, 1-3 and 1-4 interactions */ |
1720 | for (j = F_GB12; j <= F_GB14; j++) |
1721 | { |
1722 | add_bondeds_to_gblist(&idef->il[j], fr->bMolPBC, &pbc, graph, x, nls); |
1723 | } |
1724 | break; |
1725 | case egbSTILL: |
1726 | /* Loop over 1-4 interactions */ |
1727 | add_bondeds_to_gblist(&idef->il[F_GB14], fr->bMolPBC, &pbc, graph, x, nls); |
1728 | break; |
1729 | default: |
1730 | gmx_incons("Unknown GB algorithm")_gmx_error("incons", "Unknown GB algorithm", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1730); |
1731 | } |
1732 | |
1733 | /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */ |
1734 | for (n = 0; (n < fr->nnblists); n++) |
1735 | { |
1736 | for (i = 0; (i < eNL_NR); i++) |
1737 | { |
1738 | nblist = &(fr->nblists[n].nlist_sr[i]); |
1739 | |
1740 | if (nblist->nri > 0 && (i == eNL_VDWQQ || i == eNL_QQ)) |
1741 | { |
1742 | for (j = 0; j < nblist->nri; j++) |
1743 | { |
1744 | ai = nblist->iinr[j]; |
1745 | shift = nblist->shift[j]; |
1746 | |
1747 | /* Find the list for this shift or create one */ |
1748 | list = find_gbtmplist(&nls[ai], shift); |
1749 | |
1750 | nj0 = nblist->jindex[j]; |
1751 | nj1 = nblist->jindex[j+1]; |
1752 | |
1753 | /* Add all the j-atoms in the non-bonded list to the GB list */ |
1754 | for (k = nj0; k < nj1; k++) |
1755 | { |
1756 | add_j_to_gblist(list, nblist->jjnr[k]); |
1757 | } |
1758 | } |
1759 | } |
1760 | } |
1761 | } |
1762 | |
1763 | /* Zero out some counters */ |
1764 | fr->gblist.nri = 0; |
1765 | fr->gblist.nrj = 0; |
1766 | |
1767 | fr->gblist.jindex[0] = fr->gblist.nri; |
1768 | |
1769 | for (i = 0; i < fr->natoms_force; i++) |
1770 | { |
1771 | for (s = 0; s < nls[i].nlist; s++) |
1772 | { |
1773 | list = &nls[i].list[s]; |
1774 | |
1775 | /* Only add those atoms that actually have neighbours */ |
1776 | if (born->use[i] != 0) |
1777 | { |
1778 | fr->gblist.iinr[fr->gblist.nri] = i; |
1779 | fr->gblist.shift[fr->gblist.nri] = list->shift; |
1780 | fr->gblist.nri++; |
1781 | |
1782 | for (k = 0; k < list->naj; k++) |
1783 | { |
1784 | /* Memory allocation for jjnr */ |
1785 | if (fr->gblist.nrj >= fr->gblist.maxnrj) |
1786 | { |
1787 | fr->gblist.maxnrj += over_alloc_large(fr->gblist.maxnrj)(int)(1.19*(fr->gblist.maxnrj) + 1000); |
1788 | |
1789 | if (debug) |
1790 | { |
1791 | fprintf(debug, "Increasing GB neighbourlist j size to %d\n", fr->gblist.maxnrj); |
1792 | } |
1793 | |
1794 | srenew(fr->gblist.jjnr, fr->gblist.maxnrj)(fr->gblist.jjnr) = save_realloc("fr->gblist.jjnr", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1794, (fr->gblist.jjnr), (fr->gblist.maxnrj), sizeof( *(fr->gblist.jjnr))); |
1795 | } |
1796 | |
1797 | /* Put in list */ |
1798 | if (i == list->aj[k]) |
1799 | { |
1800 | gmx_incons("i == list->aj[k]")_gmx_error("incons", "i == list->aj[k]", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1800); |
1801 | } |
1802 | fr->gblist.jjnr[fr->gblist.nrj++] = list->aj[k]; |
1803 | } |
1804 | |
1805 | fr->gblist.jindex[fr->gblist.nri] = fr->gblist.nrj; |
1806 | } |
1807 | } |
1808 | } |
1809 | |
1810 | |
1811 | #ifdef SORT_GB_LIST |
1812 | for (i = 0; i < fr->gblist.nri; i++) |
1813 | { |
1814 | nj0 = fr->gblist.jindex[i]; |
1815 | nj1 = fr->gblist.jindex[i+1]; |
1816 | ai = fr->gblist.iinr[i]; |
1817 | |
1818 | /* Temporary fix */ |
1819 | for (j = nj0; j < nj1; j++) |
1820 | { |
1821 | if (fr->gblist.jjnr[j] < ai) |
1822 | { |
1823 | fr->gblist.jjnr[j] += fr->natoms_force; |
1824 | } |
1825 | } |
1826 | qsort(fr->gblist.jjnr+nj0, nj1-nj0, sizeof(int), compare_int); |
1827 | /* Fix back */ |
1828 | for (j = nj0; j < nj1; j++) |
1829 | { |
1830 | if (fr->gblist.jjnr[j] >= fr->natoms_force) |
1831 | { |
1832 | fr->gblist.jjnr[j] -= fr->natoms_force; |
1833 | } |
1834 | } |
1835 | |
1836 | } |
1837 | #endif |
1838 | |
1839 | return 0; |
1840 | } |
1841 | |
1842 | void make_local_gb(const t_commrec *cr, gmx_genborn_t *born, int gb_algorithm) |
1843 | { |
1844 | int i, at0, at1; |
1845 | gmx_domdec_t *dd = NULL((void*)0); |
1846 | |
1847 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1))) |
1848 | { |
1849 | dd = cr->dd; |
1850 | at0 = 0; |
1851 | at1 = dd->nat_tot; |
1852 | } |
1853 | else |
1854 | { |
1855 | /* Single node, just copy pointers and return */ |
1856 | if (gb_algorithm == egbSTILL) |
1857 | { |
1858 | born->gpol = born->gpol_globalindex; |
1859 | born->vsolv = born->vsolv_globalindex; |
1860 | born->gb_radius = born->gb_radius_globalindex; |
1861 | } |
1862 | else |
1863 | { |
1864 | born->param = born->param_globalindex; |
1865 | born->gb_radius = born->gb_radius_globalindex; |
1866 | } |
1867 | |
1868 | born->use = born->use_globalindex; |
1869 | |
1870 | return; |
1871 | } |
1872 | |
1873 | /* Reallocation of local arrays if necessary */ |
1874 | /* fr->natoms_force is equal to dd->nat_tot */ |
1875 | if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) && dd->nat_tot > born->nalloc) |
1876 | { |
1877 | int nalloc; |
1878 | |
1879 | nalloc = dd->nat_tot; |
1880 | |
1881 | /* Arrays specific to different gb algorithms */ |
1882 | if (gb_algorithm == egbSTILL) |
1883 | { |
1884 | srenew(born->gpol, nalloc+3)(born->gpol) = save_realloc("born->gpol", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1884, (born->gpol), (nalloc+3), sizeof(*(born->gpol)) ); |
1885 | srenew(born->vsolv, nalloc+3)(born->vsolv) = save_realloc("born->vsolv", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1885, (born->vsolv), (nalloc+3), sizeof(*(born->vsolv ))); |
1886 | srenew(born->gb_radius, nalloc+3)(born->gb_radius) = save_realloc("born->gb_radius", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1886, (born->gb_radius), (nalloc+3), sizeof(*(born->gb_radius ))); |
1887 | for (i = born->nalloc; (i < nalloc+3); i++) |
1888 | { |
1889 | born->gpol[i] = 0; |
1890 | born->vsolv[i] = 0; |
1891 | born->gb_radius[i] = 0; |
1892 | } |
1893 | } |
1894 | else |
1895 | { |
1896 | srenew(born->param, nalloc+3)(born->param) = save_realloc("born->param", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1896, (born->param), (nalloc+3), sizeof(*(born->param ))); |
1897 | srenew(born->gb_radius, nalloc+3)(born->gb_radius) = save_realloc("born->gb_radius", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1897, (born->gb_radius), (nalloc+3), sizeof(*(born->gb_radius ))); |
1898 | for (i = born->nalloc; (i < nalloc+3); i++) |
1899 | { |
1900 | born->param[i] = 0; |
1901 | born->gb_radius[i] = 0; |
1902 | } |
1903 | } |
1904 | |
1905 | /* All gb-algorithms use the array for vsites exclusions */ |
1906 | srenew(born->use, nalloc+3)(born->use) = save_realloc("born->use", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/genborn.c" , 1906, (born->use), (nalloc+3), sizeof(*(born->use))); |
1907 | for (i = born->nalloc; (i < nalloc+3); i++) |
1908 | { |
1909 | born->use[i] = 0; |
1910 | } |
1911 | |
1912 | born->nalloc = nalloc; |
1913 | } |
1914 | |
1915 | /* With dd, copy algorithm specific arrays */ |
1916 | if (gb_algorithm == egbSTILL) |
1917 | { |
1918 | for (i = at0; i < at1; i++) |
1919 | { |
1920 | born->gpol[i] = born->gpol_globalindex[dd->gatindex[i]]; |
1921 | born->vsolv[i] = born->vsolv_globalindex[dd->gatindex[i]]; |
1922 | born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]]; |
1923 | born->use[i] = born->use_globalindex[dd->gatindex[i]]; |
1924 | } |
1925 | } |
1926 | else |
1927 | { |
1928 | for (i = at0; i < at1; i++) |
1929 | { |
1930 | born->param[i] = born->param_globalindex[dd->gatindex[i]]; |
1931 | born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]]; |
1932 | born->use[i] = born->use_globalindex[dd->gatindex[i]]; |
1933 | } |
1934 | } |
1935 | } |