/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim

Bug Summary

File:	gromacs/mdlib/sim_util.c
Location:	line 842, column 5
Description:	Value stored to 'cg0' is never read

Annotated Source Code

1	/*
2	* This file is part of the GROMACS molecular simulation package.
3	*
4	* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5	* Copyright (c) 2001-2004, The GROMACS development team.
6	* Copyright (c) 2013,2014, by the GROMACS development team, led by
7	* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8	* and including many others, as listed in the AUTHORS file in the
9	* top-level source directory and at http://www.gromacs.org.
10	*
11	* GROMACS is free software; you can redistribute it and/or
12	* modify it under the terms of the GNU Lesser General Public License
13	* as published by the Free Software Foundation; either version 2.1
14	* of the License, or (at your option) any later version.
15	*
16	* GROMACS is distributed in the hope that it will be useful,
17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19	* Lesser General Public License for more details.
20	*
21	* You should have received a copy of the GNU Lesser General Public
22	* License along with GROMACS; if not, see
23	* http://www.gnu.org/licenses, or write to the Free Software Foundation,
24	* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25	*
26	* If you want to redistribute modifications to GROMACS, please
27	* consider that scientific software is very special. Version
28	* control is crucial - bugs must be traceable. We will be happy to
29	* consider code for inclusion in the official distribution, but
30	* derived work must not be called official GROMACS. Details are found
31	* in the README & COPYING files - if they are missing, get the
32	* official version at http://www.gromacs.org.
33	*
34	* To help us fund GROMACS development, we humbly ask that you cite
35	* the research papers on the package. Check out http://www.gromacs.org.
36	*/
37	#ifdef HAVE_CONFIG_H1
38	#include <config.h>
39	#endif
40
41	#include <assert.h>
42	#include <math.h>
43	#include <stdio.h>
44	#include <string.h>
45	#ifdef HAVE_SYS_TIME_H
46	#include <sys/time.h>
47	#endif
48
49	#include "typedefs.h"
50	#include "gromacs/utility/cstringutil.h"
51	#include "gromacs/utility/smalloc.h"
52	#include "names.h"
53	#include "txtdump.h"
54	#include "pbc.h"
55	#include "chargegroup.h"
56	#include "gromacs/math/vec.h"
57	#include "nrnb.h"
58	#include "mshift.h"
59	#include "mdrun.h"
60	#include "sim_util.h"
61	#include "update.h"
62	#include "physics.h"
63	#include "mdatoms.h"
64	#include "force.h"
65	#include "bondf.h"
66	#include "pme.h"
67	#include "disre.h"
68	#include "orires.h"
69	#include "network.h"
70	#include "calcmu.h"
71	#include "constr.h"
72	#include "copyrite.h"
73	#include "domdec.h"
74	#include "genborn.h"
75	#include "nbnxn_atomdata.h"
76	#include "nbnxn_search.h"
77	#include "nbnxn_kernels/nbnxn_kernel_ref.h"
78	#include "nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
79	#include "nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
80	#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
81	#include "nonbonded.h"
82	#include "../gmxlib/nonbonded/nb_kernel.h"
83	#include "../gmxlib/nonbonded/nb_free_energy.h"
84
85	#include "gromacs/timing/wallcycle.h"
86	#include "gromacs/timing/walltime_accounting.h"
87	#include "gromacs/utility/gmxmpi.h"
88	#include "gromacs/essentialdynamics/edsam.h"
89	#include "gromacs/pulling/pull.h"
90	#include "gromacs/pulling/pull_rotation.h"
91	#include "gromacs/imd/imd.h"
92	#include "adress.h"
93	#include "qmmm.h"
94
95	#include "gmx_omp_nthreads.h"
96
97	#include "nbnxn_cuda_data_mgmt.h"
98	#include "nbnxn_cuda/nbnxn_cuda.h"
99
100	void print_time(FILE *out,
101	gmx_walltime_accounting_t walltime_accounting,
102	gmx_int64_t step,
103	t_inputrec *ir,
104	t_commrec gmx_unused__attribute__ ((unused)) *cr)
105	{
106	time_t finish;
107	char timebuf[STRLEN4096];
108	double dt, elapsed_seconds, time_per_step;
109	char buf[48];
110
111	#ifndef GMX_THREAD_MPI
112	if (!PAR(cr)((cr)->nnodes > 1))
113	#endif
114	{
115	fprintf(out, "\r");
116	}
117	fprintf(out, "step %s", gmx_step_str(step, buf));
118	if ((step >= ir->nstlist))
119	{
120	double seconds_since_epoch = gmx_gettime();
121	elapsed_seconds = seconds_since_epoch - walltime_accounting_get_start_time_stamp(walltime_accounting);
122	time_per_step = elapsed_seconds/(step - ir->init_step + 1);
123	dt = (ir->nsteps + ir->init_step - step) * time_per_step;
124
125	if (ir->nsteps >= 0)
126	{
127	if (dt >= 300)
128	{
129	finish = (time_t) (seconds_since_epoch + dt);
130	gmx_ctime_r(&finish, timebuf, STRLEN4096);
131	sprintf(buf, "%s", timebuf);
132	buf[strlen(buf)-1] = '\0';
133	fprintf(out, ", will finish %s", buf);
134	}
135	else
136	{
137	fprintf(out, ", remaining wall clock time: %5d s ", (int)dt);
138	}
139	}
140	else
141	{
142	fprintf(out, " performance: %.1f ns/day ",
143	ir->delta_t/10002460*60/time_per_step);
144	}
145	}
146	#ifndef GMX_THREAD_MPI
147	if (PAR(cr)((cr)->nnodes > 1))
148	{
149	fprintf(out, "\n");
150	}
151	#endif
152
153	fflush(out);
154	}
155
156	void print_date_and_time(FILE fplog, int nodeid, const char title,
157	double the_time)
158	{
159	char time_string[STRLEN4096];
160
161	if (!fplog)
162	{
163	return;
164	}
165
166	{
167	int i;
168	char timebuf[STRLEN4096];
169	time_t temp_time = (time_t) the_time;
170
171	gmx_ctime_r(&temp_time, timebuf, STRLEN4096);
172	for (i = 0; timebuf[i] >= ' '; i++)
173	{
174	time_string[i] = timebuf[i];
175	}
176	time_string[i] = '\0';
177	}
178
179	fprintf(fplog, "%s on node %d %s\n", title, nodeid, time_string);
180	}
181
182	void print_start(FILE fplog, t_commrec cr,
183	gmx_walltime_accounting_t walltime_accounting,
184	const char *name)
185	{
186	char buf[STRLEN4096];
187
188	sprintf(buf, "Started %s", name);
189	print_date_and_time(fplog, cr->nodeid, buf,
190	walltime_accounting_get_start_time_stamp(walltime_accounting));
191	}
192
193	static void sum_forces(int start, int end, rvec f[], rvec flr[])
194	{
195	int i;
196
197	if (gmx_debug_at)
198	{
199	pr_rvecs(debug, 0, "fsr", f+start, end-start);
200	pr_rvecs(debug, 0, "flr", flr+start, end-start);
201	}
202	for (i = start; (i < end); i++)
203	{
204	rvec_inc(f[i], flr[i]);
205	}
206	}
207
208	/*
209	* calc_f_el calculates forces due to an electric field.
210	*
211	* force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e
212	*
213	* Et[] contains the parameters for the time dependent
214	* part of the field (not yet used).
215	* Ex[] contains the parameters for
216	* the spatial dependent part of the field. You can have cool periodic
217	* fields in principle, but only a constant field is supported
218	* now.
219	* The function should return the energy due to the electric field
220	* (if any) but for now returns 0.
221	*
222	* WARNING:
223	* There can be problems with the virial.
224	* Since the field is not self-consistent this is unavoidable.
225	* For neutral molecules the virial is correct within this approximation.
226	* For neutral systems with many charged molecules the error is small.
227	* But for systems with a net charge or a few charged molecules
228	* the error can be significant when the field is high.
229	* Solution: implement a self-consitent electric field into PME.
230	*/
231	static void calc_f_el(FILE *fp, int start, int homenr,
232	real charge[], rvec f[],
233	t_cosines Ex[], t_cosines Et[], double t)
234	{
235	rvec Ext;
236	real t0;
237	int i, m;
238
239	for (m = 0; (m < DIM3); m++)
240	{
241	if (Et[m].n > 0)
242	{
243	if (Et[m].n == 3)
244	{
245	t0 = Et[m].a[1];
246	Ext[m] = cos(Et[m].a[0](t-t0))exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2])));
247	}
248	else
249	{
250	Ext[m] = cos(Et[m].a[0]*t);
251	}
252	}
253	else
254	{
255	Ext[m] = 1.0;
256	}
257	if (Ex[m].n > 0)
258	{
259	/* Convert the field strength from V/nm to MD-units */
260	Ext[m] = Ex[m].a[0]FIELDFAC(((1.60217733e-19)*(6.0221367e23))/(1e3));
261	for (i = start; (i < start+homenr); i++)
262	{
263	f[i][m] += charge[i]*Ext[m];
264	}
265	}
266	else
267	{
268	Ext[m] = 0;
269	}
270	}
271	if (fp != NULL((void*)0))
272	{
273	fprintf(fp, "%10g %10g %10g %10g #FIELD\n", t,
274	Ext[XX0]/FIELDFAC(((1.60217733e-19)(6.0221367e23))/(1e3)), Ext[YY1]/FIELDFAC(((1.60217733e-19)(6.0221367e23))/(1e3)), Ext[ZZ2]/FIELDFAC(((1.60217733e-19)*(6.0221367e23))/(1e3)));
275	}
276	}
277
278	static void calc_virial(int start, int homenr, rvec x[], rvec f[],
279	tensor vir_part, t_graph *graph, matrix box,
280	t_nrnb nrnb, const t_forcerec fr, int ePBC)
281	{
282	int i, j;
283	tensor virtest;
284
285	/* The short-range virial from surrounding boxes */
286	clear_mat(vir_part);
287	calc_vir(SHIFTS((21 +1)(21 +1)(2*2 +1)), fr->shift_vec, fr->fshift, vir_part, ePBC == epbcSCREW, box);
288	inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS)(nrnb)->n[eNR_VIRIAL] += ((21 +1)(21 +1)(2*2 +1));
289
290	/* Calculate partial virial, for local atoms only, based on short range.
291	* Total virial is computed in global_stat, called from do_md
292	*/
293	f_calc_vir(start, start+homenr, x, f, vir_part, graph, box);
294	inc_nrnb(nrnb, eNR_VIRIAL, homenr)(nrnb)->n[eNR_VIRIAL] += homenr;
295
296	/* Add position restraint contribution */
297	for (i = 0; i < DIM3; i++)
298	{
299	vir_part[i][i] += fr->vir_diag_posres[i];
300	}
301
302	/* Add wall contribution */
303	for (i = 0; i < DIM3; i++)
304	{
305	vir_part[i][ZZ2] += fr->vir_wall_z[i];
306	}
307
308	if (debug)
309	{
310	pr_rvecs(debug, 0, "vir_part", vir_part, DIM3);
311	}
312	}
313
314	static void posres_wrapper(FILE *fplog,
315	int flags,
316	gmx_bool bSepDVDL,
317	t_inputrec *ir,
318	t_nrnb *nrnb,
319	gmx_localtop_t *top,
320	matrix box, rvec x[],
321	gmx_enerdata_t *enerd,
322	real *lambda,
323	t_forcerec *fr)
324	{
325	t_pbc pbc;
326	real v, dvdl;
327	int i;
328
329	/* Position restraints always require full pbc */
330	set_pbc(&pbc, ir->ePBC, box);
331	dvdl = 0;
332	v = posres(top->idef.il[F_POSRES].nr, top->idef.il[F_POSRES].iatoms,
333	top->idef.iparams_posres,
334	(const rvec*)x, fr->f_novirsum, fr->vir_diag_posres,
335	ir->ePBC == epbcNONE ? NULL((void*)0) : &pbc,
336	lambda[efptRESTRAINT], &dvdl,
337	fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB);
338	if (bSepDVDL)
339	{
340	gmx_print_sepdvdl(fplog, interaction_function[F_POSRES].longname, v, dvdl);
341	}
342	enerd->term[F_POSRES] += v;
343	/* If just the force constant changes, the FEP term is linear,
344	* but if k changes, it is not.
345	*/
346	enerd->dvdl_nonlin[efptRESTRAINT] += dvdl;
347	inc_nrnb(nrnb, eNR_POSRES, top->idef.il[F_POSRES].nr/2)(nrnb)->n[eNR_POSRES] += top->idef.il[F_POSRES].nr/2;
348
349	if ((ir->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL(1<<10)))
350	{
351	for (i = 0; i < enerd->n_lambda; i++)
352	{
353	real dvdl_dum, lambda_dum;
354
355	lambda_dum = (i == 0 ? lambda[efptRESTRAINT] : ir->fepvals->all_lambda[efptRESTRAINT][i-1]);
356	v = posres(top->idef.il[F_POSRES].nr, top->idef.il[F_POSRES].iatoms,
357	top->idef.iparams_posres,
358	(const rvec)x, NULL((void)0), NULL((void*)0),
359	ir->ePBC == epbcNONE ? NULL((void*)0) : &pbc, lambda_dum, &dvdl,
360	fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB);
361	enerd->enerpart_lambda[i] += v;
362	}
363	}
364	}
365
366	static void fbposres_wrapper(t_inputrec *ir,
367	t_nrnb *nrnb,
368	gmx_localtop_t *top,
369	matrix box, rvec x[],
370	gmx_enerdata_t *enerd,
371	t_forcerec *fr)
372	{
373	t_pbc pbc;
374	real v;
375
376	/* Flat-bottomed position restraints always require full pbc */
377	set_pbc(&pbc, ir->ePBC, box);
378	v = fbposres(top->idef.il[F_FBPOSRES].nr, top->idef.il[F_FBPOSRES].iatoms,
379	top->idef.iparams_fbposres,
380	(const rvec*)x, fr->f_novirsum, fr->vir_diag_posres,
381	ir->ePBC == epbcNONE ? NULL((void*)0) : &pbc,
382	fr->rc_scaling, fr->ePBC, fr->posres_com);
383	enerd->term[F_FBPOSRES] += v;
384	inc_nrnb(nrnb, eNR_FBPOSRES, top->idef.il[F_FBPOSRES].nr/2)(nrnb)->n[eNR_FBPOSRES] += top->idef.il[F_FBPOSRES].nr/ 2;
385	}
386
387	static void pull_potential_wrapper(FILE *fplog,
388	gmx_bool bSepDVDL,
389	t_commrec *cr,
390	t_inputrec *ir,
391	matrix box, rvec x[],
392	rvec f[],
393	tensor vir_force,
394	t_mdatoms *mdatoms,
395	gmx_enerdata_t *enerd,
396	real *lambda,
397	double t)
398	{
399	t_pbc pbc;
400	real dvdl;
401
402	/* Calculate the center of mass forces, this requires communication,
403	* which is why pull_potential is called close to other communication.
404	* The virial contribution is calculated directly,
405	* which is why we call pull_potential after calc_virial.
406	*/
407	set_pbc(&pbc, ir->ePBC, box);
408	dvdl = 0;
409	enerd->term[F_COM_PULL] +=
410	pull_potential(ir->ePull, ir->pull, mdatoms, &pbc,
411	cr, t, lambda[efptRESTRAINT], x, f, vir_force, &dvdl);
412	if (bSepDVDL)
413	{
414	gmx_print_sepdvdl(fplog, "Com pull", enerd->term[F_COM_PULL], dvdl);
415	}
416	enerd->dvdl_lin[efptRESTRAINT] += dvdl;
417	}
418
419	static void pme_receive_force_ener(FILE *fplog,
420	gmx_bool bSepDVDL,
421	t_commrec *cr,
422	gmx_wallcycle_t wcycle,
423	gmx_enerdata_t *enerd,
424	t_forcerec *fr)
425	{
426	real e_q, e_lj, v, dvdl_q, dvdl_lj;
427	float cycles_ppdpme, cycles_seppme;
428
429	cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME);
430	dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME);
431
432	/* In case of node-splitting, the PP nodes receive the long-range
433	* forces, virial and energy from the PME nodes here.
434	*/
435	wallcycle_start(wcycle, ewcPP_PMEWAITRECVF);
436	dvdl_q = 0;
437	dvdl_lj = 0;
438	gmx_pme_receive_f(cr, fr->f_novirsum, fr->vir_el_recip, &e_q,
439	fr->vir_lj_recip, &e_lj, &dvdl_q, &dvdl_lj,
440	&cycles_seppme);
441	if (bSepDVDL)
442	{
443	gmx_print_sepdvdl(fplog, "Electrostatic PME mesh", e_q, dvdl_q);
444	gmx_print_sepdvdl(fplog, "Lennard-Jones PME mesh", e_lj, dvdl_lj);
445	}
446	enerd->term[F_COUL_RECIP] += e_q;
447	enerd->term[F_LJ_RECIP] += e_lj;
448	enerd->dvdl_lin[efptCOUL] += dvdl_q;
449	enerd->dvdl_lin[efptVDW] += dvdl_lj;
450
451	if (wcycle)
452	{
453	dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME);
454	}
455	wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF);
456	}
457
458	static void print_large_forces(FILE fp, t_mdatoms md, t_commrec *cr,
459	gmx_int64_t step, real pforce, rvec x, rvec f)
460	{
461	int i;
462	real pf2, fn2;
463	char buf[STEPSTRSIZE22];
464
465	pf2 = sqr(pforce);
466	for (i = 0; i < md->homenr; i++)
467	{
468	fn2 = norm2(f[i]);
469	/* We also catch NAN, if the compiler does not optimize this away. */
470	if (fn2 >= pf2 \|\| fn2 != fn2)
471	{
472	fprintf(fp, "step %s atom %6d x %8.3f %8.3f %8.3f force %12.5e\n",
473	gmx_step_str(step, buf),
474	ddglatnr(cr->dd, i), x[i][XX0], x[i][YY1], x[i][ZZ2], sqrt(fn2));
475	}
476	}
477	}
478
479	static void post_process_forces(t_commrec *cr,
480	gmx_int64_t step,
481	t_nrnb *nrnb, gmx_wallcycle_t wcycle,
482	gmx_localtop_t *top,
483	matrix box, rvec x[],
484	rvec f[],
485	tensor vir_force,
486	t_mdatoms *mdatoms,
487	t_graph *graph,
488	t_forcerec fr, gmx_vsite_t vsite,
489	int flags)
490	{
491	if (fr->bF_NoVirSum)
492	{
493	if (vsite)
494	{
495	/* Spread the mesh force on virtual sites to the other particles...
496	* This is parallellized. MPI communication is performed
497	* if the constructing atoms aren't local.
498	*/
499	wallcycle_start(wcycle, ewcVSITESPREAD);
500	spread_vsite_f(vsite, x, fr->f_novirsum, NULL((void*)0),
501	(flags & GMX_FORCE_VIRIAL(1<<8)), fr->vir_el_recip,
502	nrnb,
503	&top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
504	wallcycle_stop(wcycle, ewcVSITESPREAD);
505	}
506	if (flags & GMX_FORCE_VIRIAL(1<<8))
507	{
508	/* Now add the forces, this is local */
509	if (fr->bDomDec)
510	{
511	sum_forces(0, fr->f_novirsum_n, f, fr->f_novirsum);
512	}
513	else
514	{
515	sum_forces(0, mdatoms->homenr,
516	f, fr->f_novirsum);
517	}
518	if (EEL_FULL(fr->eeltype)((((fr->eeltype) == eelPME \|\| (fr->eeltype) == eelPMESWITCH \|\| (fr->eeltype) == eelPMEUSER \|\| (fr->eeltype) == eelPMEUSERSWITCH \|\| (fr->eeltype) == eelP3M_AD) \|\| (fr->eeltype) == eelEWALD ) \|\| (fr->eeltype) == eelPOISSON))
519	{
520	/* Add the mesh contribution to the virial */
521	m_add(vir_force, fr->vir_el_recip, vir_force);
522	}
523	if (EVDW_PME(fr->vdwtype)((fr->vdwtype) == evdwPME))
524	{
525	/* Add the mesh contribution to the virial */
526	m_add(vir_force, fr->vir_lj_recip, vir_force);
527	}
528	if (debug)
529	{
530	pr_rvecs(debug, 0, "vir_force", vir_force, DIM3);
531	}
532	}
533	}
534
535	if (fr->print_force >= 0)
536	{
537	print_large_forces(stderrstderr, mdatoms, cr, step, fr->print_force, x, f);
538	}
539	}
540
541	static void do_nb_verlet(t_forcerec *fr,
542	interaction_const_t *ic,
543	gmx_enerdata_t *enerd,
544	int flags, int ilocality,
545	int clearF,
546	t_nrnb *nrnb,
547	gmx_wallcycle_t wcycle)
548	{
549	int nnbl, kernel_type, enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj;
550	char *env;
551	nonbonded_verlet_group_t *nbvg;
552	gmx_bool bCUDA;
553
554	if (!(flags & GMX_FORCE_NONBONDED(1<<6)))
555	{
556	/* skip non-bonded calculation */
557	return;
558	}
559
560	nbvg = &fr->nbv->grp[ilocality];
561
562	/* CUDA kernel launch overhead is already timed separately */
563	if (fr->cutoff_scheme != ecutsVERLET)
564	{
565	gmx_incons("Invalid cut-off scheme passed!")_gmx_error("incons", "Invalid cut-off scheme passed!", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c" , 565);
566	}
567
568	bCUDA = (nbvg->kernel_type == nbnxnk8x8x8_CUDA);
569
570	if (!bCUDA)
571	{
572	wallcycle_sub_start(wcycle, ewcsNONBONDED);
573	}
574	switch (nbvg->kernel_type)
575	{
576	case nbnxnk4x4_PlainC:
577	nbnxn_kernel_ref(&nbvg->nbl_lists,
578	nbvg->nbat, ic,
579	fr->shift_vec,
580	flags,
581	clearF,
582	fr->fshift[0],
583	enerd->grpp.ener[egCOULSR],
584	fr->bBHAM ?
585	enerd->grpp.ener[egBHAMSR] :
586	enerd->grpp.ener[egLJSR]);
587	break;
588
589	case nbnxnk4xN_SIMD_4xN:
590	nbnxn_kernel_simd_4xn(&nbvg->nbl_lists,
591	nbvg->nbat, ic,
592	nbvg->ewald_excl,
593	fr->shift_vec,
594	flags,
595	clearF,
596	fr->fshift[0],
597	enerd->grpp.ener[egCOULSR],
598	fr->bBHAM ?
599	enerd->grpp.ener[egBHAMSR] :
600	enerd->grpp.ener[egLJSR]);
601	break;
602	case nbnxnk4xN_SIMD_2xNN:
603	nbnxn_kernel_simd_2xnn(&nbvg->nbl_lists,
604	nbvg->nbat, ic,
605	nbvg->ewald_excl,
606	fr->shift_vec,
607	flags,
608	clearF,
609	fr->fshift[0],
610	enerd->grpp.ener[egCOULSR],
611	fr->bBHAM ?
612	enerd->grpp.ener[egBHAMSR] :
613	enerd->grpp.ener[egLJSR]);
614	break;
615
616	case nbnxnk8x8x8_CUDA:
617	nbnxn_cuda_launch_kernel(fr->nbv->cu_nbv, nbvg->nbat, flags, ilocality);
618	break;
619
620	case nbnxnk8x8x8_PlainC:
621	nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0],
622	nbvg->nbat, ic,
623	fr->shift_vec,
624	flags,
625	clearF,
626	nbvg->nbat->out[0].f,
627	fr->fshift[0],
628	enerd->grpp.ener[egCOULSR],
629	fr->bBHAM ?
630	enerd->grpp.ener[egBHAMSR] :
631	enerd->grpp.ener[egLJSR]);
632	break;
633
634	default:
635	gmx_incons("Invalid nonbonded kernel type passed!")_gmx_error("incons", "Invalid nonbonded kernel type passed!", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c", 635);
636
637	}
638	if (!bCUDA)
639	{
640	wallcycle_sub_stop(wcycle, ewcsNONBONDED);
641	}
642
643	if (EEL_RF(ic->eeltype)((ic->eeltype) == eelRF \|\| (ic->eeltype) == eelGRF \|\| ( ic->eeltype) == eelRF_NEC \|\| (ic->eeltype) == eelRF_ZERO ) \|\| ic->eeltype == eelCUT)
644	{
645	enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
646	}
647	else if ((!bCUDA && nbvg->ewald_excl == ewaldexclAnalytical) \|\|
648	(bCUDA && nbnxn_cuda_is_kernel_ewald_analytical(fr->nbv->cu_nbv)))
649	{
650	enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
651	}
652	else
653	{
654	enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB;
655	}
656	enr_nbnxn_kernel_lj = eNR_NBNXN_LJ;
657	if (flags & GMX_FORCE_ENERGY(1<<9))
658	{
659	/* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
660	enr_nbnxn_kernel_ljc += 1;
661	enr_nbnxn_kernel_lj += 1;
662	}
663
664	inc_nrnb(nrnb, enr_nbnxn_kernel_ljc,(nrnb)->n[enr_nbnxn_kernel_ljc] += nbvg->nbl_lists.natpair_ljq
665	nbvg->nbl_lists.natpair_ljq)(nrnb)->n[enr_nbnxn_kernel_ljc] += nbvg->nbl_lists.natpair_ljq;
666	inc_nrnb(nrnb, enr_nbnxn_kernel_lj,(nrnb)->n[enr_nbnxn_kernel_lj] += nbvg->nbl_lists.natpair_lj
667	nbvg->nbl_lists.natpair_lj)(nrnb)->n[enr_nbnxn_kernel_lj] += nbvg->nbl_lists.natpair_lj;
668	/* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */
669	inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,(nrnb)->n[enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF ] += nbvg->nbl_lists.natpair_q
670	nbvg->nbl_lists.natpair_q)(nrnb)->n[enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF ] += nbvg->nbl_lists.natpair_q;
671
672	if (ic->vdw_modifier == eintmodFORCESWITCH)
673	{
674	/* We add up the switch cost separately */
675	inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0),(nrnb)->n[eNR_NBNXN_ADD_LJ_FSW+((flags & (1<<9)) ? 1 : 0)] += nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists .natpair_lj
676	nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj)(nrnb)->n[eNR_NBNXN_ADD_LJ_FSW+((flags & (1<<9)) ? 1 : 0)] += nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists .natpair_lj;
677	}
678	if (ic->vdw_modifier == eintmodPOTSWITCH)
679	{
680	/* We add up the switch cost separately */
681	inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0),(nrnb)->n[eNR_NBNXN_ADD_LJ_PSW+((flags & (1<<9)) ? 1 : 0)] += nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists .natpair_lj
682	nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj)(nrnb)->n[eNR_NBNXN_ADD_LJ_PSW+((flags & (1<<9)) ? 1 : 0)] += nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists .natpair_lj;
683	}
684	if (ic->vdwtype == evdwPME)
685	{
686	/* We add up the LJ Ewald cost separately */
687	inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD+((flags & GMX_FORCE_ENERGY) ? 1 : 0),(nrnb)->n[eNR_NBNXN_ADD_LJ_EWALD+((flags & (1<<9 )) ? 1 : 0)] += nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists .natpair_lj
688	nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj)(nrnb)->n[eNR_NBNXN_ADD_LJ_EWALD+((flags & (1<<9 )) ? 1 : 0)] += nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists .natpair_lj;
689	}
690	}
691
692	static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
693	t_forcerec *fr,
694	rvec x[],
695	rvec f[],
696	t_mdatoms *mdatoms,
697	t_lambda *fepvals,
698	real *lambda,
699	gmx_enerdata_t *enerd,
700	int flags,
701	t_nrnb *nrnb,
702	gmx_wallcycle_t wcycle)
703	{
704	int donb_flags;
705	nb_kernel_data_t kernel_data;
706	real lam_i[efptNR];
707	real dvdl_nb[efptNR];
708	int th;
709	int i, j;
710
711	donb_flags = 0;
712	/* Add short-range interactions */
713	donb_flags \|= GMX_NONBONDED_DO_SR(1<<5);
714
715	/* Currently all group scheme kernels always calculate (shift-)forces */
716	if (flags & GMX_FORCE_FORCES(1<<7))
717	{
718	donb_flags \|= GMX_NONBONDED_DO_FORCE(1<<1);
719	}
720	if (flags & GMX_FORCE_VIRIAL(1<<8))
721	{
722	donb_flags \|= GMX_NONBONDED_DO_SHIFTFORCE(1<<2);
723	}
724	if (flags & GMX_FORCE_ENERGY(1<<9))
725	{
726	donb_flags \|= GMX_NONBONDED_DO_POTENTIAL(1<<4);
727	}
728	if (flags & GMX_FORCE_DO_LR(1<<11))
729	{
730	donb_flags \|= GMX_NONBONDED_DO_LR(1<<0);
731	}
732
733	kernel_data.flags = donb_flags;
734	kernel_data.lambda = lambda;
735	kernel_data.dvdl = dvdl_nb;
736
737	kernel_data.energygrp_elec = enerd->grpp.ener[egCOULSR];
738	kernel_data.energygrp_vdw = enerd->grpp.ener[egLJSR];
739
740	/* reset free energy components */
741	for (i = 0; i < efptNR; i++)
742	{
743	dvdl_nb[i] = 0;
744	}
745
746	assert(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl)((void) (0));
747
748	wallcycle_sub_start(wcycle, ewcsNONBONDED);
749	#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
750	for (th = 0; th < nbl_lists->nnbl; th++)
751	{
752	gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
753	x, f, fr, mdatoms, &kernel_data, nrnb);
754	}
755
756	if (fepvals->sc_alpha != 0)
757	{
758	enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
759	enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
760	}
761	else
762	{
763	enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
764	enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
765	}
766
767	/* If we do foreign lambda and we have soft-core interactions
768	* we have to recalculate the (non-linear) energies contributions.
769	*/
770	if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL(1<<10)) && fepvals->sc_alpha != 0)
771	{
772	kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE(1<<1) \| GMX_NONBONDED_DO_SHIFTFORCE(1<<2))) \| GMX_NONBONDED_DO_FOREIGNLAMBDA(1<<3);
773	kernel_data.lambda = lam_i;
774	kernel_data.energygrp_elec = enerd->foreign_grpp.ener[egCOULSR];
775	kernel_data.energygrp_vdw = enerd->foreign_grpp.ener[egLJSR];
776	/* Note that we add to kernel_data.dvdl, but ignore the result */
777
778	for (i = 0; i < enerd->n_lambda; i++)
779	{
780	for (j = 0; j < efptNR; j++)
781	{
782	lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
783	}
784	reset_foreign_enerdata(enerd);
785	#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
786	for (th = 0; th < nbl_lists->nnbl; th++)
787	{
788	gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
789	x, f, fr, mdatoms, &kernel_data, nrnb);
790	}
791
792	sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
793	enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
794	}
795	}
796
797	wallcycle_sub_stop(wcycle, ewcsNONBONDED);
798	}
799
800	void do_force_cutsVERLET(FILE fplog, t_commrec cr,
801	t_inputrec *inputrec,
802	gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
803	gmx_localtop_t *top,
804	gmx_groups_t gmx_unused__attribute__ ((unused)) *groups,
805	matrix box, rvec x[], history_t *hist,
806	rvec f[],
807	tensor vir_force,
808	t_mdatoms *mdatoms,
809	gmx_enerdata_t enerd, t_fcdata fcd,
810	real lambda, t_graph graph,
811	t_forcerec fr, interaction_const_t ic,
812	gmx_vsite_t *vsite, rvec mu_tot,
813	double t, FILE *field, gmx_edsam_t ed,
814	gmx_bool bBornRadii,
815	int flags)
816	{
817	int cg0, cg1, i, j;
818	int start, homenr;
819	int nb_kernel_type;
820	double mu[2*DIM3];
821	gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS;
822	gmx_bool bDoLongRange, bDoForces, bSepLRF, bUseGPU, bUseOrEmulGPU;
823	gmx_bool bDiffKernels = FALSE0;
824	matrix boxs;
825	rvec vzero, box_diag;
826	real e, v, dvdl;
827	float cycles_pme, cycles_force, cycles_wait_gpu;
828	nonbonded_verlet_t *nbv;
829
830	cycles_force = 0;
831	cycles_wait_gpu = 0;
832	nbv = fr->nbv;
833	nb_kernel_type = fr->nbv->grp[0].kernel_type;
834
835	start = 0;
836	homenr = mdatoms->homenr;
837
838	bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog));
839
840	clear_mat(vir_force);
841
842	cg0 = 0;
	Value stored to 'cg0' is never read
843	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
844	{
845	cg1 = cr->dd->ncg_tot;
846	}
847	else
848	{
849	cg1 = top->cgs.nr;
850	}
851	if (fr->n_tpi > 0)
852	{
853	cg1--;
854	}
855
856	bStateChanged = (flags & GMX_FORCE_STATECHANGED(1<<0));
857	bNS = (flags & GMX_FORCE_NS(1<<2)) && (fr->bAllvsAll == FALSE0);
858	bFillGrid = (bNS && bStateChanged);
859	bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)));
860	bDoLongRange = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DO_LR(1<<11)));
861	bDoForces = (flags & GMX_FORCE_FORCES(1<<7));
862	bSepLRF = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF(1<<5)));
863	bUseGPU = fr->nbv->bUseGPU;
864	bUseOrEmulGPU = bUseGPU \|\| (nbv->grp[0].kernel_type == nbnxnk8x8x8_PlainC);
865
866	if (bStateChanged)
867	{
868	update_forcerec(fr, box);
869
870	if (NEED_MUTOT(inputrec)(((inputrec).coulombtype == eelEWALD \|\| (((inputrec).coulombtype ) == eelPME \|\| ((inputrec).coulombtype) == eelPMESWITCH \|\| ( (inputrec).coulombtype) == eelPMEUSER \|\| ((inputrec).coulombtype ) == eelPMEUSERSWITCH \|\| ((inputrec).coulombtype) == eelP3M_AD )) && ((inputrec).ewald_geometry == eewg3DC \|\| (*inputrec ).epsilon_surface != 0)))
871	{
872	/* Calculate total (local) dipole moment in a temporary common array.
873	* This makes it possible to sum them over nodes faster.
874	*/
875	calc_mu(start, homenr,
876	x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
877	mu, mu+DIM3);
878	}
879	}
880
881	if (fr->ePBC != epbcNONE)
882	{
883	/* Compute shift vectors every step,
884	* because of pressure coupling or box deformation!
885	*/
886	if ((flags & GMX_FORCE_DYNAMICBOX(1<<1)) && bStateChanged)
887	{
888	calc_shifts(box, fr->shift_vec);
889	}
890
891	if (bCalcCGCM)
892	{
893	put_atoms_in_box_omp(fr->ePBC, box, homenr, x);
894	inc_nrnb(nrnb, eNR_SHIFTX, homenr)(nrnb)->n[eNR_SHIFTX] += homenr;
895	}
896	else if (EI_ENERGY_MINIMIZATION(inputrec->eI)((inputrec->eI) == eiSteep \|\| (inputrec->eI) == eiCG \|\| (inputrec->eI) == eiLBFGS) && graph)
897	{
898	unshift_self(graph, box, x);
899	}
900	}
901
902	nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX(1<<1),
903	fr->shift_vec, nbv->grp[0].nbat);
904
905	#ifdef GMX_MPI
906	if (!(cr->duty & DUTY_PME(1<<1)))
907	{
908	/* Send particle coordinates to the pme nodes.
909	* Since this is only implemented for domain decomposition
910	* and domain decomposition does not use the graph,
911	* we do not need to worry about shifting.
912	*/
913
914	int pme_flags = 0;
915
916	wallcycle_start(wcycle, ewcPP_PMESENDX);
917
918	bBS = (inputrec->nwall == 2);
919	if (bBS)
920	{
921	copy_mat(box, boxs);
922	svmul(inputrec->wall_ewald_zfac, boxs[ZZ2], boxs[ZZ2]);
923	}
924
925	if (EEL_PME(fr->eeltype)((fr->eeltype) == eelPME \|\| (fr->eeltype) == eelPMESWITCH \|\| (fr->eeltype) == eelPMEUSER \|\| (fr->eeltype) == eelPMEUSERSWITCH \|\| (fr->eeltype) == eelP3M_AD))
926	{
927	pme_flags \|= GMX_PME_DO_COULOMB(1<<13);
928	}
929
930	if (EVDW_PME(fr->vdwtype)((fr->vdwtype) == evdwPME))
931	{
932	pme_flags \|= GMX_PME_DO_LJ(1<<14);
933	}
934
935	gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
936	mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW],
937	(flags & (GMX_FORCE_VIRIAL(1<<8) \| GMX_FORCE_ENERGY(1<<9))),
938	pme_flags, step);
939
940	wallcycle_stop(wcycle, ewcPP_PMESENDX);
941	}
942	#endif /* GMX_MPI */
943
944	/* do gridding for pair search */
945	if (bNS)
946	{
947	if (graph && bStateChanged)
948	{
949	/* Calculate intramolecular shift vectors to make molecules whole */
950	mk_mshift(fplog, graph, fr->ePBC, box, x);
951	}
952
953	clear_rvec(vzero);
954	box_diag[XX0] = box[XX0][XX0];
955	box_diag[YY1] = box[YY1][YY1];
956	box_diag[ZZ2] = box[ZZ2][ZZ2];
957
958	wallcycle_start(wcycle, ewcNS);
959	if (!fr->bDomDec)
960	{
961	wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
962	nbnxn_put_on_grid(nbv->nbs, fr->ePBC, box,
963	0, vzero, box_diag,
964	0, mdatoms->homenr, -1, fr->cginfo, x,
965	0, NULL((void*)0),
966	nbv->grp[eintLocal].kernel_type,
967	nbv->grp[eintLocal].nbat);
968	wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
969	}
970	else
971	{
972	wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
973	nbnxn_put_on_grid_nonlocal(nbv->nbs, domdec_zones(cr->dd),
974	fr->cginfo, x,
975	nbv->grp[eintNonlocal].kernel_type,
976	nbv->grp[eintNonlocal].nbat);
977	wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
978	}
979
980	if (nbv->ngrp == 1 \|\|
981	nbv->grp[eintNonlocal].nbat == nbv->grp[eintLocal].nbat)
982	{
983	nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatAll,
984	nbv->nbs, mdatoms, fr->cginfo);
985	}
986	else
987	{
988	nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatLocal,
989	nbv->nbs, mdatoms, fr->cginfo);
990	nbnxn_atomdata_set(nbv->grp[eintNonlocal].nbat, eatAll,
991	nbv->nbs, mdatoms, fr->cginfo);
992	}
993	wallcycle_stop(wcycle, ewcNS);
994	}
995
996	/* initialize the GPU atom data and copy shift vector */
997	if (bUseGPU)
998	{
999	if (bNS)
1000	{
1001	wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
1002	nbnxn_cuda_init_atomdata(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
1003	wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1004	}
1005
1006	wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
1007	nbnxn_cuda_upload_shiftvec(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
1008	wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1009	}
1010
1011	/* do local pair search */
1012	if (bNS)
1013	{
1014	wallcycle_start_nocount(wcycle, ewcNS);
1015	wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
1016	nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintLocal].nbat,
1017	&top->excls,
1018	ic->rlist,
1019	nbv->min_ci_balanced,
1020	&nbv->grp[eintLocal].nbl_lists,
1021	eintLocal,
1022	nbv->grp[eintLocal].kernel_type,
1023	nrnb);
1024	wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
1025
1026	if (bUseGPU)
1027	{
1028	/* initialize local pair-list on the GPU */
1029	nbnxn_cuda_init_pairlist(nbv->cu_nbv,
1030	nbv->grp[eintLocal].nbl_lists.nbl[0],
1031	eintLocal);
1032	}
1033	wallcycle_stop(wcycle, ewcNS);
1034	}
1035	else
1036	{
1037	wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1038	wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
1039	nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, FALSE0, x,
1040	nbv->grp[eintLocal].nbat);
1041	wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
1042	wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1043	}
1044
1045	if (bUseGPU)
1046	{
1047	wallcycle_start(wcycle, ewcLAUNCH_GPU_NB);
1048	/* launch local nonbonded F on GPU */
1049	do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo,
1050	nrnb, wcycle);
1051	wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1052	}
1053
1054	/* Communicate coordinates and sum dipole if necessary +
1055	do non-local pair search */
1056	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
1057	{
1058	bDiffKernels = (nbv->grp[eintNonlocal].kernel_type !=
1059	nbv->grp[eintLocal].kernel_type);
1060
1061	if (bDiffKernels)
1062	{
1063	/* With GPU+CPU non-bonded calculations we need to copy
1064	* the local coordinates to the non-local nbat struct
1065	* (in CPU format) as the non-local kernel call also
1066	* calculates the local - non-local interactions.
1067	*/
1068	wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1069	wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
1070	nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, TRUE1, x,
1071	nbv->grp[eintNonlocal].nbat);
1072	wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
1073	wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1074	}
1075
1076	if (bNS)
1077	{
1078	wallcycle_start_nocount(wcycle, ewcNS);
1079	wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
1080
1081	if (bDiffKernels)
1082	{
1083	nbnxn_grid_add_simple(nbv->nbs, nbv->grp[eintNonlocal].nbat);
1084	}
1085
1086	nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintNonlocal].nbat,
1087	&top->excls,
1088	ic->rlist,
1089	nbv->min_ci_balanced,
1090	&nbv->grp[eintNonlocal].nbl_lists,
1091	eintNonlocal,
1092	nbv->grp[eintNonlocal].kernel_type,
1093	nrnb);
1094
1095	wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
1096
1097	if (nbv->grp[eintNonlocal].kernel_type == nbnxnk8x8x8_CUDA)
1098	{
1099	/* initialize non-local pair-list on the GPU */
1100	nbnxn_cuda_init_pairlist(nbv->cu_nbv,
1101	nbv->grp[eintNonlocal].nbl_lists.nbl[0],
1102	eintNonlocal);
1103	}
1104	wallcycle_stop(wcycle, ewcNS);
1105	}
1106	else
1107	{
1108	wallcycle_start(wcycle, ewcMOVEX);
1109	dd_move_x(cr->dd, box, x);
1110
1111	/* When we don't need the total dipole we sum it in global_stat */
1112	if (bStateChanged && NEED_MUTOT(inputrec)(((inputrec).coulombtype == eelEWALD \|\| (((inputrec).coulombtype ) == eelPME \|\| ((inputrec).coulombtype) == eelPMESWITCH \|\| ( (inputrec).coulombtype) == eelPMEUSER \|\| ((inputrec).coulombtype ) == eelPMEUSERSWITCH \|\| ((inputrec).coulombtype) == eelP3M_AD )) && ((inputrec).ewald_geometry == eewg3DC \|\| (*inputrec ).epsilon_surface != 0)))
1113	{
1114	gmx_sumd(2*DIM3, mu, cr);
1115	}
1116	wallcycle_stop(wcycle, ewcMOVEX);
1117
1118	wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1119	wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
1120	nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatNonlocal, FALSE0, x,
1121	nbv->grp[eintNonlocal].nbat);
1122	wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
1123	cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1124	}
1125
1126	if (bUseGPU && !bDiffKernels)
1127	{
1128	wallcycle_start(wcycle, ewcLAUNCH_GPU_NB);
1129	/* launch non-local nonbonded F on GPU */
1130	do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
1131	nrnb, wcycle);
1132	cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1133	}
1134	}
1135
1136	if (bUseGPU)
1137	{
1138	/* launch D2H copy-back F */
1139	wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
1140	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) && !bDiffKernels)
1141	{
1142	nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintNonlocal].nbat,
1143	flags, eatNonlocal);
1144	}
1145	nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintLocal].nbat,
1146	flags, eatLocal);
1147	cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1148	}
1149
1150	if (bStateChanged && NEED_MUTOT(inputrec)(((inputrec).coulombtype == eelEWALD \|\| (((inputrec).coulombtype ) == eelPME \|\| ((inputrec).coulombtype) == eelPMESWITCH \|\| ( (inputrec).coulombtype) == eelPMEUSER \|\| ((inputrec).coulombtype ) == eelPMEUSERSWITCH \|\| ((inputrec).coulombtype) == eelP3M_AD )) && ((inputrec).ewald_geometry == eewg3DC \|\| (*inputrec ).epsilon_surface != 0)))
1151	{
1152	if (PAR(cr)((cr)->nnodes > 1))
1153	{
1154	gmx_sumd(2*DIM3, mu, cr);
1155	}
1156
1157	for (i = 0; i < 2; i++)
1158	{
1159	for (j = 0; j < DIM3; j++)
1160	{
1161	fr->mu_tot[i][j] = mu[i*DIM3 + j];
1162	}
1163	}
1164	}
1165	if (fr->efep == efepNO)
1166	{
1167	copy_rvec(fr->mu_tot[0], mu_tot);
1168	}
1169	else
1170	{
1171	for (j = 0; j < DIM3; j++)
1172	{
1173	mu_tot[j] =
1174	(1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] +
1175	lambda[efptCOUL]*fr->mu_tot[1][j];
1176	}
1177	}
1178
1179	/* Reset energies */
1180	reset_enerdata(fr, bNS, enerd, MASTER(cr)(((cr)->nodeid == 0) \|\| !((cr)->nnodes > 1)));
1181	clear_rvecs(SHIFTS((21 +1)(21 +1)(2*2 +1)), fr->fshift);
1182
1183	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) && !(cr->duty & DUTY_PME(1<<1)))
1184	{
1185	wallcycle_start(wcycle, ewcPPDURINGPME);
1186	dd_force_flop_start(cr->dd, nrnb);
1187	}
1188
1189	if (inputrec->bRot)
1190	{
1191	/* Enforced rotation has its own cycle counter that starts after the collective
1192	* coordinates have been communicated. It is added to ddCyclF to allow
1193	* for proper load-balancing */
1194	wallcycle_start(wcycle, ewcROT);
1195	do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS);
1196	wallcycle_stop(wcycle, ewcROT);
1197	}
1198
1199	/* Start the force cycle counter.
1200	* This counter is stopped in do_forcelow_level.
1201	* No parallel communication should occur while this counter is running,
1202	* since that will interfere with the dynamic load balancing.
1203	*/
1204	wallcycle_start(wcycle, ewcFORCE);
1205	if (bDoForces)
1206	{
1207	/* Reset forces for which the virial is calculated separately:
1208	* PME/Ewald forces if necessary */
1209	if (fr->bF_NoVirSum)
1210	{
1211	if (flags & GMX_FORCE_VIRIAL(1<<8))
1212	{
1213	fr->f_novirsum = fr->f_novirsum_alloc;
1214	if (fr->bDomDec)
1215	{
1216	clear_rvecs(fr->f_novirsum_n, fr->f_novirsum);
1217	}
1218	else
1219	{
1220	clear_rvecs(homenr, fr->f_novirsum+start);
1221	}
1222	}
1223	else
1224	{
1225	/* We are not calculating the pressure so we do not need
1226	* a separate array for forces that do not contribute
1227	* to the pressure.
1228	*/
1229	fr->f_novirsum = f;
1230	}
1231	}
1232
1233	/* Clear the short- and long-range forces */
1234	clear_rvecs(fr->natoms_force_constr, f);
1235	if (bSepLRF && do_per_step(step, inputrec->nstcalclr))
1236	{
1237	clear_rvecs(fr->natoms_force_constr, fr->f_twin);
1238	}
1239
1240	clear_rvec(fr->vir_diag_posres);
1241	}
1242
1243	if (inputrec->ePull == epullCONSTRAINT)
1244	{
1245	clear_pull_forces(inputrec->pull);
1246	}
1247
1248	/* We calculate the non-bonded forces, when done on the CPU, here.
1249	* We do this before calling do_force_lowlevel, as in there bondeds
1250	* forces are calculated before PME, which does communication.
1251	* With this order, non-bonded and bonded force calculation imbalance
1252	* can be balanced out by the domain decomposition load balancing.
1253	*/
1254
1255	if (!bUseOrEmulGPU)
1256	{
1257	/* Maybe we should move this into do_force_lowlevel */
1258	do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes,
1259	nrnb, wcycle);
1260	}
1261
1262	if (fr->efep != efepNO)
1263	{
1264	/* Calculate the local and non-local free energy interactions here.
1265	* Happens here on the CPU both with and without GPU.
1266	*/
1267	if (fr->nbv->grp[eintLocal].nbl_lists.nbl_fep[0]->nrj > 0)
1268	{
1269	do_nb_verlet_fep(&fr->nbv->grp[eintLocal].nbl_lists,
1270	fr, x, f, mdatoms,
1271	inputrec->fepvals, lambda,
1272	enerd, flags, nrnb, wcycle);
1273	}
1274
1275	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) &&
1276	fr->nbv->grp[eintNonlocal].nbl_lists.nbl_fep[0]->nrj > 0)
1277	{
1278	do_nb_verlet_fep(&fr->nbv->grp[eintNonlocal].nbl_lists,
1279	fr, x, f, mdatoms,
1280	inputrec->fepvals, lambda,
1281	enerd, flags, nrnb, wcycle);
1282	}
1283	}
1284
1285	if (!bUseOrEmulGPU \|\| bDiffKernels)
1286	{
1287	int aloc;
1288
1289	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
1290	{
1291	do_nb_verlet(fr, ic, enerd, flags, eintNonlocal,
1292	bDiffKernels ? enbvClearFYes : enbvClearFNo,
1293	nrnb, wcycle);
1294	}
1295
1296	if (!bUseOrEmulGPU)
1297	{
1298	aloc = eintLocal;
1299	}
1300	else
1301	{
1302	aloc = eintNonlocal;
1303	}
1304
1305	/* Add all the non-bonded force to the normal force array.
1306	* This can be split into a local a non-local part when overlapping
1307	* communication with calculation with domain decomposition.
1308	*/
1309	cycles_force += wallcycle_stop(wcycle, ewcFORCE);
1310	wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1311	wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1312	nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->grp[aloc].nbat, f);
1313	wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1314	cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1315	wallcycle_start_nocount(wcycle, ewcFORCE);
1316
1317	/* if there are multiple fshift output buffers reduce them */
1318	if ((flags & GMX_FORCE_VIRIAL(1<<8)) &&
1319	nbv->grp[aloc].nbl_lists.nnbl > 1)
1320	{
1321	nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat,
1322	fr->fshift);
1323	}
1324	}
1325
1326	/* update QMMMrec, if necessary */
1327	if (fr->bQMMM)
1328	{
1329	update_QMMMrec(cr, fr, x, mdatoms, box, top);
1330	}
1331
1332	if ((flags & GMX_FORCE_BONDED(1<<4)) && top->idef.il[F_POSRES].nr > 0)
1333	{
1334	posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x,
1335	enerd, lambda, fr);
1336	}
1337
1338	if ((flags & GMX_FORCE_BONDED(1<<4)) && top->idef.il[F_FBPOSRES].nr > 0)
1339	{
1340	fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr);
1341	}
1342
1343	/* Compute the bonded and non-bonded energies and optionally forces */
1344	do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef),
1345	cr, nrnb, wcycle, mdatoms,
1346	x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born,
1347	&(top->atomtypes), bBornRadii, box,
1348	inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot,
1349	flags, &cycles_pme);
1350
1351	if (bSepLRF)
1352	{
1353	if (do_per_step(step, inputrec->nstcalclr))
1354	{
1355	/* Add the long range forces to the short range forces */
1356	for (i = 0; i < fr->natoms_force_constr; i++)
1357	{
1358	rvec_add(fr->f_twin[i], f[i], f[i]);
1359	}
1360	}
1361	}
1362
1363	cycles_force += wallcycle_stop(wcycle, ewcFORCE);
1364
1365	if (ed)
1366	{
1367	do_flood(cr, inputrec, x, f, ed, box, step, bNS);
1368	}
1369
1370	if (bUseOrEmulGPU && !bDiffKernels)
1371	{
1372	/* wait for non-local forces (or calculate in emulation mode) */
1373	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
1374	{
1375	if (bUseGPU)
1376	{
1377	float cycles_tmp;
1378
1379	wallcycle_start(wcycle, ewcWAIT_GPU_NB_NL);
1380	nbnxn_cuda_wait_gpu(nbv->cu_nbv,
1381	nbv->grp[eintNonlocal].nbat,
1382	flags, eatNonlocal,
1383	enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
1384	fr->fshift);
1385	cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL);
1386	cycles_wait_gpu += cycles_tmp;
1387	cycles_force += cycles_tmp;
1388	}
1389	else
1390	{
1391	wallcycle_start_nocount(wcycle, ewcFORCE);
1392	do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes,
1393	nrnb, wcycle);
1394	cycles_force += wallcycle_stop(wcycle, ewcFORCE);
1395	}
1396	wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1397	wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1398	/* skip the reduction if there was no non-local work to do */
1399	if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
1400	{
1401	nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatNonlocal,
1402	nbv->grp[eintNonlocal].nbat, f);
1403	}
1404	wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1405	cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1406	}
1407	}
1408
1409	if (bDoForces && DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
1410	{
1411	/* Communicate the forces */
1412	wallcycle_start(wcycle, ewcMOVEF);
1413	dd_move_f(cr->dd, f, fr->fshift);
1414	/* Do we need to communicate the separate force array
1415	* for terms that do not contribute to the single sum virial?
1416	* Position restraints and electric fields do not introduce
1417	* inter-cg forces, only full electrostatics methods do.
1418	* When we do not calculate the virial, fr->f_novirsum = f,
1419	* so we have already communicated these forces.
1420	*/
1421	if (EEL_FULL(fr->eeltype)((((fr->eeltype) == eelPME \|\| (fr->eeltype) == eelPMESWITCH \|\| (fr->eeltype) == eelPMEUSER \|\| (fr->eeltype) == eelPMEUSERSWITCH \|\| (fr->eeltype) == eelP3M_AD) \|\| (fr->eeltype) == eelEWALD ) \|\| (fr->eeltype) == eelPOISSON) && cr->dd->n_intercg_excl &&
1422	(flags & GMX_FORCE_VIRIAL(1<<8)))
1423	{
1424	dd_move_f(cr->dd, fr->f_novirsum, NULL((void*)0));
1425	}
1426	if (bSepLRF)
1427	{
1428	/* We should not update the shift forces here,
1429	* since f_twin is already included in f.
1430	*/
1431	dd_move_f(cr->dd, fr->f_twin, NULL((void*)0));
1432	}
1433	wallcycle_stop(wcycle, ewcMOVEF);
1434	}
1435
1436	if (bUseOrEmulGPU)
1437	{
1438	/* wait for local forces (or calculate in emulation mode) */
1439	if (bUseGPU)
1440	{
1441	wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
1442	nbnxn_cuda_wait_gpu(nbv->cu_nbv,
1443	nbv->grp[eintLocal].nbat,
1444	flags, eatLocal,
1445	enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
1446	fr->fshift);
1447	cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
1448
1449	/* now clear the GPU outputs while we finish the step on the CPU */
1450
1451	wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
1452	nbnxn_cuda_clear_outputs(nbv->cu_nbv, flags);
1453	wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
1454	}
1455	else
1456	{
1457	wallcycle_start_nocount(wcycle, ewcFORCE);
1458	do_nb_verlet(fr, ic, enerd, flags, eintLocal,
1459	DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) ? enbvClearFNo : enbvClearFYes,
1460	nrnb, wcycle);
1461	wallcycle_stop(wcycle, ewcFORCE);
1462	}
1463	wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
1464	wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
1465	if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
1466	{
1467	/* skip the reduction if there was no non-local work to do */
1468	nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal,
1469	nbv->grp[eintLocal].nbat, f);
1470	}
1471	wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
1472	wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
1473	}
1474
1475	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
1476	{
1477	dd_force_flop_stop(cr->dd, nrnb);
1478	if (wcycle)
1479	{
1480	dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF);
1481	if (bUseGPU)
1482	{
1483	dd_cycles_add(cr->dd, cycles_wait_gpu, ddCyclWaitGPU);
1484	}
1485	}
1486	}
1487
1488	if (bDoForces)
1489	{
1490	if (IR_ELEC_FIELD(inputrec)((inputrec).ex[0].n > 0 \|\| (inputrec).ex[1].n > 0 \|\| ( inputrec).ex[2].n > 0))
1491	{
1492	/* Compute forces due to electric field */
1493	calc_f_el(MASTER(cr)(((cr)->nodeid == 0) \|\| !((cr)->nnodes > 1)) ? field : NULL((void*)0),
1494	start, homenr, mdatoms->chargeA, fr->f_novirsum,
1495	inputrec->ex, inputrec->et, t);
1496	}
1497
1498	/* If we have NoVirSum forces, but we do not calculate the virial,
1499	* we sum fr->f_novirum=f later.
1500	*/
1501	if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL(1<<8))))
1502	{
1503	wallcycle_start(wcycle, ewcVSITESPREAD);
1504	spread_vsite_f(vsite, x, f, fr->fshift, FALSE0, NULL((void*)0), nrnb,
1505	&top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
1506	wallcycle_stop(wcycle, ewcVSITESPREAD);
1507
1508	if (bSepLRF)
1509	{
1510	wallcycle_start(wcycle, ewcVSITESPREAD);
1511	spread_vsite_f(vsite, x, fr->f_twin, NULL((void)0), FALSE0, NULL((void)0),
1512	nrnb,
1513	&top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
1514	wallcycle_stop(wcycle, ewcVSITESPREAD);
1515	}
1516	}
1517
1518	if (flags & GMX_FORCE_VIRIAL(1<<8))
1519	{
1520	/* Calculation of the virial must be done after vsites! */
1521	calc_virial(0, mdatoms->homenr, x, f,
1522	vir_force, graph, box, nrnb, fr, inputrec->ePBC);
1523	}
1524	}
1525
1526	if (inputrec->ePull == epullUMBRELLA \|\| inputrec->ePull == epullCONST_F)
1527	{
1528	pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x,
1529	f, vir_force, mdatoms, enerd, lambda, t);
1530	}
1531
1532	/* Add the forces from enforced rotation potentials (if any) */
1533	if (inputrec->bRot)
1534	{
1535	wallcycle_start(wcycle, ewcROTadd);
1536	enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
1537	wallcycle_stop(wcycle, ewcROTadd);
1538	}
1539
1540	/* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. */
1541	IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle);
1542
1543	if (PAR(cr)((cr)->nnodes > 1) && !(cr->duty & DUTY_PME(1<<1)))
1544	{
1545	/* In case of node-splitting, the PP nodes receive the long-range
1546	* forces, virial and energy from the PME nodes here.
1547	*/
1548	pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr);
1549	}
1550
1551	if (bDoForces)
1552	{
1553	post_process_forces(cr, step, nrnb, wcycle,
1554	top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
1555	flags);
1556	}
1557
1558	/* Sum the potential energy terms from group contributions */
1559	sum_epot(&(enerd->grpp), enerd->term);
1560	}
1561
1562	void do_force_cutsGROUP(FILE fplog, t_commrec cr,
1563	t_inputrec *inputrec,
1564	gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1565	gmx_localtop_t *top,
1566	gmx_groups_t *groups,
1567	matrix box, rvec x[], history_t *hist,
1568	rvec f[],
1569	tensor vir_force,
1570	t_mdatoms *mdatoms,
1571	gmx_enerdata_t enerd, t_fcdata fcd,
1572	real lambda, t_graph graph,
1573	t_forcerec fr, gmx_vsite_t vsite, rvec mu_tot,
1574	double t, FILE *field, gmx_edsam_t ed,
1575	gmx_bool bBornRadii,
1576	int flags)
1577	{
1578	int cg0, cg1, i, j;
1579	int start, homenr;
1580	double mu[2*DIM3];
1581	gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS;
1582	gmx_bool bDoLongRangeNS, bDoForces, bDoPotential, bSepLRF;
1583	gmx_bool bDoAdressWF;
1584	matrix boxs;
1585	rvec vzero, box_diag;
1586	real e, v, dvdlambda[efptNR];
1587	t_pbc pbc;
1588	float cycles_pme, cycles_force;
1589
1590	start = 0;
1591	homenr = mdatoms->homenr;
1592
1593	bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog));
1594
1595	clear_mat(vir_force);
1596
1597	cg0 = 0;
1598	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
1599	{
1600	cg1 = cr->dd->ncg_tot;
1601	}
1602	else
1603	{
1604	cg1 = top->cgs.nr;
1605	}
1606	if (fr->n_tpi > 0)
1607	{
1608	cg1--;
1609	}
1610
1611	bStateChanged = (flags & GMX_FORCE_STATECHANGED(1<<0));
1612	bNS = (flags & GMX_FORCE_NS(1<<2)) && (fr->bAllvsAll == FALSE0);
1613	/* Should we update the long-range neighborlists at this step? */
1614	bDoLongRangeNS = fr->bTwinRange && bNS;
1615	/* Should we perform the long-range nonbonded evaluation inside the neighborsearching? */
1616	bFillGrid = (bNS && bStateChanged);
1617	bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)));
1618	bDoForces = (flags & GMX_FORCE_FORCES(1<<7));
1619	bDoPotential = (flags & GMX_FORCE_ENERGY(1<<9));
1620	bSepLRF = ((inputrec->nstcalclr > 1) && bDoForces &&
1621	(flags & GMX_FORCE_SEPLRF(1<<5)) && (flags & GMX_FORCE_DO_LR(1<<11)));
1622
1623	/* should probably move this to the forcerec since it doesn't change */
1624	bDoAdressWF = ((fr->adress_type != eAdressOff));
1625
1626	if (bStateChanged)
1627	{
1628	update_forcerec(fr, box);
1629
1630	if (NEED_MUTOT(inputrec)(((inputrec).coulombtype == eelEWALD \|\| (((inputrec).coulombtype ) == eelPME \|\| ((inputrec).coulombtype) == eelPMESWITCH \|\| ( (inputrec).coulombtype) == eelPMEUSER \|\| ((inputrec).coulombtype ) == eelPMEUSERSWITCH \|\| ((inputrec).coulombtype) == eelP3M_AD )) && ((inputrec).ewald_geometry == eewg3DC \|\| (*inputrec ).epsilon_surface != 0)))
1631	{
1632	/* Calculate total (local) dipole moment in a temporary common array.
1633	* This makes it possible to sum them over nodes faster.
1634	*/
1635	calc_mu(start, homenr,
1636	x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
1637	mu, mu+DIM3);
1638	}
1639	}
1640
1641	if (fr->ePBC != epbcNONE)
1642	{
1643	/* Compute shift vectors every step,
1644	* because of pressure coupling or box deformation!
1645	*/
1646	if ((flags & GMX_FORCE_DYNAMICBOX(1<<1)) && bStateChanged)
1647	{
1648	calc_shifts(box, fr->shift_vec);
1649	}
1650
1651	if (bCalcCGCM)
1652	{
1653	put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, box,
1654	&(top->cgs), x, fr->cg_cm);
1655	inc_nrnb(nrnb, eNR_CGCM, homenr)(nrnb)->n[eNR_CGCM] += homenr;
1656	inc_nrnb(nrnb, eNR_RESETX, cg1-cg0)(nrnb)->n[eNR_RESETX] += cg1-cg0;
1657	}
1658	else if (EI_ENERGY_MINIMIZATION(inputrec->eI)((inputrec->eI) == eiSteep \|\| (inputrec->eI) == eiCG \|\| (inputrec->eI) == eiLBFGS) && graph)
1659	{
1660	unshift_self(graph, box, x);
1661	}
1662	}
1663	else if (bCalcCGCM)
1664	{
1665	calc_cgcm(fplog, cg0, cg1, &(top->cgs), x, fr->cg_cm);
1666	inc_nrnb(nrnb, eNR_CGCM, homenr)(nrnb)->n[eNR_CGCM] += homenr;
1667	}
1668
1669	if (bCalcCGCM && gmx_debug_at)
1670	{
1671	pr_rvecs(debug, 0, "cgcm", fr->cg_cm, top->cgs.nr);
1672	}
1673
1674	#ifdef GMX_MPI
1675	if (!(cr->duty & DUTY_PME(1<<1)))
1676	{
1677	/* Send particle coordinates to the pme nodes.
1678	* Since this is only implemented for domain decomposition
1679	* and domain decomposition does not use the graph,
1680	* we do not need to worry about shifting.
1681	*/
1682
1683	int pme_flags = 0;
1684
1685	wallcycle_start(wcycle, ewcPP_PMESENDX);
1686
1687	bBS = (inputrec->nwall == 2);
1688	if (bBS)
1689	{
1690	copy_mat(box, boxs);
1691	svmul(inputrec->wall_ewald_zfac, boxs[ZZ2], boxs[ZZ2]);
1692	}
1693
1694	if (EEL_PME(fr->eeltype)((fr->eeltype) == eelPME \|\| (fr->eeltype) == eelPMESWITCH \|\| (fr->eeltype) == eelPMEUSER \|\| (fr->eeltype) == eelPMEUSERSWITCH \|\| (fr->eeltype) == eelP3M_AD))
1695	{
1696	pme_flags \|= GMX_PME_DO_COULOMB(1<<13);
1697	}
1698
1699	if (EVDW_PME(fr->vdwtype)((fr->vdwtype) == evdwPME))
1700	{
1701	pme_flags \|= GMX_PME_DO_LJ(1<<14);
1702	}
1703
1704	gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
1705	mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW],
1706	(flags & (GMX_FORCE_VIRIAL(1<<8) \| GMX_FORCE_ENERGY(1<<9))),
1707	pme_flags, step);
1708
1709	wallcycle_stop(wcycle, ewcPP_PMESENDX);
1710	}
1711	#endif /* GMX_MPI */
1712
1713	/* Communicate coordinates and sum dipole if necessary */
1714	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
1715	{
1716	wallcycle_start(wcycle, ewcMOVEX);
1717	dd_move_x(cr->dd, box, x);
1718	wallcycle_stop(wcycle, ewcMOVEX);
1719	}
1720
1721	/* update adress weight beforehand */
1722	if (bStateChanged && bDoAdressWF)
1723	{
1724	/* need pbc for adress weight calculation with pbc_dx */
1725	set_pbc(&pbc, inputrec->ePBC, box);
1726	if (fr->adress_site == eAdressSITEcog)
1727	{
1728	update_adress_weights_cog(top->idef.iparams, top->idef.il, x, fr, mdatoms,
1729	inputrec->ePBC == epbcNONE ? NULL((void*)0) : &pbc);
1730	}
1731	else if (fr->adress_site == eAdressSITEcom)
1732	{
1733	update_adress_weights_com(fplog, cg0, cg1, &(top->cgs), x, fr, mdatoms,
1734	inputrec->ePBC == epbcNONE ? NULL((void*)0) : &pbc);
1735	}
1736	else if (fr->adress_site == eAdressSITEatomatom)
1737	{
1738	update_adress_weights_atom_per_atom(cg0, cg1, &(top->cgs), x, fr, mdatoms,
1739	inputrec->ePBC == epbcNONE ? NULL((void*)0) : &pbc);
1740	}
1741	else
1742	{
1743	update_adress_weights_atom(cg0, cg1, &(top->cgs), x, fr, mdatoms,
1744	inputrec->ePBC == epbcNONE ? NULL((void*)0) : &pbc);
1745	}
1746	}
1747
1748	if (NEED_MUTOT(inputrec)(((inputrec).coulombtype == eelEWALD \|\| (((inputrec).coulombtype ) == eelPME \|\| ((inputrec).coulombtype) == eelPMESWITCH \|\| ( (inputrec).coulombtype) == eelPMEUSER \|\| ((inputrec).coulombtype ) == eelPMEUSERSWITCH \|\| ((inputrec).coulombtype) == eelP3M_AD )) && ((inputrec).ewald_geometry == eewg3DC \|\| (*inputrec ).epsilon_surface != 0)))
1749	{
1750
1751	if (bStateChanged)
1752	{
1753	if (PAR(cr)((cr)->nnodes > 1))
1754	{
1755	gmx_sumd(2*DIM3, mu, cr);
1756	}
1757	for (i = 0; i < 2; i++)
1758	{
1759	for (j = 0; j < DIM3; j++)
1760	{
1761	fr->mu_tot[i][j] = mu[i*DIM3 + j];
1762	}
1763	}
1764	}
1765	if (fr->efep == efepNO)
1766	{
1767	copy_rvec(fr->mu_tot[0], mu_tot);
1768	}
1769	else
1770	{
1771	for (j = 0; j < DIM3; j++)
1772	{
1773	mu_tot[j] =
1774	(1.0 - lambda[efptCOUL])fr->mu_tot[0][j] + lambda[efptCOUL]fr->mu_tot[1][j];
1775	}
1776	}
1777	}
1778
1779	/* Reset energies */
1780	reset_enerdata(fr, bNS, enerd, MASTER(cr)(((cr)->nodeid == 0) \|\| !((cr)->nnodes > 1)));
1781	clear_rvecs(SHIFTS((21 +1)(21 +1)(2*2 +1)), fr->fshift);
1782
1783	if (bNS)
1784	{
1785	wallcycle_start(wcycle, ewcNS);
1786
1787	if (graph && bStateChanged)
1788	{
1789	/* Calculate intramolecular shift vectors to make molecules whole */
1790	mk_mshift(fplog, graph, fr->ePBC, box, x);
1791	}
1792
1793	/* Do the actual neighbour searching */
1794	ns(fplog, fr, box,
1795	groups, top, mdatoms,
1796	cr, nrnb, bFillGrid,
1797	bDoLongRangeNS);
1798
1799	wallcycle_stop(wcycle, ewcNS);
1800	}
1801
1802	if (inputrec->implicit_solvent && bNS)
1803	{
1804	make_gb_nblist(cr, inputrec->gb_algorithm,
1805	x, box, fr, &top->idef, graph, fr->born);
1806	}
1807
1808	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)) && !(cr->duty & DUTY_PME(1<<1)))
1809	{
1810	wallcycle_start(wcycle, ewcPPDURINGPME);
1811	dd_force_flop_start(cr->dd, nrnb);
1812	}
1813
1814	if (inputrec->bRot)
1815	{
1816	/* Enforced rotation has its own cycle counter that starts after the collective
1817	* coordinates have been communicated. It is added to ddCyclF to allow
1818	* for proper load-balancing */
1819	wallcycle_start(wcycle, ewcROT);
1820	do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS);
1821	wallcycle_stop(wcycle, ewcROT);
1822	}
1823
1824	/* Start the force cycle counter.
1825	* This counter is stopped in do_forcelow_level.
1826	* No parallel communication should occur while this counter is running,
1827	* since that will interfere with the dynamic load balancing.
1828	*/
1829	wallcycle_start(wcycle, ewcFORCE);
1830
1831	if (bDoForces)
1832	{
1833	/* Reset forces for which the virial is calculated separately:
1834	* PME/Ewald forces if necessary */
1835	if (fr->bF_NoVirSum)
1836	{
1837	if (flags & GMX_FORCE_VIRIAL(1<<8))
1838	{
1839	fr->f_novirsum = fr->f_novirsum_alloc;
1840	if (fr->bDomDec)
1841	{
1842	clear_rvecs(fr->f_novirsum_n, fr->f_novirsum);
1843	}
1844	else
1845	{
1846	clear_rvecs(homenr, fr->f_novirsum+start);
1847	}
1848	}
1849	else
1850	{
1851	/* We are not calculating the pressure so we do not need
1852	* a separate array for forces that do not contribute
1853	* to the pressure.
1854	*/
1855	fr->f_novirsum = f;
1856	}
1857	}
1858
1859	/* Clear the short- and long-range forces */
1860	clear_rvecs(fr->natoms_force_constr, f);
1861	if (bSepLRF && do_per_step(step, inputrec->nstcalclr))
1862	{
1863	clear_rvecs(fr->natoms_force_constr, fr->f_twin);
1864	}
1865
1866	clear_rvec(fr->vir_diag_posres);
1867	}
1868	if (inputrec->ePull == epullCONSTRAINT)
1869	{
1870	clear_pull_forces(inputrec->pull);
1871	}
1872
1873	/* update QMMMrec, if necessary */
1874	if (fr->bQMMM)
1875	{
1876	update_QMMMrec(cr, fr, x, mdatoms, box, top);
1877	}
1878
1879	if ((flags & GMX_FORCE_BONDED(1<<4)) && top->idef.il[F_POSRES].nr > 0)
1880	{
1881	posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x,
1882	enerd, lambda, fr);
1883	}
1884
1885	if ((flags & GMX_FORCE_BONDED(1<<4)) && top->idef.il[F_FBPOSRES].nr > 0)
1886	{
1887	fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr);
1888	}
1889
1890	/* Compute the bonded and non-bonded energies and optionally forces */
1891	do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef),
1892	cr, nrnb, wcycle, mdatoms,
1893	x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born,
1894	&(top->atomtypes), bBornRadii, box,
1895	inputrec->fepvals, lambda,
1896	graph, &(top->excls), fr->mu_tot,
1897	flags,
1898	&cycles_pme);
1899
1900	if (bSepLRF)
1901	{
1902	if (do_per_step(step, inputrec->nstcalclr))
1903	{
1904	/* Add the long range forces to the short range forces */
1905	for (i = 0; i < fr->natoms_force_constr; i++)
1906	{
1907	rvec_add(fr->f_twin[i], f[i], f[i]);
1908	}
1909	}
1910	}
1911
1912	cycles_force = wallcycle_stop(wcycle, ewcFORCE);
1913
1914	if (ed)
1915	{
1916	do_flood(cr, inputrec, x, f, ed, box, step, bNS);
1917	}
1918
1919	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
1920	{
1921	dd_force_flop_stop(cr->dd, nrnb);
1922	if (wcycle)
1923	{
1924	dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF);
1925	}
1926	}
1927
1928	if (bDoForces)
1929	{
1930	if (IR_ELEC_FIELD(inputrec)((inputrec).ex[0].n > 0 \|\| (inputrec).ex[1].n > 0 \|\| ( inputrec).ex[2].n > 0))
1931	{
1932	/* Compute forces due to electric field */
1933	calc_f_el(MASTER(cr)(((cr)->nodeid == 0) \|\| !((cr)->nnodes > 1)) ? field : NULL((void*)0),
1934	start, homenr, mdatoms->chargeA, fr->f_novirsum,
1935	inputrec->ex, inputrec->et, t);
1936	}
1937
1938	if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce)
1939	{
1940	/* Compute thermodynamic force in hybrid AdResS region */
1941	adress_thermo_force(start, homenr, &(top->cgs), x, fr->f_novirsum, fr, mdatoms,
1942	inputrec->ePBC == epbcNONE ? NULL((void*)0) : &pbc);
1943	}
1944
1945	/* Communicate the forces */
1946	if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
1947	{
1948	wallcycle_start(wcycle, ewcMOVEF);
1949	dd_move_f(cr->dd, f, fr->fshift);
1950	/* Do we need to communicate the separate force array
1951	* for terms that do not contribute to the single sum virial?
1952	* Position restraints and electric fields do not introduce
1953	* inter-cg forces, only full electrostatics methods do.
1954	* When we do not calculate the virial, fr->f_novirsum = f,
1955	* so we have already communicated these forces.
1956	*/
1957	if (EEL_FULL(fr->eeltype)((((fr->eeltype) == eelPME \|\| (fr->eeltype) == eelPMESWITCH \|\| (fr->eeltype) == eelPMEUSER \|\| (fr->eeltype) == eelPMEUSERSWITCH \|\| (fr->eeltype) == eelP3M_AD) \|\| (fr->eeltype) == eelEWALD ) \|\| (fr->eeltype) == eelPOISSON) && cr->dd->n_intercg_excl &&
1958	(flags & GMX_FORCE_VIRIAL(1<<8)))
1959	{
1960	dd_move_f(cr->dd, fr->f_novirsum, NULL((void*)0));
1961	}
1962	if (bSepLRF)
1963	{
1964	/* We should not update the shift forces here,
1965	* since f_twin is already included in f.
1966	*/
1967	dd_move_f(cr->dd, fr->f_twin, NULL((void*)0));
1968	}
1969	wallcycle_stop(wcycle, ewcMOVEF);
1970	}
1971
1972	/* If we have NoVirSum forces, but we do not calculate the virial,
1973	* we sum fr->f_novirum=f later.
1974	*/
1975	if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL(1<<8))))
1976	{
1977	wallcycle_start(wcycle, ewcVSITESPREAD);
1978	spread_vsite_f(vsite, x, f, fr->fshift, FALSE0, NULL((void*)0), nrnb,
1979	&top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
1980	wallcycle_stop(wcycle, ewcVSITESPREAD);
1981
1982	if (bSepLRF)
1983	{
1984	wallcycle_start(wcycle, ewcVSITESPREAD);
1985	spread_vsite_f(vsite, x, fr->f_twin, NULL((void)0), FALSE0, NULL((void)0),
1986	nrnb,
1987	&top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
1988	wallcycle_stop(wcycle, ewcVSITESPREAD);
1989	}
1990	}
1991
1992	if (flags & GMX_FORCE_VIRIAL(1<<8))
1993	{
1994	/* Calculation of the virial must be done after vsites! */
1995	calc_virial(0, mdatoms->homenr, x, f,
1996	vir_force, graph, box, nrnb, fr, inputrec->ePBC);
1997	}
1998	}
1999
2000	if (inputrec->ePull == epullUMBRELLA \|\| inputrec->ePull == epullCONST_F)
2001	{
2002	pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x,
2003	f, vir_force, mdatoms, enerd, lambda, t);
2004	}
2005
2006	/* Add the forces from enforced rotation potentials (if any) */
2007	if (inputrec->bRot)
2008	{
2009	wallcycle_start(wcycle, ewcROTadd);
2010	enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
2011	wallcycle_stop(wcycle, ewcROTadd);
2012	}
2013
2014	/* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. */
2015	IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle);
2016
2017	if (PAR(cr)((cr)->nnodes > 1) && !(cr->duty & DUTY_PME(1<<1)))
2018	{
2019	/* In case of node-splitting, the PP nodes receive the long-range
2020	* forces, virial and energy from the PME nodes here.
2021	*/
2022	pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr);
2023	}
2024
2025	if (bDoForces)
2026	{
2027	post_process_forces(cr, step, nrnb, wcycle,
2028	top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
2029	flags);
2030	}
2031
2032	/* Sum the potential energy terms from group contributions */
2033	sum_epot(&(enerd->grpp), enerd->term);
2034	}
2035
2036	void do_force(FILE fplog, t_commrec cr,
2037	t_inputrec *inputrec,
2038	gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2039	gmx_localtop_t *top,
2040	gmx_groups_t *groups,
2041	matrix box, rvec x[], history_t *hist,
2042	rvec f[],
2043	tensor vir_force,
2044	t_mdatoms *mdatoms,
2045	gmx_enerdata_t enerd, t_fcdata fcd,
2046	real lambda, t_graph graph,
2047	t_forcerec *fr,
2048	gmx_vsite_t *vsite, rvec mu_tot,
2049	double t, FILE *field, gmx_edsam_t ed,
2050	gmx_bool bBornRadii,
2051	int flags)
2052	{
2053	/* modify force flag if not doing nonbonded */
2054	if (!fr->bNonbonded)
2055	{
2056	flags &= ~GMX_FORCE_NONBONDED(1<<6);
2057	}
2058
2059	switch (inputrec->cutoff_scheme)
2060	{
2061	case ecutsVERLET:
2062	do_force_cutsVERLET(fplog, cr, inputrec,
2063	step, nrnb, wcycle,
2064	top,
2065	groups,
2066	box, x, hist,
2067	f, vir_force,
2068	mdatoms,
2069	enerd, fcd,
2070	lambda, graph,
2071	fr, fr->ic,
2072	vsite, mu_tot,
2073	t, field, ed,
2074	bBornRadii,
2075	flags);
2076	break;
2077	case ecutsGROUP:
2078	do_force_cutsGROUP(fplog, cr, inputrec,
2079	step, nrnb, wcycle,
2080	top,
2081	groups,
2082	box, x, hist,
2083	f, vir_force,
2084	mdatoms,
2085	enerd, fcd,
2086	lambda, graph,
2087	fr, vsite, mu_tot,
2088	t, field, ed,
2089	bBornRadii,
2090	flags);
2091	break;
2092	default:
2093	gmx_incons("Invalid cut-off scheme passed!")_gmx_error("incons", "Invalid cut-off scheme passed!", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c" , 2093);
2094	}
2095	}
2096
2097
2098	void do_constrain_first(FILE *fplog, gmx_constr_t constr,
2099	t_inputrec ir, t_mdatoms md,
2100	t_state state, t_commrec cr, t_nrnb *nrnb,
2101	t_forcerec fr, gmx_localtop_t top)
2102	{
2103	int i, m, start, end;
2104	gmx_int64_t step;
2105	real dt = ir->delta_t;
2106	real dvdl_dum;
2107	rvec *savex;
2108
2109	snew(savex, state->natoms)(savex) = save_calloc("savex", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c" , 2109, (state->natoms), sizeof(*(savex)));
2110
2111	start = 0;
2112	end = md->homenr;
2113
2114	if (debug)
2115	{
2116	fprintf(debug, "vcm: start=%d, homenr=%d, end=%d\n",
2117	start, md->homenr, end);
2118	}
2119	/* Do a first constrain to reset particles... */
2120	step = ir->init_step;
2121	if (fplog)
2122	{
2123	char buf[STEPSTRSIZE22];
2124	fprintf(fplog, "\nConstraining the starting coordinates (step %s)\n",
2125	gmx_step_str(step, buf));
2126	}
2127	dvdl_dum = 0;
2128
2129	/* constrain the current position */
2130	constrain(NULL((void*)0), TRUE1, FALSE0, constr, &(top->idef),
2131	ir, NULL((void*)0), cr, step, 0, md,
2132	state->x, state->x, NULL((void*)0),
2133	fr->bMolPBC, state->box,
2134	state->lambda[efptBONDED], &dvdl_dum,
2135	NULL((void)0), NULL((void)0), nrnb, econqCoord,
2136	ir->epc == epcMTTK, state->veta, state->veta);
2137	if (EI_VV(ir->eI)((ir->eI) == eiVV \|\| (ir->eI) == eiVVAK))
2138	{
2139	/* constrain the inital velocity, and save it */
2140	/* also may be useful if we need the ekin from the halfstep for velocity verlet */
2141	/* might not yet treat veta correctly */
2142	constrain(NULL((void*)0), TRUE1, FALSE0, constr, &(top->idef),
2143	ir, NULL((void*)0), cr, step, 0, md,
2144	state->x, state->v, state->v,
2145	fr->bMolPBC, state->box,
2146	state->lambda[efptBONDED], &dvdl_dum,
2147	NULL((void)0), NULL((void)0), nrnb, econqVeloc,
2148	ir->epc == epcMTTK, state->veta, state->veta);
2149	}
2150	/* constrain the inital velocities at t-dt/2 */
2151	if (EI_STATE_VELOCITY(ir->eI)(((ir->eI) == eiMD \|\| ((ir->eI) == eiVV \|\| (ir->eI) == eiVVAK)) \|\| ((ir->eI) == eiSD1 \|\| (ir->eI) == eiSD2)) && ir->eI != eiVV)
2152	{
2153	for (i = start; (i < end); i++)
2154	{
2155	for (m = 0; (m < DIM3); m++)
2156	{
2157	/* Reverse the velocity */
2158	state->v[i][m] = -state->v[i][m];
2159	/* Store the position at t-dt in buf */
2160	savex[i][m] = state->x[i][m] + dt*state->v[i][m];
2161	}
2162	}
2163	/* Shake the positions at t=-dt with the positions at t=0
2164	* as reference coordinates.
2165	*/
2166	if (fplog)
2167	{
2168	char buf[STEPSTRSIZE22];
2169	fprintf(fplog, "\nConstraining the coordinates at t0-dt (step %s)\n",
2170	gmx_step_str(step, buf));
2171	}
2172	dvdl_dum = 0;
2173	constrain(NULL((void*)0), TRUE1, FALSE0, constr, &(top->idef),
2174	ir, NULL((void*)0), cr, step, -1, md,
2175	state->x, savex, NULL((void*)0),
2176	fr->bMolPBC, state->box,
2177	state->lambda[efptBONDED], &dvdl_dum,
2178	state->v, NULL((void*)0), nrnb, econqCoord,
2179	ir->epc == epcMTTK, state->veta, state->veta);
2180
2181	for (i = start; i < end; i++)
2182	{
2183	for (m = 0; m < DIM3; m++)
2184	{
2185	/* Re-reverse the velocities */
2186	state->v[i][m] = -state->v[i][m];
2187	}
2188	}
2189	}
2190	sfree(savex)save_free("savex", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c" , 2190, (savex));
2191	}
2192
2193
2194	static void
2195	integrate_table(real vdwtab[], real scale, int offstart, int rstart, int rend,
2196	double enerout, double virout)
2197	{
2198	double enersum, virsum;
2199	double invscale, invscale2, invscale3;
2200	double r, ea, eb, ec, pa, pb, pc, pd;
2201	double y0, f, g, h;
2202	int ri, offset, tabfactor;
2203
2204	invscale = 1.0/scale;
2205	invscale2 = invscale*invscale;
2206	invscale3 = invscale*invscale2;
2207
2208	/* Following summation derived from cubic spline definition,
2209	* Numerical Recipies in C, second edition, p. 113-116. Exact for
2210	* the cubic spline. We first calculate the negative of the
2211	* energy from rvdw to rvdw_switch, assuming that g(r)=1, and then
2212	* add the more standard, abrupt cutoff correction to that result,
2213	* yielding the long-range correction for a switched function. We
2214	* perform both the pressure and energy loops at the same time for
2215	* simplicity, as the computational cost is low. */
2216
2217	if (offstart == 0)
2218	{
2219	/* Since the dispersion table has been scaled down a factor
2220	* 6.0 and the repulsion a factor 12.0 to compensate for the
2221	* c6/c12 parameters inside nbfp[] being scaled up (to save
2222	* flops in kernels), we need to correct for this.
2223	*/
2224	tabfactor = 6.0;
2225	}
2226	else
2227	{
2228	tabfactor = 12.0;
2229	}
2230
2231	enersum = 0.0;
2232	virsum = 0.0;
2233	for (ri = rstart; ri < rend; ++ri)
2234	{
2235	r = ri*invscale;
2236	ea = invscale3;
2237	eb = 2.0invscale2r;
2238	ec = invscalerr;
2239
2240	pa = invscale3;
2241	pb = 3.0invscale2r;
2242	pc = 3.0invscaler*r;
2243	pd = rrr;
2244
2245	/* this "8" is from the packing in the vdwtab array - perhaps
2246	should be defined? */
2247
2248	offset = 8*ri + offstart;
2249	y0 = vdwtab[offset];
2250	f = vdwtab[offset+1];
2251	g = vdwtab[offset+2];
2252	h = vdwtab[offset+3];
2253
2254	enersum += y0(ea/3 + eb/2 + ec) + f(ea/4 + eb/3 + ec/2) + g(ea/5 + eb/4 + ec/3) + h(ea/6 + eb/5 + ec/4);
2255	virsum += f(pa/4 + pb/3 + pc/2 + pd) + 2g(pa/5 + pb/4 + pc/3 + pd/2) + 3h*(pa/6 + pb/5 + pc/4 + pd/3);
2256	}
2257	enerout = 4.0M_PI3.14159265358979323846enersumtabfactor;
2258	virout = 4.0M_PI3.14159265358979323846virsumtabfactor;
2259	}
2260
2261	void calc_enervirdiff(FILE fplog, int eDispCorr, t_forcerec fr)
2262	{
2263	double eners[2], virs[2], enersum, virsum, y0, f, g, h;
2264	double r0, r1, r, rc3, rc9, ea, eb, ec, pa, pb, pc, pd;
2265	double invscale, invscale2, invscale3;
2266	int ri0, ri1, ri, i, offstart, offset;
2267	real scale, *vdwtab, tabfactor, tmp;
2268
2269	fr->enershiftsix = 0;
2270	fr->enershifttwelve = 0;
2271	fr->enerdiffsix = 0;
2272	fr->enerdifftwelve = 0;
2273	fr->virdiffsix = 0;
2274	fr->virdifftwelve = 0;
2275
2276	if (eDispCorr != edispcNO)
2277	{
2278	for (i = 0; i < 2; i++)
2279	{
2280	eners[i] = 0;
2281	virs[i] = 0;
2282	}
2283	if (fr->vdwtype == evdwSWITCH \|\| fr->vdwtype == evdwSHIFT \|\|
2284	fr->vdw_modifier == eintmodPOTSWITCH \|\|
2285	fr->vdw_modifier == eintmodFORCESWITCH)
2286	{
2287	if (fr->rvdw_switch == 0)
2288	{
2289	gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c" , 2289,
2290	"With dispersion correction rvdw-switch can not be zero "
2291	"for vdw-type = %s", evdw_names[fr->vdwtype]);
2292	}
2293
2294	scale = fr->nblists[0].table_elec_vdw.scale;
2295	vdwtab = fr->nblists[0].table_vdw.data;
2296
2297	/* Round the cut-offs to exact table values for precision */
2298	ri0 = floor(fr->rvdw_switch*scale);
2299	ri1 = ceil(fr->rvdw*scale);
2300	r0 = ri0/scale;
2301	r1 = ri1/scale;
2302	rc3 = r0r0r0;
2303	rc9 = rc3rc3rc3;
2304
2305	if (fr->vdwtype == evdwSHIFT \|\|
2306	fr->vdw_modifier == eintmodFORCESWITCH)
2307	{
2308	/* Determine the constant energy shift below rvdw_switch.
2309	* Table has a scale factor since we have scaled it down to compensate
2310	* for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
2311	*/
2312	fr->enershiftsix = (real)(-1.0/(rc3rc3)) - 6.0vdwtab[8*ri0];
2313	fr->enershifttwelve = (real)( 1.0/(rc9rc3)) - 12.0vdwtab[8*ri0 + 4];
2314	}
2315	/* Add the constant part from 0 to rvdw_switch.
2316	* This integration from 0 to rvdw_switch overcounts the number
2317	* of interactions by 1, as it also counts the self interaction.
2318	* We will correct for this later.
2319	*/
2320	eners[0] += 4.0M_PI3.14159265358979323846fr->enershiftsix*rc3/3.0;
2321	eners[1] += 4.0M_PI3.14159265358979323846fr->enershifttwelve*rc3/3.0;
2322	for (i = 0; i < 2; i++)
2323	{
2324	enersum = 0;
2325	virsum = 0;
2326	integrate_table(vdwtab, scale, (i == 0 ? 0 : 4), ri0, ri1, &enersum, &virsum);
2327	eners[i] -= enersum;
2328	virs[i] -= virsum;
2329	}
2330
2331	/* now add the correction for rvdw_switch to infinity */
2332	eners[0] += -4.0M_PI3.14159265358979323846/(3.0rc3);
2333	eners[1] += 4.0M_PI3.14159265358979323846/(9.0rc9);
2334	virs[0] += 8.0*M_PI3.14159265358979323846/rc3;
2335	virs[1] += -16.0M_PI3.14159265358979323846/(3.0rc9);
2336	}
2337	else if (fr->vdwtype == evdwCUT \|\|
2338	EVDW_PME(fr->vdwtype)((fr->vdwtype) == evdwPME) \|\|
2339	fr->vdwtype == evdwUSER)
2340	{
2341	if (fr->vdwtype == evdwUSER && fplog)
2342	{
2343	fprintf(fplog,
2344	"WARNING: using dispersion correction with user tables\n");
2345	}
2346
2347	/* Note that with LJ-PME, the dispersion correction is multiplied
2348	* by the difference between the actual C6 and the value of C6
2349	* that would produce the combination rule.
2350	* This means the normal energy and virial difference formulas
2351	* can be used here.
2352	*/
2353
2354	rc3 = fr->rvdwfr->rvdwfr->rvdw;
2355	rc9 = rc3rc3rc3;
2356	/* Contribution beyond the cut-off */
2357	eners[0] += -4.0M_PI3.14159265358979323846/(3.0rc3);
2358	eners[1] += 4.0M_PI3.14159265358979323846/(9.0rc9);
2359	if (fr->vdw_modifier == eintmodPOTSHIFT)
2360	{
2361	/* Contribution within the cut-off */
2362	eners[0] += -4.0M_PI3.14159265358979323846/(3.0rc3);
2363	eners[1] += 4.0M_PI3.14159265358979323846/(3.0rc9);
2364	}
2365	/* Contribution beyond the cut-off */
2366	virs[0] += 8.0*M_PI3.14159265358979323846/rc3;
2367	virs[1] += -16.0M_PI3.14159265358979323846/(3.0rc9);
2368	}
2369	else
2370	{
2371	gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c" , 2371,
2372	"Dispersion correction is not implemented for vdw-type = %s",
2373	evdw_names[fr->vdwtype]);
2374	}
2375
2376	/* TODO: remove this code once we have group LJ-PME kernels
2377	* that calculate the exact, full LJ param C6/r^6 within the cut-off,
2378	* as the current nbnxn kernels do.
2379	*/
2380	if (fr->vdwtype == evdwPME && fr->cutoff_scheme == ecutsGROUP)
2381	{
2382	/* Calculate self-interaction coefficient (assuming that
2383	* the reciprocal-space contribution is constant in the
2384	* region that contributes to the self-interaction).
2385	*/
2386	fr->enershiftsix = pow(fr->ewaldcoeff_lj, 6) / 6.0;
2387
2388	eners[0] += -pow(sqrt(M_PI3.14159265358979323846)*fr->ewaldcoeff_lj, 3)/3.0;
2389	virs[0] += pow(sqrt(M_PI3.14159265358979323846)*fr->ewaldcoeff_lj, 3);
2390	}
2391
2392	fr->enerdiffsix = eners[0];
2393	fr->enerdifftwelve = eners[1];
2394	/* The 0.5 is due to the Gromacs definition of the virial */
2395	fr->virdiffsix = 0.5*virs[0];
2396	fr->virdifftwelve = 0.5*virs[1];
2397	}
2398	}
2399
2400	void calc_dispcorr(FILE fplog, t_inputrec ir, t_forcerec *fr,
2401	gmx_int64_t step, int natoms,
2402	matrix box, real lambda, tensor pres, tensor virial,
2403	real prescorr, real enercorr, real *dvdlcorr)
2404	{
2405	gmx_bool bCorrAll, bCorrPres;
2406	real dvdlambda, invvol, dens, ninter, avcsix, avctwelve, enerdiff, svir = 0, spres = 0;
2407	int m;
2408
2409	*prescorr = 0;
2410	*enercorr = 0;
2411	*dvdlcorr = 0;
2412
2413	clear_mat(virial);
2414	clear_mat(pres);
2415
2416	if (ir->eDispCorr != edispcNO)
2417	{
2418	bCorrAll = (ir->eDispCorr == edispcAllEner \|\|
2419	ir->eDispCorr == edispcAllEnerPres);
2420	bCorrPres = (ir->eDispCorr == edispcEnerPres \|\|
2421	ir->eDispCorr == edispcAllEnerPres);
2422
2423	invvol = 1/det(box);
2424	if (fr->n_tpi)
2425	{
2426	/* Only correct for the interactions with the inserted molecule */
2427	dens = (natoms - fr->n_tpi)*invvol;
2428	ninter = fr->n_tpi;
2429	}
2430	else
2431	{
2432	dens = natoms*invvol;
2433	ninter = 0.5*natoms;
2434	}
2435
2436	if (ir->efep == efepNO)
2437	{
2438	avcsix = fr->avcsix[0];
2439	avctwelve = fr->avctwelve[0];
2440	}
2441	else
2442	{
2443	avcsix = (1 - lambda)fr->avcsix[0] + lambdafr->avcsix[1];
2444	avctwelve = (1 - lambda)fr->avctwelve[0] + lambdafr->avctwelve[1];
2445	}
2446
2447	enerdiff = ninter(densfr->enerdiffsix - fr->enershiftsix);
2448	enercorr += avcsixenerdiff;
2449	dvdlambda = 0.0;
2450	if (ir->efep != efepNO)
2451	{
2452	dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
2453	}
2454	if (bCorrAll)
2455	{
2456	enerdiff = ninter(densfr->enerdifftwelve - fr->enershifttwelve);
2457	enercorr += avctwelveenerdiff;
2458	if (fr->efep != efepNO)
2459	{
2460	dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
2461	}
2462	}
2463
2464	if (bCorrPres)
2465	{
2466	svir = ninterdensavcsix*fr->virdiffsix/3.0;
2467	if (ir->eDispCorr == edispcAllEnerPres)
2468	{
2469	svir += ninterdensavctwelve*fr->virdifftwelve/3.0;
2470	}
2471	/* The factor 2 is because of the Gromacs virial definition */
2472	spres = -2.0invvolsvir*PRESFAC(16.6054);
2473
2474	for (m = 0; m < DIM3; m++)
2475	{
2476	virial[m][m] += svir;
2477	pres[m][m] += spres;
2478	}
2479	*prescorr += spres;
2480	}
2481
2482	/* Can't currently control when it prints, for now, just print when degugging */
2483	if (debug)
2484	{
2485	if (bCorrAll)
2486	{
2487	fprintf(debug, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
2488	avcsix, avctwelve);
2489	}
2490	if (bCorrPres)
2491	{
2492	fprintf(debug,
2493	"Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
2494	*enercorr, spres, svir);
2495	}
2496	else
2497	{
2498	fprintf(debug, "Long Range LJ corr.: Epot %10g\n", *enercorr);
2499	}
2500	}
2501
2502	if (fr->bSepDVDL && do_per_step(step, ir->nstlog))
2503	{
2504	gmx_print_sepdvdl(fplog, "Dispersion correction", *enercorr, dvdlambda);
2505	}
2506	if (fr->efep != efepNO)
2507	{
2508	*dvdlcorr += dvdlambda;
2509	}
2510	}
2511	}
2512
2513	void do_pbc_first(FILE fplog, matrix box, t_forcerec fr,
2514	t_graph *graph, rvec x[])
2515	{
2516	if (fplog)
2517	{
2518	fprintf(fplog, "Removing pbc first time\n");
2519	}
2520	calc_shifts(box, fr->shift_vec);
2521	if (graph)
2522	{
2523	mk_mshift(fplog, graph, fr->ePBC, box, x);
2524	if (gmx_debug_at)
2525	{
2526	p_graph(debug, "do_pbc_first 1", graph);
2527	}
2528	shift_self(graph, box, x);
2529	/* By doing an extra mk_mshift the molecules that are broken
2530	* because they were e.g. imported from another software
2531	* will be made whole again. Such are the healing powers
2532	* of GROMACS.
2533	*/
2534	mk_mshift(fplog, graph, fr->ePBC, box, x);
2535	if (gmx_debug_at)
2536	{
2537	p_graph(debug, "do_pbc_first 2", graph);
2538	}
2539	}
2540	if (fplog)
2541	{
2542	fprintf(fplog, "Done rmpbc\n");
2543	}
2544	}
2545
2546	static void low_do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
2547	gmx_mtop_t *mtop, rvec x[],
2548	gmx_bool bFirst)
2549	{
2550	t_graph *graph;
2551	int mb, as, mol;
2552	gmx_molblock_t *molb;
2553
2554	if (bFirst && fplog)
2555	{
2556	fprintf(fplog, "Removing pbc first time\n");
2557	}
2558
2559	snew(graph, 1)(graph) = save_calloc("graph", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c" , 2559, (1), sizeof(*(graph)));
2560	as = 0;
2561	for (mb = 0; mb < mtop->nmolblock; mb++)
2562	{
2563	molb = &mtop->molblock[mb];
2564	if (molb->natoms_mol == 1 \|\|
2565	(!bFirst && mtop->moltype[molb->type].cgs.nr == 1))
2566	{
2567	/* Just one atom or charge group in the molecule, no PBC required */
2568	as += molb->nmol*molb->natoms_mol;
2569	}
2570	else
2571	{
2572	/* Pass NULL iso fplog to avoid graph prints for each molecule type */
2573	mk_graph_ilist(NULL((void*)0), mtop->moltype[molb->type].ilist,
2574	0, molb->natoms_mol, FALSE0, FALSE0, graph);
2575
2576	for (mol = 0; mol < molb->nmol; mol++)
2577	{
2578	mk_mshift(fplog, graph, ePBC, box, x+as);
2579
2580	shift_self(graph, box, x+as);
2581	/* The molecule is whole now.
2582	* We don't need the second mk_mshift call as in do_pbc_first,
2583	* since we no longer need this graph.
2584	*/
2585
2586	as += molb->natoms_mol;
2587	}
2588	done_graph(graph);
2589	}
2590	}
2591	sfree(graph)save_free("graph", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c" , 2591, (graph));
2592	}
2593
2594	void do_pbc_first_mtop(FILE *fplog, int ePBC, matrix box,
2595	gmx_mtop_t *mtop, rvec x[])
2596	{
2597	low_do_pbc_mtop(fplog, ePBC, box, mtop, x, TRUE1);
2598	}
2599
2600	void do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
2601	gmx_mtop_t *mtop, rvec x[])
2602	{
2603	low_do_pbc_mtop(fplog, ePBC, box, mtop, x, FALSE0);
2604	}
2605
2606	void finish_run(FILE fplog, t_commrec cr,
2607	t_inputrec *inputrec,
2608	t_nrnb nrnb[], gmx_wallcycle_t wcycle,
2609	gmx_walltime_accounting_t walltime_accounting,
2610	wallclock_gpu_t *gputimes,
2611	gmx_bool bWriteStat)
2612	{
2613	int i, j;
2614	t_nrnb nrnb_tot = NULL((void)0);
2615	real delta_t;
2616	double nbfs, mflop;
2617	double elapsed_time,
2618	elapsed_time_over_all_ranks,
2619	elapsed_time_over_all_threads,
2620	elapsed_time_over_all_threads_over_all_ranks;
2621	wallcycle_sum(cr, wcycle);
2622
2623	if (cr->nnodes > 1)
2624	{
2625	snew(nrnb_tot, 1)(nrnb_tot) = save_calloc("nrnb_tot", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c" , 2625, (1), sizeof(*(nrnb_tot)));
2626	#ifdef GMX_MPI
2627	MPI_AllreducetMPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLETMPI_DOUBLE, MPI_SUMTMPI_SUM,
2628	cr->mpi_comm_mysim);
2629	#endif
2630	}
2631	else
2632	{
2633	nrnb_tot = nrnb;
2634	}
2635
2636	elapsed_time = walltime_accounting_get_elapsed_time(walltime_accounting);
2637	elapsed_time_over_all_ranks = elapsed_time;
2638	elapsed_time_over_all_threads = walltime_accounting_get_elapsed_time_over_all_threads(walltime_accounting);
2639	elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
2640	#ifdef GMX_MPI
2641	if (cr->nnodes > 1)
2642	{
2643	/* reduce elapsed_time over all MPI ranks in the current simulation */
2644	MPI_AllreducetMPI_Allreduce(&elapsed_time,
2645	&elapsed_time_over_all_ranks,
2646	1, MPI_DOUBLETMPI_DOUBLE, MPI_SUMTMPI_SUM,
2647	cr->mpi_comm_mysim);
2648	elapsed_time_over_all_ranks /= cr->nnodes;
2649	/* Reduce elapsed_time_over_all_threads over all MPI ranks in the
2650	* current simulation. */
2651	MPI_AllreducetMPI_Allreduce(&elapsed_time_over_all_threads,
2652	&elapsed_time_over_all_threads_over_all_ranks,
2653	1, MPI_DOUBLETMPI_DOUBLE, MPI_SUMTMPI_SUM,
2654	cr->mpi_comm_mysim);
2655	}
2656	#endif
2657
2658	if (SIMMASTER(cr)(((((cr)->nodeid == 0) \|\| !((cr)->nnodes > 1)) && ((cr)->duty & (1<<0))) \|\| !((cr)->nnodes > 1)))
2659	{
2660	print_flop(fplog, nrnb_tot, &nbfs, &mflop);
2661	}
2662	if (cr->nnodes > 1)
2663	{
2664	sfree(nrnb_tot)save_free("nrnb_tot", "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/sim_util.c" , 2664, (nrnb_tot));
2665	}
2666
2667	if ((cr->duty & DUTY_PP(1<<0)) && DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes > 1)))
2668	{
2669	print_dd_statistics(cr, inputrec, fplog);
2670	}
2671
2672	if (SIMMASTER(cr)(((((cr)->nodeid == 0) \|\| !((cr)->nnodes > 1)) && ((cr)->duty & (1<<0))) \|\| !((cr)->nnodes > 1)))
2673	{
2674	wallcycle_print(fplog, cr->nnodes, cr->npmenodes,
2675	elapsed_time_over_all_ranks,
2676	wcycle, gputimes);
2677
2678	if (EI_DYNAMICS(inputrec->eI)(((inputrec->eI) == eiMD \|\| ((inputrec->eI) == eiVV \|\| ( inputrec->eI) == eiVVAK)) \|\| ((inputrec->eI) == eiSD1 \|\| (inputrec->eI) == eiSD2) \|\| (inputrec->eI) == eiBD))
2679	{
2680	delta_t = inputrec->delta_t;
2681	}
2682	else
2683	{
2684	delta_t = 0;
2685	}
2686
2687	if (fplog)
2688	{
2689	print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
2690	elapsed_time_over_all_ranks,
2691	walltime_accounting_get_nsteps_done(walltime_accounting),
2692	delta_t, nbfs, mflop);
2693	}
2694	if (bWriteStat)
2695	{
2696	print_perf(stderrstderr, elapsed_time_over_all_threads_over_all_ranks,
2697	elapsed_time_over_all_ranks,
2698	walltime_accounting_get_nsteps_done(walltime_accounting),
2699	delta_t, nbfs, mflop);
2700	}
2701	}
2702	}
2703
2704	extern void initialize_lambdas(FILE fplog, t_inputrec ir, int fep_state, real lambda, double *lam0)
2705	{
2706	/* this function works, but could probably use a logic rewrite to keep all the different
2707	types of efep straight. */
2708
2709	int i;
2710	t_lambda *fep = ir->fepvals;
2711
2712	if ((ir->efep == efepNO) && (ir->bSimTemp == FALSE0))
2713	{
2714	for (i = 0; i < efptNR; i++)
2715	{
2716	lambda[i] = 0.0;
2717	if (lam0)
2718	{
2719	lam0[i] = 0.0;
2720	}
2721	}
2722	return;
2723	}
2724	else
2725	{
2726	fep_state = fep->init_fep_state; / this might overwrite the checkpoint
2727	if checkpoint is set -- a kludge is in for now
2728	to prevent this.*/
2729	for (i = 0; i < efptNR; i++)
2730	{
2731	/* overwrite lambda state with init_lambda for now for backwards compatibility */
2732	if (fep->init_lambda >= 0) /* if it's -1, it was never initializd */
2733	{
2734	lambda[i] = fep->init_lambda;
2735	if (lam0)
2736	{
2737	lam0[i] = lambda[i];
2738	}
2739	}
2740	else
2741	{
2742	lambda[i] = fep->all_lambda[i][*fep_state];
2743	if (lam0)
2744	{
2745	lam0[i] = lambda[i];
2746	}
2747	}
2748	}
2749	if (ir->bSimTemp)
2750	{
2751	/* need to rescale control temperatures to match current state */
2752	for (i = 0; i < ir->opts.ngtc; i++)
2753	{
2754	if (ir->opts.ref_t[i] > 0)
2755	{
2756	ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
2757	}
2758	}
2759	}
2760	}
2761
2762	/* Send to the log the information on the current lambdas */
2763	if (fplog != NULL((void*)0))
2764	{
2765	fprintf(fplog, "Initial vector of lambda components:[ ");
2766	for (i = 0; i < efptNR; i++)
2767	{
2768	fprintf(fplog, "%10.4f ", lambda[i]);
2769	}
2770	fprintf(fplog, "]\n");
2771	}
2772	return;
2773	}
2774
2775
2776	void init_md(FILE *fplog,
2777	t_commrec cr, t_inputrec ir, const output_env_t oenv,
2778	double t, double t0,
2779	real lambda, int fep_state, double *lam0,
2780	t_nrnb nrnb, gmx_mtop_t mtop,
2781	gmx_update_t *upd,
2782	int nfile, const t_filenm fnm[],
2783	gmx_mdoutf_t outf, t_mdebin *mdebin,
2784	tensor force_vir, tensor shake_vir, rvec mu_tot,
2785	gmx_bool bSimAnn, t_vcm *vcm, unsigned long Flags)
2786	{
2787	int i, j, n;
2788	real tmpt, mod;
2789
2790	/* Initial values */
2791	t = t0 = ir->init_t;
2792
2793	*bSimAnn = FALSE0;
2794	for (i = 0; i < ir->opts.ngtc; i++)
2795	{
2796	/* set bSimAnn if any group is being annealed */
2797	if (ir->opts.annealing[i] != eannNO)
2798	{
2799	*bSimAnn = TRUE1;
2800	}
2801	}
2802	if (*bSimAnn)
2803	{
2804	update_annealing_target_temp(&(ir->opts), ir->init_t);
2805	}
2806
2807	/* Initialize lambda variables */
2808	initialize_lambdas(fplog, ir, fep_state, lambda, lam0);
2809
2810	if (upd)
2811	{
2812	*upd = init_update(ir);
2813	}
2814
2815
2816	if (vcm != NULL((void*)0))
2817	{
2818	*vcm = init_vcm(fplog, &mtop->groups, ir);
2819	}
2820
2821	if (EI_DYNAMICS(ir->eI)(((ir->eI) == eiMD \|\| ((ir->eI) == eiVV \|\| (ir->eI) == eiVVAK)) \|\| ((ir->eI) == eiSD1 \|\| (ir->eI) == eiSD2) \|\| (ir->eI) == eiBD) && !(Flags & MD_APPENDFILES(1<<15)))
2822	{
2823	if (ir->etc == etcBERENDSEN)
2824	{
2825	please_cite(fplog, "Berendsen84a");
2826	}
2827	if (ir->etc == etcVRESCALE)
2828	{
2829	please_cite(fplog, "Bussi2007a");
2830	}
2831	}
2832
2833	init_nrnb(nrnb);
2834
2835	if (nfile != -1)
2836	{
2837	*outf = init_mdoutf(fplog, nfile, fnm, Flags, cr, ir, mtop, oenv);
2838
2839	mdebin = init_mdebin((Flags & MD_APPENDFILES(1<<15)) ? NULL((void)0) : mdoutf_get_fp_ene(*outf),
2840	mtop, ir, mdoutf_get_fp_dhdl(*outf));
2841	}
2842
2843	if (ir->bAdress)
2844	{
2845	please_cite(fplog, "Fritsch12");
2846	please_cite(fplog, "Junghans10");
2847	}
2848	/* Initiate variables */
2849	clear_mat(force_vir);
2850	clear_mat(shake_vir);
2851	clear_rvec(mu_tot);
2852
2853	debug_gmx();
2854	}