/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c

Bug Summary

File:	programs/mdrun/runner.c
Location:	line 1729, column 53
Description:	Access to field 'cginfo_mb' results in a dereference of a null pointer (loaded from variable 'fr')

Annotated Source Code

* This file is part of the GROMACS molecular simulation package.

* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,

* and including many others, as listed in the AUTHORS file in the

* top-level source directory and at http://www.gromacs.org.

* GROMACS is free software; you can redistribute it and/or

* modify it under the terms of the GNU Lesser General Public License

* as published by the Free Software Foundation; either version 2.1

* of the License, or (at your option) any later version.

* GROMACS is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

* Lesser General Public License for more details.

* You should have received a copy of the GNU Lesser General Public

* License along with GROMACS; if not, see

* http://www.gnu.org/licenses, or write to the Free Software Foundation,

* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

* If you want to redistribute modifications to GROMACS, please

* consider that scientific software is very special. Version

* control is crucial - bugs must be traceable. We will be happy to

* consider code for inclusion in the official distribution, but

* derived work must not be called official GROMACS. Details are found

* in the README & COPYING files - if they are missing, get the

* official version at http://www.gromacs.org.

* To help us fund GROMACS development, we humbly ask that you cite

* the research papers on the package. Check out http://www.gromacs.org.

#ifdef HAVE_CONFIG_H1

#include <config.h>

#endif

#include <assert.h>

#include <signal.h>

#include <stdlib.h>

#include <string.h>

#ifdef HAVE_UNISTD_H

#include <unistd.h>

#endif

#include "typedefs.h"

#include "copyrite.h"

#include "force.h"

#include "mdrun.h"

#include "md_logging.h"

#include "md_support.h"

#include "network.h"

#include "names.h"

#include "disre.h"

#include "orires.h"

#include "pme.h"

#include "mdatoms.h"

#include "repl_ex.h"

#include "deform.h"

#include "qmmm.h"

#include "domdec.h"

#include "coulomb.h"

#include "constr.h"

#include "mvdata.h"

#include "checkpoint.h"

#include "mtop_util.h"

#include "sighandler.h"

#include "txtdump.h"

#include "gmx_detect_hardware.h"

#include "gmx_omp_nthreads.h"

#include "gromacs/gmxpreprocess/calc_verletbuf.h"

#include "membed.h"

#include "macros.h"

#include "gmx_thread_affinity.h"

#include "inputrec.h"

#include "main.h"

#include "gromacs/essentialdynamics/edsam.h"

#include "gromacs/fileio/tpxio.h"

#include "gromacs/math/vec.h"

#include "gromacs/mdlib/nbnxn_search.h"

#include "gromacs/mdlib/nbnxn_consts.h"

#include "gromacs/pulling/pull.h"

#include "gromacs/pulling/pull_rotation.h"

#include "gromacs/swap/swapcoords.h"

#include "gromacs/timing/wallcycle.h"

#include "gromacs/utility/gmxmpi.h"

#include "gromacs/utility/smalloc.h"

#ifdef GMX_FAHCORE

#include "corewrap.h"

#endif

#include "gpu_utils.h"

#include "nbnxn_cuda_data_mgmt.h"

100

typedef struct {

101

gmx_integrator_t *func;

102

} gmx_intp_t;

103

104

/* The array should match the eI array in include/types/enums.h */

105

const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md}, {do_md}};

106

107

gmx_int64_t deform_init_init_step_tpx;

108

matrix deform_init_box_tpx;

109

tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER{ {0}, ((void*)0) };

110

111

112

#ifdef GMX_THREAD_MPI

113

/* The minimum number of atoms per tMPI thread. With fewer atoms than this,
 * the number of threads will get lowered.
 * MIN_ATOMS_PER_GPU is the corresponding limit used when a GPU can be used.
 */
#define MIN_ATOMS_PER_MPI_THREAD    90
#define MIN_ATOMS_PER_GPU           900

118

119

struct mdrunner_arglist

120

{

121

gmx_hw_opt_t hw_opt;

122

FILE *fplog;

123

t_commrec *cr;

124

int nfile;

125

const t_filenm *fnm;

126

output_env_t oenv;

127

gmx_bool bVerbose;

128

gmx_bool bCompact;

129

int nstglobalcomm;

130

ivec ddxyz;

131

int dd_node_order;

132

real rdd;

133

real rconstr;

134

const char *dddlb_opt;

135

real dlb_scale;

136

const char *ddcsx;

137

const char *ddcsy;

138

const char *ddcsz;

139

const char *nbpu_opt;

140

int nstlist_cmdline;

141

gmx_int64_t nsteps_cmdline;

142

int nstepout;

143

int resetstep;

144

int nmultisim;

145

int repl_ex_nst;

146

int repl_ex_nex;

147

int repl_ex_seed;

148

real pforce;

149

real cpt_period;

150

real max_hours;

151

const char *deviceOptions;

152

int imdport;

153

unsigned long Flags;

154

};

155

156

157

/* The function used for spawning threads. Extracts the mdrunner()

158

arguments from its one argument and calls mdrunner(), after making

159

a commrec. */

160

static void mdrunner_start_fn(void *arg)

161

{

162

struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg;

163

struct mdrunner_arglist mc = *mda; /* copy the arg list to make sure

164

that it's thread-local. This doesn't

165

copy pointed-to items, of course,

166

but those are all const. */

167

t_commrec *cr; /* we need a local version of this */

168

FILE *fplog = NULL((void*)0);

169

t_filenm *fnm;

170

171

fnm = dup_tfn(mc.nfile, mc.fnm);

172

173

cr = reinitialize_commrec_for_this_thread(mc.cr);

174

175

if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))

176

{

177

fplog = mc.fplog;

178

}

179

180

mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,

181

mc.bVerbose, mc.bCompact, mc.nstglobalcomm,

182

mc.ddxyz, mc.dd_node_order, mc.rdd,

183

mc.rconstr, mc.dddlb_opt, mc.dlb_scale,

184

mc.ddcsx, mc.ddcsy, mc.ddcsz,

185

mc.nbpu_opt, mc.nstlist_cmdline,

186

mc.nsteps_cmdline, mc.nstepout, mc.resetstep,

187

mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,

188

mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.imdport, mc.Flags);

189

}

190

191

/* called by mdrunner() to start a specific number of threads (including

192

the main thread) for thread-parallel runs. This in turn calls mdrunner()

193

for each thread.

194

All options besides nthreads are the same as for mdrunner(). */

195

static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt,

196

FILE *fplog, t_commrec *cr, int nfile,

197

const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,

198

gmx_bool bCompact, int nstglobalcomm,

199

ivec ddxyz, int dd_node_order, real rdd, real rconstr,

200

const char *dddlb_opt, real dlb_scale,

201

const char *ddcsx, const char *ddcsy, const char *ddcsz,

202

const char *nbpu_opt, int nstlist_cmdline,

203

gmx_int64_t nsteps_cmdline,

204

int nstepout, int resetstep,

205

int nmultisim, int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,

206

real pforce, real cpt_period, real max_hours,

207

const char *deviceOptions, unsigned long Flags)

208

{

209

int ret;

210

struct mdrunner_arglist *mda;

211

t_commrec *crn; /* the new commrec */

212

t_filenm *fnmn;

213

214

/* first check whether we even need to start tMPI */

215

if (hw_opt->nthreads_tmpi < 2)

216

{

217

return cr;

218

}

219

220

/* a few small, one-time, almost unavoidable memory leaks: */

221

snew(mda, 1)(mda) = save_calloc("mda", "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 221, (1), sizeof(*(mda)));

222

fnmn = dup_tfn(nfile, fnm);

223

224

/* fill the data structure to pass as void pointer to thread start fn */

225

/* hw_opt contains pointers, which should all be NULL at this stage */

226

mda->hw_opt = *hw_opt;

227

mda->fplog = fplog;

228

mda->cr = cr;

229

mda->nfile = nfile;

230

mda->fnm = fnmn;

231

mda->oenv = oenv;

232

mda->bVerbose = bVerbose;

233

mda->bCompact = bCompact;

234

mda->nstglobalcomm = nstglobalcomm;

235

mda->ddxyz[XX0] = ddxyz[XX0];

236

mda->ddxyz[YY1] = ddxyz[YY1];

237

mda->ddxyz[ZZ2] = ddxyz[ZZ2];

238

mda->dd_node_order = dd_node_order;

239

mda->rdd = rdd;

240

mda->rconstr = rconstr;

241

mda->dddlb_opt = dddlb_opt;

242

mda->dlb_scale = dlb_scale;

243

mda->ddcsx = ddcsx;

244

mda->ddcsy = ddcsy;

245

mda->ddcsz = ddcsz;

246

mda->nbpu_opt = nbpu_opt;

247

mda->nstlist_cmdline = nstlist_cmdline;

248

mda->nsteps_cmdline = nsteps_cmdline;

249

mda->nstepout = nstepout;

250

mda->resetstep = resetstep;

251

mda->nmultisim = nmultisim;

252

mda->repl_ex_nst = repl_ex_nst;

253

mda->repl_ex_nex = repl_ex_nex;

254

mda->repl_ex_seed = repl_ex_seed;

255

mda->pforce = pforce;

256

mda->cpt_period = cpt_period;

257

mda->max_hours = max_hours;

258

mda->deviceOptions = deviceOptions;

259

mda->Flags = Flags;

260

261

/* now spawn new threads that start mdrunner_start_fn(), while

262

the main thread returns, we set thread affinity later */

263

ret = tMPI_Init_fn(TRUE1, hw_opt->nthreads_tmpi, TMPI_AFFINITY_NONE,

264

mdrunner_start_fn, (void*)(mda) );

265

if (ret != TMPI_SUCCESS)

266

{

267

return NULL((void*)0);

268

}

269

270

crn = reinitialize_commrec_for_this_thread(cr);

271

return crn;

272

}

273

274

275

static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,

276

const gmx_hw_opt_t *hw_opt,

277

int nthreads_tot,

278

int ngpu)

279

{

280

int nthreads_tmpi;

281

282

/* There are no separate PME nodes here, as we ensured in

283

* check_and_update_hw_opt that nthreads_tmpi>0 with PME nodes

284

* and a conditional ensures we would not have ended up here.

285

* Note that separate PME nodes might be switched on later.

286

287

if (ngpu > 0)

288

{

289

nthreads_tmpi = ngpu;

290

if (nthreads_tot > 0 && nthreads_tot < nthreads_tmpi)

291

{

292

nthreads_tmpi = nthreads_tot;

293

}

294

}

295

else if (hw_opt->nthreads_omp > 0)

296

{

297

/* Here we could oversubscribe, when we do, we issue a warning later */

298

nthreads_tmpi = max(1, nthreads_tot/hw_opt->nthreads_omp)(((1) > (nthreads_tot/hw_opt->nthreads_omp)) ? (1) : (nthreads_tot
/hw_opt->nthreads_omp) );

299

}

300

else

301

{

302

/* TODO choose nthreads_omp based on hardware topology

303

when we have a hardware topology detection library */

304

/* In general, when running up to 4 threads, OpenMP should be faster.

305

* Note: on AMD Bulldozer we should avoid running OpenMP over two dies.

306

* On Intel>=Nehalem running OpenMP on a single CPU is always faster,

307

* even on two CPUs it's usually faster (but with many OpenMP threads

308

* it could be faster not to use HT, currently we always use HT).

309

* On Nehalem/Westmere we want to avoid running 16 threads over

310

* two CPUs with HT, so we need a limit<16; thus we use 12.

311

* A reasonable limit for Intel Sandy and Ivy bridge,

312

* not knowing the topology, is 16 threads.

313

314

const int nthreads_omp_always_faster = 4;

315

const int nthreads_omp_always_faster_Nehalem = 12;

316

const int nthreads_omp_always_faster_SandyBridge = 16;

317

const int first_model_Nehalem = 0x1A;

318

const int first_model_SandyBridge = 0x2A;

319

gmx_bool bIntel_Family6;

320

321

bIntel_Family6 =

322

(gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL &&

323

gmx_cpuid_family(hwinfo->cpuid_info) == 6);

324

325

if (nthreads_tot <= nthreads_omp_always_faster ||

326

(bIntel_Family6 &&

327

((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||

328

(gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge))))

329

{

330

/* Use pure OpenMP parallelization */

331

nthreads_tmpi = 1;

332

}

333

else

334

{

335

/* Don't use OpenMP parallelization */

336

nthreads_tmpi = nthreads_tot;

337

}

338

}

339

340

return nthreads_tmpi;

341

}

342

343

344

/* Get the number of threads to use for thread-MPI based on how many

345

* were requested, which algorithms we're using,

346

* and how many particles there are.

347

* At the point we have already called check_and_update_hw_opt.

348

* Thus all options should be internally consistent and consistent

349

* with the hardware, except that ntmpi could be larger than #GPU.

350

351

static int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,

352

gmx_hw_opt_t *hw_opt,

353

t_inputrec *inputrec, gmx_mtop_t *mtop,

354

const t_commrec *cr,

355

FILE *fplog)

356

{

357

int nthreads_hw, nthreads_tot_max, nthreads_tmpi, nthreads_new, ngpu;

358

int min_atoms_per_mpi_thread;

359

char *env;

360

char sbuf[STRLEN4096];

361

gmx_bool bCanUseGPU;

362

363

if (hw_opt->nthreads_tmpi > 0)

364

{

365

/* Trivial, return right away */

366

return hw_opt->nthreads_tmpi;

367

}

368

369

nthreads_hw = hwinfo->nthreads_hw_avail;

370

371

/* How many total (#tMPI*#OpenMP) threads can we start? */

372

if (hw_opt->nthreads_tot > 0)

373

{

374

nthreads_tot_max = hw_opt->nthreads_tot;

375

}

376

else

377

{

378

nthreads_tot_max = nthreads_hw;

379

}

380

381

bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET &&

382

hwinfo->gpu_info.ncuda_dev_compatible > 0);

383

if (bCanUseGPU)

384

{

385

ngpu = hwinfo->gpu_info.ncuda_dev_compatible;

386

}

387

else

388

{

389

ngpu = 0;

390

}

391

392

if (inputrec->cutoff_scheme == ecutsGROUP)

393

{

394

/* We checked this before, but it doesn't hurt to do it once more */

395

assert(hw_opt->nthreads_omp == 1)((void) (0));

396

}

397

398

nthreads_tmpi =

399

get_tmpi_omp_thread_division(hwinfo, hw_opt, nthreads_tot_max, ngpu);

400

401

if (inputrec->eI == eiNM || EI_TPI(inputrec->eI)((inputrec->eI) == eiTPI || (inputrec->eI) == eiTPIC))

402

{

403

/* Dims/steps are divided over the nodes iso splitting the atoms */

404

min_atoms_per_mpi_thread = 0;

405

}

406

else

407

{

408

if (bCanUseGPU)

409

{

410

min_atoms_per_mpi_thread = MIN_ATOMS_PER_GPU900;

411

}

412

else

413

{

414

min_atoms_per_mpi_thread = MIN_ATOMS_PER_MPI_THREAD90;

415

}

416

}

417

418

/* Check if an algorithm does not support parallel simulation. */

419

if (nthreads_tmpi != 1 &&

420

( inputrec->eI == eiLBFGS ||

421

inputrec->coulombtype == eelEWALD ) )

422

{

423

nthreads_tmpi = 1;

424

425

md_print_warn(cr, fplog, "The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");

426

if (hw_opt->nthreads_tmpi > nthreads_tmpi)

427

{

428

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 428, "You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that");

429

}

430

}

431

else if (mtop->natoms/nthreads_tmpi < min_atoms_per_mpi_thread)

432

{

433

/* the thread number was chosen automatically, but there are too many

434

threads (too few atoms per thread) */

435

nthreads_new = max(1, mtop->natoms/min_atoms_per_mpi_thread)(((1) > (mtop->natoms/min_atoms_per_mpi_thread)) ? (1) :
(mtop->natoms/min_atoms_per_mpi_thread) );

436

437

/* Avoid partial use of Hyper-Threading */

438

if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&

439

nthreads_new > nthreads_hw/2 && nthreads_new < nthreads_hw)

440

{

441

nthreads_new = nthreads_hw/2;

442

}

443

444

/* Avoid large prime numbers in the thread count */

445

if (nthreads_new >= 6)

446

{

447

/* Use only 6,8,10 with additional factors of 2 */

448

int fac;

449

450

fac = 2;

451

while (3*fac*2 <= nthreads_new)

452

{

453

fac *= 2;

454

}

455

456

nthreads_new = (nthreads_new/fac)*fac;

457

}

458

else

459

{

460

/* Avoid 5 */

461

if (nthreads_new == 5)

462

{

463

nthreads_new = 4;

464

}

465

}

466

467

nthreads_tmpi = nthreads_new;

468

469

fprintf(stderrstderr, "\n");

470

fprintf(stderrstderr, "NOTE: Parallelization is limited by the small number of atoms,\n");

471

fprintf(stderrstderr, " only starting %d thread-MPI threads.\n", nthreads_tmpi);

472

fprintf(stderrstderr, " You can use the -nt and/or -ntmpi option to optimize the number of threads.\n\n");

473

}

474

475

return nthreads_tmpi;

476

}

477

#endif /* GMX_THREAD_MPI */

478

479

480

/* We determine the extra cost of the non-bonded kernels compared to
 * a reference nstlist value of 10 (which is the default in grompp).
 */
static const int    nbnxn_reference_nstlist = 10;
/* The values to try when switching  */
const int           nstlist_try[] = { 20, 25, 40 };
/* Number of entries in nstlist_try. The replacement list is parenthesized
 * so the macro behaves as a single operand in any expression.
 */
#define NNSTL  (sizeof(nstlist_try)/sizeof(nstlist_try[0]))
/* Increase nstlist until the non-bonded cost increases more than listfac_ok,
 * but never more than listfac_max.
 * A standard (protein+)water system at 300K with PME ewald_rtol=1e-5
 * needs 1.28 at rcoulomb=0.9 and 1.24 at rcoulomb=1.0 to get to nstlist=40.
 * Note that both CPU and GPU factors are conservative. Performance should
 * not go down due to this tuning, except with a relatively slow GPU.
 * On the other hand, at medium/high parallelization or with fast GPUs
 * nstlist will not be increased enough to reach optimal performance.
 */
/* CPU: pair-search is about a factor 1.5 slower than the non-bonded kernel */
static const float  nbnxn_cpu_listfac_ok    = 1.05;
static const float  nbnxn_cpu_listfac_max   = 1.09;
/* GPU: pair-search is a factor 1.5-3 slower than the non-bonded kernel */
static const float  nbnxn_gpu_listfac_ok    = 1.20;
static const float  nbnxn_gpu_listfac_max   = 1.30;

502

503

/* Try to increase nstlist when using the Verlet cut-off scheme */

504

static void increase_nstlist(FILE *fp, t_commrec *cr,

505

t_inputrec *ir, int nstlist_cmdline,

506

const gmx_mtop_t *mtop, matrix box,

507

gmx_bool bGPU)

508

{

509

float listfac_ok, listfac_max;

510

int nstlist_orig, nstlist_prev;

511

verletbuf_list_setup_t ls;

512

real rlist_nstlist10, rlist_inc, rlist_ok, rlist_max;

513

real rlist_new, rlist_prev;

514

int nstlist_ind = 0;

515

t_state state_tmp;

516

gmx_bool bBox, bDD, bCont;

517

const char *nstl_gpu = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";

518

const char *nve_err = "Can not increase nstlist because an NVE ensemble is used";

519

const char *vbd_err = "Can not increase nstlist because verlet-buffer-tolerance is not set or used";

520

const char *box_err = "Can not increase nstlist because the box is too small";

521

const char *dd_err = "Can not increase nstlist because of domain decomposition limitations";

522

char buf[STRLEN4096];

523

524

if (nstlist_cmdline <= 0)

525

{

526

if (fp != NULL((void*)0) && bGPU && ir->nstlist < nstlist_try[0])

527

{

528

fprintf(fp, nstl_gpu, ir->nstlist);

529

}

530

nstlist_ind = 0;

531

while (nstlist_ind < NNSTLsizeof(nstlist_try)/sizeof(nstlist_try[0]) && ir->nstlist >= nstlist_try[nstlist_ind])

532

{

533

nstlist_ind++;

534

}

535

if (nstlist_ind == NNSTLsizeof(nstlist_try)/sizeof(nstlist_try[0]))

536

{

537

/* There are no larger nstlist value to try */

538

return;

539

}

540

}

541

542

if (EI_MD(ir->eI)((ir->eI) == eiMD || ((ir->eI) == eiVV || (ir->eI) ==
eiVVAK)) && ir->etc == etcNO)

543

{

544

if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))

545

{

546

fprintf(stderrstderr, "%s\n", nve_err);

547

}

548

if (fp != NULL((void*)0))

549

{

550

fprintf(fp, "%s\n", nve_err);

551

}

552

553

return;

554

}

555

556

if (ir->verletbuf_tol == 0 && bGPU)

557

{

558

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 558, "You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp");

559

}

560

561

if (ir->verletbuf_tol < 0)

562

{

563

if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))

564

{

565

fprintf(stderrstderr, "%s\n", vbd_err);

566

}

567

if (fp != NULL((void*)0))

568

{

569

fprintf(fp, "%s\n", vbd_err);

570

}

571

572

return;

573

}

574

575

if (bGPU)

576

{

577

listfac_ok = nbnxn_gpu_listfac_ok;

578

listfac_max = nbnxn_gpu_listfac_max;

579

}

580

else

581

{

582

listfac_ok = nbnxn_cpu_listfac_ok;

583

listfac_max = nbnxn_cpu_listfac_max;

584

}

585

586

nstlist_orig = ir->nstlist;

587

if (nstlist_cmdline > 0)

588

{

589

if (fp)

590

{

591

sprintf(buf, "Getting nstlist=%d from command line option",

592

nstlist_cmdline);

593

}

594

ir->nstlist = nstlist_cmdline;

595

}

596

597

verletbuf_get_list_setup(bGPU, &ls);

598

599

/* Allow rlist to make the list a given factor larger than the list

600

* would be with nstlist=10.

601

602

nstlist_prev = ir->nstlist;

603

ir->nstlist = 10;

604

calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL((void*)0),

605

&rlist_nstlist10);

606

ir->nstlist = nstlist_prev;

607

608

/* Determine the pair list size increase due to zero interactions */

609

rlist_inc = nbnxn_get_rlist_effective_inc(ls.cluster_size_j,

610

mtop->natoms/det(box));

611

rlist_ok = (rlist_nstlist10 + rlist_inc)*pow(listfac_ok, 1.0/3.0) - rlist_inc;

612

rlist_max = (rlist_nstlist10 + rlist_inc)*pow(listfac_max, 1.0/3.0) - rlist_inc;

613

if (debug)

614

{

615

fprintf(debug, "nstlist tuning: rlist_inc %.3f rlist_ok %.3f rlist_max %.3f\n",

616

rlist_inc, rlist_ok, rlist_max);

617

}

618

619

nstlist_prev = nstlist_orig;

620

rlist_prev = ir->rlist;

621

622

{

623

if (nstlist_cmdline <= 0)

624

{

625

ir->nstlist = nstlist_try[nstlist_ind];

626

}

627

628

/* Set the pair-list buffer size in ir */

629

calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL((void*)0), &rlist_new);

630

631

/* Does rlist fit in the box? */

632

bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC, box));

633

bDD = TRUE1;

634

if (bBox && DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes >
1)))

635

{

636

/* Check if rlist fits in the domain decomposition */

637

if (inputrec2nboundeddim(ir) < DIM3)

638

{

639

gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet")_gmx_error("incons", "Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet"
, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c",
639);

640

}

641

copy_mat(box, state_tmp.box);

642

bDD = change_dd_cutoff(cr, &state_tmp, ir, rlist_new);

643

}

644

645

if (debug)

646

{

647

fprintf(debug, "nstlist %d rlist %.3f bBox %d bDD %d\n",

648

ir->nstlist, rlist_new, bBox, bDD);

649

}

650

651

bCont = FALSE0;

652

653

if (nstlist_cmdline <= 0)

654

{

655

if (bBox && bDD && rlist_new <= rlist_max)

656

{

657

/* Increase nstlist */

658

nstlist_prev = ir->nstlist;

659

rlist_prev = rlist_new;

660

bCont = (nstlist_ind+1 < NNSTLsizeof(nstlist_try)/sizeof(nstlist_try[0]) && rlist_new < rlist_ok);

661

}

662

else

663

{

664

/* Stick with the previous nstlist */

665

ir->nstlist = nstlist_prev;

666

rlist_new = rlist_prev;

667

bBox = TRUE1;

668

bDD = TRUE1;

669

}

670

}

671

672

nstlist_ind++;

673

}

674

while (bCont);

675

676

if (!bBox || !bDD)

677

{

678

gmx_warning(!bBox ? box_err : dd_err);

679

if (fp != NULL((void*)0))

680

{

681

fprintf(fp, "\n%s\n", bBox ? box_err : dd_err);

682

}

683

ir->nstlist = nstlist_orig;

684

}

685

else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)

686

{

687

sprintf(buf, "Changing nstlist from %d to %d, rlist from %g to %g",

688

nstlist_orig, ir->nstlist,

689

ir->rlist, rlist_new);

690

if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))

691

{

692

fprintf(stderrstderr, "%s\n\n", buf);

693

}

694

if (fp != NULL((void*)0))

695

{

696

fprintf(fp, "%s\n\n", buf);

697

}

698

ir->rlist = rlist_new;

699

ir->rlistlong = rlist_new;

700

}

701

}

702

703

static void prepare_verlet_scheme(FILE *fplog,

704

t_commrec *cr,

705

t_inputrec *ir,

706

int nstlist_cmdline,

707

const gmx_mtop_t *mtop,

708

matrix box,

709

gmx_bool bUseGPU)

710

{

711

/* For NVE simulations, we will retain the initial list buffer */

712

if (ir->verletbuf_tol > 0 && !(EI_MD(ir->eI)((ir->eI) == eiMD || ((ir->eI) == eiVV || (ir->eI) ==
eiVVAK)) && ir->etc == etcNO))

713

{

714

/* Update the Verlet buffer size for the current run setup */

715

verletbuf_list_setup_t ls;

716

real rlist_new;

717

718

/* Here we assume SIMD-enabled kernels are being used. But as currently

719

* calc_verlet_buffer_size gives the same results for 4x8 and 4x4

720

* and 4x2 gives a larger buffer than 4x4, this is ok.

721

722

verletbuf_get_list_setup(bUseGPU, &ls);

723

724

calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL((void*)0), &rlist_new);

725

726

if (rlist_new != ir->rlist)

727

{

728

if (fplog != NULL((void*)0))

729

{

730

fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",

731

ir->rlist, rlist_new,

732

ls.cluster_size_i, ls.cluster_size_j);

733

}

734

ir->rlist = rlist_new;

735

ir->rlistlong = rlist_new;

736

}

737

}

738

739

if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI)(((ir->eI) == eiMD || ((ir->eI) == eiVV || (ir->eI) ==
eiVVAK)) || ((ir->eI) == eiSD1 || (ir->eI) == eiSD2) ||
(ir->eI) == eiBD) || ir->verletbuf_tol <= 0))

740

{

741

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 741, "Can not set nstlist without %s",

742

!EI_DYNAMICS(ir->eI)(((ir->eI) == eiMD || ((ir->eI) == eiVV || (ir->eI) ==
eiVVAK)) || ((ir->eI) == eiSD1 || (ir->eI) == eiSD2) ||
(ir->eI) == eiBD) ? "dynamics" : "verlet-buffer-tolerance");

743

}

744

745

if (EI_DYNAMICS(ir->eI)(((ir->eI) == eiMD || ((ir->eI) == eiVV || (ir->eI) ==
eiVVAK)) || ((ir->eI) == eiSD1 || (ir->eI) == eiSD2) ||
(ir->eI) == eiBD))

746

{

747

/* Set or try nstlist values */

748

increase_nstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, bUseGPU);

749

}

750

}

751

752

static void convert_to_verlet_scheme(FILE *fplog,

753

t_inputrec *ir,

754

gmx_mtop_t *mtop, real box_vol)

755

{

756

char *conv_mesg = "Converting input file with group cut-off scheme to the Verlet cut-off scheme";

757

758

md_print_warn(NULL((void*)0), fplog, "%s\n", conv_mesg);

759

760

ir->cutoff_scheme = ecutsVERLET;

761

ir->verletbuf_tol = 0.005;

762

763

if (ir->rcoulomb != ir->rvdw)

764

{

765

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 765, "The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");

766

}

767

768

if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype)((ir->coulombtype) == eelUSER || (ir->coulombtype) == eelPMEUSER
|| (ir->coulombtype) == (eelPMEUSERSWITCH)))

769

{

770

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 770, "User non-bonded potentials are not (yet) supported with the Verlet scheme");

771

}

772

else if (ir_vdw_switched(ir) || ir_coulomb_switched(ir))

773

{

774

if (ir_vdw_switched(ir) && ir->vdw_modifier == eintmodNONE)

775

{

776

ir->vdwtype = evdwCUT;

777

778

switch (ir->vdwtype)

779

{

780

case evdwSHIFT: ir->vdw_modifier = eintmodFORCESWITCH; break;

781

case evdwSWITCH: ir->vdw_modifier = eintmodPOTSWITCH; break;

782

default: gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 782, "The Verlet scheme does not support Van der Waals interactions of type '%s'", evdw_names[ir->vdwtype]);

783

}

784

}

785

if (ir_coulomb_switched(ir) && ir->coulomb_modifier == eintmodNONE)

786

{

787

if (EEL_FULL(ir->coulombtype)((((ir->coulombtype) == eelPME || (ir->coulombtype) == eelPMESWITCH
|| (ir->coulombtype) == eelPMEUSER || (ir->coulombtype
) == eelPMEUSERSWITCH || (ir->coulombtype) == eelP3M_AD) ||
(ir->coulombtype) == eelEWALD) || (ir->coulombtype) ==
eelPOISSON))

788

{

789

/* With full electrostatic only PME can be switched */

790

ir->coulombtype = eelPME;

791

ir->coulomb_modifier = eintmodPOTSHIFT;

792

}

793

else

794

{

795

md_print_warn(NULL((void*)0), fplog, "NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n", eel_names[ir->coulombtype]);

796

ir->coulombtype = eelRF;

797

ir->epsilon_rf = 0.0;

798

ir->coulomb_modifier = eintmodPOTSHIFT;

799

}

800

}

801

802

/* We set the pair energy error tolerance to a small number.

803

* Note that this is only for testing. For production the user

804

* should think about this and set the mdp options.

805

806

ir->verletbuf_tol = 1e-4;

807

}

808

809

if (inputrec2nboundeddim(ir) != 3)

810

{

811

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 811, "Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");

812

}

813

814

if (ir->efep != efepNO || ir->implicit_solvent != eisNO)

815

{

816

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 816, "Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");

817

}

818

819

if (EI_DYNAMICS(ir->eI)(((ir->eI) == eiMD || ((ir->eI) == eiVV || (ir->eI) ==
eiVVAK)) || ((ir->eI) == eiSD1 || (ir->eI) == eiSD2) ||
(ir->eI) == eiBD) && !(EI_MD(ir->eI)((ir->eI) == eiMD || ((ir->eI) == eiVV || (ir->eI) ==
eiVVAK)) && ir->etc == etcNO))

820

{

821

verletbuf_list_setup_t ls;

822

823

verletbuf_get_list_setup(FALSE0, &ls);

824

calc_verlet_buffer_size(mtop, box_vol, ir, -1, &ls, NULL((void*)0), &ir->rlist);

825

}

826

else

827

{

828

real rlist_fac;

829

830

if (EI_MD(ir->eI)((ir->eI) == eiMD || ((ir->eI) == eiVV || (ir->eI) ==
eiVVAK)))

831

{

832

rlist_fac = 1 + verlet_buffer_ratio_NVE_T0;

833

}

834

else

835

{

836

rlist_fac = 1 + verlet_buffer_ratio_nodynamics;

837

}

838

ir->verletbuf_tol = -1;

839

ir->rlist = rlist_fac*max(ir->rvdw, ir->rcoulomb)(((ir->rvdw) > (ir->rcoulomb)) ? (ir->rvdw) : (ir
->rcoulomb) );

840

}

841

842

gmx_mtop_remove_chargegroups(mtop);

843

}

844

845

static void print_hw_opt(FILE *fp, const gmx_hw_opt_t *hw_opt)

846

{

847

fprintf(fp, "hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",

848

hw_opt->nthreads_tot,

849

hw_opt->nthreads_tmpi,

850

hw_opt->nthreads_omp,

851

hw_opt->nthreads_omp_pme,

852

hw_opt->gpu_opt.gpu_id != NULL((void*)0) ? hw_opt->gpu_opt.gpu_id : "");

853

}

854

855

/* Checks we can do when we don't (yet) know the cut-off scheme */

856

static void check_and_update_hw_opt_1(gmx_hw_opt_t *hw_opt,

857

gmx_bool bIsSimMaster)

858

{

859

gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp, bIsSimMaster);

860

861

#ifndef GMX_THREAD_MPI

862

if (hw_opt->nthreads_tot > 0)

863

{

864

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 864, "Setting the total number of threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");

865

}

866

if (hw_opt->nthreads_tmpi > 0)

867

{

868

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 868, "Setting the number of thread-MPI threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");

869

}

870

#endif

871

872

#ifndef GMX_OPENMP

873

if (hw_opt->nthreads_omp > 1)

874

{

875

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 875, "More than 1 OpenMP thread requested, but Gromacs was compiled without OpenMP support");

876

}

877

hw_opt->nthreads_omp = 1;

878

#endif

879

880

if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)

881

{

882

/* We have the same number of OpenMP threads for PP and PME processes,

883

* thus we can perform several consistency checks.

884

885

if (hw_opt->nthreads_tmpi > 0 &&

886

hw_opt->nthreads_omp > 0 &&

887

hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)

888

{

889

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 889, "The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",

890

hw_opt->nthreads_tot, hw_opt->nthreads_tmpi, hw_opt->nthreads_omp);

891

}

892

893

if (hw_opt->nthreads_tmpi > 0 &&

894

hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)

895

{

896

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 896, "The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",

897

hw_opt->nthreads_tot, hw_opt->nthreads_tmpi);

898

}

899

900

if (hw_opt->nthreads_omp > 0 &&

901

hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)

902

{

903

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 903, "The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",

904

hw_opt->nthreads_tot, hw_opt->nthreads_omp);

905

}

906

907

if (hw_opt->nthreads_tmpi > 0 &&

908

hw_opt->nthreads_omp <= 0)

909

{

910

hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;

911

}

912

}

913

914

#ifndef GMX_OPENMP

915

if (hw_opt->nthreads_omp > 1)

916

{

917

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 917, "OpenMP threads are requested, but Gromacs was compiled without OpenMP support");

918

}

919

#endif

920

921

if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)

922

{

923

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 923, "You need to specify -ntomp in addition to -ntomp_pme");

924

}

925

926

if (hw_opt->nthreads_tot == 1)

927

{

928

hw_opt->nthreads_tmpi = 1;

929

930

if (hw_opt->nthreads_omp > 1)

931

{

932

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 932, "You requested %d OpenMP threads with %d total threads",

933

hw_opt->nthreads_tmpi, hw_opt->nthreads_tot);

934

}

935

hw_opt->nthreads_omp = 1;

936

}

937

938

if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)

939

{

940

hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;

941

}

942

943

/* Parse GPU IDs, if provided.

944

* We check consistency with the tMPI thread count later.

945

946

gmx_parse_gpu_ids(&hw_opt->gpu_opt);

947

948

#ifdef GMX_THREAD_MPI

949

if (hw_opt->gpu_opt.ncuda_dev_use > 0 && hw_opt->nthreads_tmpi == 0)

950

{

951

/* Set the number of MPI threads equal to the number of GPUs */

952

hw_opt->nthreads_tmpi = hw_opt->gpu_opt.ncuda_dev_use;

953

954

if (hw_opt->nthreads_tot > 0 &&

955

hw_opt->nthreads_tmpi > hw_opt->nthreads_tot)

956

{

957

/* We have more GPUs than total threads requested.

958

* We choose to (later) generate a mismatch error,

959

* instead of launching more threads than requested.

960

961

hw_opt->nthreads_tmpi = hw_opt->nthreads_tot;

962

}

963

}

964

#endif

965

966

if (debug)

967

{

968

print_hw_opt(debug, hw_opt);

969

}

970

}

971

972

/* Checks we can do when we know the cut-off scheme */

973

static void check_and_update_hw_opt_2(gmx_hw_opt_t *hw_opt,

974

int cutoff_scheme)

975

{

976

if (cutoff_scheme == ecutsGROUP)

977

{

978

/* We only have OpenMP support for PME only nodes */

979

if (hw_opt->nthreads_omp > 1)

980

{

981

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 981, "OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",

982

ecutscheme_names[cutoff_scheme],

983

ecutscheme_names[ecutsVERLET]);

984

}

985

hw_opt->nthreads_omp = 1;

986

}

987

988

if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)

989

{

990

hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;

991

}

992

993

if (debug)

994

{

995

print_hw_opt(debug, hw_opt);

996

}

997

}

998

999

1000

/* Override the value in inputrec with value passed on the command line (if any) */

1001

static void override_nsteps_cmdline(FILE *fplog,

1002

gmx_int64_t nsteps_cmdline,

1003

t_inputrec *ir,

1004

const t_commrec *cr)

1005

{

1006

char sbuf[STEPSTRSIZE22];

1007

1008

assert(ir)((void) (0));

1009

assert(cr)((void) (0));

1010

1011

/* override with anything else than the default -2 */

1012

if (nsteps_cmdline > -2)

1013

{

1014

char stmp[STRLEN4096];

1015

1016

ir->nsteps = nsteps_cmdline;

1017

if (EI_DYNAMICS(ir->eI)(((ir->eI) == eiMD || ((ir->eI) == eiVV || (ir->eI) ==
eiVVAK)) || ((ir->eI) == eiSD1 || (ir->eI) == eiSD2) ||
(ir->eI) == eiBD))

1018

{

1019

sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps, %.3f ps",

1020

gmx_step_str(nsteps_cmdline, sbuf),

1021

nsteps_cmdline*ir->delta_t);

1022

}

1023

else

1024

{

1025

sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps",

1026

gmx_step_str(nsteps_cmdline, sbuf));

1027

}

1028

1029

md_print_warn(cr, fplog, "%s\n", stmp);

1030

}

1031

}

1032

1033

/* Frees GPU memory and destroys the CUDA context.

1034

1035

* Note that this function needs to be called even if GPUs are not used

1036

* in this run because the PME ranks have no knowledge of whether GPUs

1037

* are used or not, but all ranks need to enter the barrier below.

1038

1039

static void free_gpu_resources(const t_forcerec *fr,

1040

const t_commrec *cr)

1041

{

1042

gmx_bool bIsPPrankUsingGPU;

1043

char gpu_err_str[STRLEN4096];

1044

1045

bIsPPrankUsingGPU = (cr->duty & DUTY_PP(1<<0)) && fr->nbv != NULL((void*)0) && fr->nbv->bUseGPU;

1046

1047

if (bIsPPrankUsingGPU)

1048

{

1049

/* free nbnxn data in GPU memory */

1050

nbnxn_cuda_free(fr->nbv->cu_nbv);

1051

1052

/* With tMPI we need to wait for all ranks to finish deallocation before

1053

* destroying the context in free_gpu() as some ranks may be sharing

1054

* GPU and context.

1055

* Note: as only PP ranks need to free GPU resources, so it is safe to

1056

* not call the barrier on PME ranks.

1057

1058

#ifdef GMX_THREAD_MPI

1059

if (PAR(cr)((cr)->nnodes > 1))

1060

{

1061

gmx_barrier(cr);

1062

}

1063

#endif /* GMX_THREAD_MPI */

1064

1065

/* uninitialize GPU (by destroying the context) */

1066

if (!free_gpu(gpu_err_str))

1067

{

1068

gmx_warning("On node %d failed to free GPU #%d: %s",

1069

cr->nodeid, get_current_gpu_device_id(), gpu_err_str);

1070

}

1071

}

1072

}

1073

1074

int mdrunner(gmx_hw_opt_t *hw_opt,

1075

FILE *fplog, t_commrec *cr, int nfile,

1076

const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,

1077

gmx_bool bCompact, int nstglobalcomm,

1078

ivec ddxyz, int dd_node_order, real rdd, real rconstr,

1079

const char *dddlb_opt, real dlb_scale,

1080

const char *ddcsx, const char *ddcsy, const char *ddcsz,

1081

const char *nbpu_opt, int nstlist_cmdline,

1082

gmx_int64_t nsteps_cmdline, int nstepout, int resetstep,

1083

int gmx_unused__attribute__ ((unused)) nmultisim, int repl_ex_nst, int repl_ex_nex,

1084

int repl_ex_seed, real pforce, real cpt_period, real max_hours,

1085

const char *deviceOptions, int imdport, unsigned long Flags)

1086

{

1087

gmx_bool bForceUseGPU, bTryUseGPU;

1088

double nodetime = 0, realtime;

1089

t_inputrec *inputrec;

1090

t_state *state = NULL((void*)0);

1091

matrix box;

1092

gmx_ddbox_t ddbox = {0};

1093

int npme_major, npme_minor;

1094

real tmpr1, tmpr2;

1095

t_nrnb *nrnb;

1096

gmx_mtop_t *mtop = NULL((void*)0);

1097

t_mdatoms *mdatoms = NULL((void*)0);

1098

t_forcerec *fr = NULL((void*)0);

'fr' initialized to a null pointer value

→

1099

t_fcdata *fcd = NULL((void*)0);

1100

real ewaldcoeff_q = 0;

1101

real ewaldcoeff_lj = 0;

1102

gmx_pme_t *pmedata = NULL((void*)0);

1103

gmx_vsite_t *vsite = NULL((void*)0);

1104

gmx_constr_t constr;

1105

int i, m, nChargePerturbed = -1, nTypePerturbed = 0, status, nalloc;

1106

char *gro;

1107

gmx_wallcycle_t wcycle;

1108

gmx_bool bReadEkin;

1109

int list;

1110

gmx_walltime_accounting_t walltime_accounting = NULL((void*)0);

1111

int rc;

1112

gmx_int64_t reset_counters;

1113

gmx_edsam_t ed = NULL((void*)0);

1114

t_commrec *cr_old = cr;

1115

int nthreads_pme = 1;

1116

int nthreads_pp = 1;

1117

gmx_membed_t membed = NULL((void*)0);

1118

gmx_hw_info_t *hwinfo = NULL((void*)0);

1119

/* The master rank decides early on bUseGPU and broadcasts this later */

1120

gmx_bool bUseGPU = FALSE0;

1121

1122

/* CAUTION: threads may be started later on in this function, so

1123

cr doesn't reflect the final parallel state right now */

1124

snew(inputrec, 1)(inputrec) = save_calloc("inputrec", "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1124, (1), sizeof(*(inputrec)));

1125

snew(mtop, 1)(mtop) = save_calloc("mtop", "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1125, (1), sizeof(*(mtop)));

1126

1127

if (Flags & MD_APPENDFILES(1<<15))

←

Taking false branch

→

1128

{

1129

fplog = NULL((void*)0);

1130

}

1131

1132

bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3)(__extension__ (__builtin_constant_p (3) && ((__builtin_constant_p
(nbpu_opt) && strlen (nbpu_opt) < ((size_t) (3)))
|| (__builtin_constant_p ("gpu") && strlen ("gpu") <
((size_t) (3)))) ? __extension__ ({ size_t __s1_len, __s2_len
; (__builtin_constant_p (nbpu_opt) && __builtin_constant_p
("gpu") && (__s1_len = strlen (nbpu_opt), __s2_len =
strlen ("gpu"), (!((size_t)(const void *)((nbpu_opt) + 1) - (
size_t)(const void *)(nbpu_opt) == 1) || __s1_len >= 4) &&
(!((size_t)(const void *)(("gpu") + 1) - (size_t)(const void
*)("gpu") == 1) || __s2_len >= 4)) ? __builtin_strcmp (nbpu_opt
, "gpu") : (__builtin_constant_p (nbpu_opt) && ((size_t
)(const void *)((nbpu_opt) + 1) - (size_t)(const void *)(nbpu_opt
) == 1) && (__s1_len = strlen (nbpu_opt), __s1_len <
4) ? (__builtin_constant_p ("gpu") && ((size_t)(const
void *)(("gpu") + 1) - (size_t)(const void *)("gpu") == 1) ?
__builtin_strcmp (nbpu_opt, "gpu") : (__extension__ ({ const
unsigned char *__s2 = (const unsigned char *) (const char *)
("gpu"); int __result = (((const unsigned char *) (const char
*) (nbpu_opt))[0] - __s2[0]); if (__s1_len > 0 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) (nbpu_opt))[1] - __s2[1]); if (__s1_len > 1 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) (nbpu_opt))[2] - __s2[2]); if (__s1_len > 2 &&
__result == 0) __result = (((const unsigned char *) (const char
*) (nbpu_opt))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p
("gpu") && ((size_t)(const void *)(("gpu") + 1) - (size_t
)(const void *)("gpu") == 1) && (__s2_len = strlen ("gpu"
), __s2_len < 4) ? (__builtin_constant_p (nbpu_opt) &&
((size_t)(const void *)((nbpu_opt) + 1) - (size_t)(const void
*)(nbpu_opt) == 1) ? __builtin_strcmp (nbpu_opt, "gpu") : (-
(__extension__ ({ const unsigned char *__s2 = (const unsigned
char *) (const char *) (nbpu_opt); int __result = (((const unsigned
char *) (const char *) ("gpu"))[0] - __s2[0]); if (__s2_len >
0 && __result == 0) { __result = (((const unsigned char
*) (const char *) ("gpu"))[1] - __s2[1]); if (__s2_len > 1
&& __result == 0) { __result = (((const unsigned char
*) (const char *) ("gpu"))[2] - __s2[2]); if (__s2_len > 2
&& __result == 0) __result = (((const unsigned char *
) (const char *) ("gpu"))[3] - __s2[3]); } } __result; })))) :
__builtin_strcmp (nbpu_opt, "gpu")))); }) : strncmp (nbpu_opt
, "gpu", 3))) == 0);

1133

bTryUseGPU = (strncmp(nbpu_opt, "auto", 4)(__extension__ (__builtin_constant_p (4) && ((__builtin_constant_p
(nbpu_opt) && strlen (nbpu_opt) < ((size_t) (4)))
|| (__builtin_constant_p ("auto") && strlen ("auto")
< ((size_t) (4)))) ? __extension__ ({ size_t __s1_len, __s2_len
; (__builtin_constant_p (nbpu_opt) && __builtin_constant_p
("auto") && (__s1_len = strlen (nbpu_opt), __s2_len =
strlen ("auto"), (!((size_t)(const void *)((nbpu_opt) + 1) -
(size_t)(const void *)(nbpu_opt) == 1) || __s1_len >= 4) &&
(!((size_t)(const void *)(("auto") + 1) - (size_t)(const void
*)("auto") == 1) || __s2_len >= 4)) ? __builtin_strcmp (nbpu_opt
, "auto") : (__builtin_constant_p (nbpu_opt) && ((size_t
)(const void *)((nbpu_opt) + 1) - (size_t)(const void *)(nbpu_opt
) == 1) && (__s1_len = strlen (nbpu_opt), __s1_len <
4) ? (__builtin_constant_p ("auto") && ((size_t)(const
void *)(("auto") + 1) - (size_t)(const void *)("auto") == 1)
? __builtin_strcmp (nbpu_opt, "auto") : (__extension__ ({ const
unsigned char *__s2 = (const unsigned char *) (const char *)
("auto"); int __result = (((const unsigned char *) (const char
*) (nbpu_opt))[0] - __s2[0]); if (__s1_len > 0 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) (nbpu_opt))[1] - __s2[1]); if (__s1_len > 1 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) (nbpu_opt))[2] - __s2[2]); if (__s1_len > 2 &&
__result == 0) __result = (((const unsigned char *) (const char
*) (nbpu_opt))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p
("auto") && ((size_t)(const void *)(("auto") + 1) - (
size_t)(const void *)("auto") == 1) && (__s2_len = strlen
("auto"), __s2_len < 4) ? (__builtin_constant_p (nbpu_opt
) && ((size_t)(const void *)((nbpu_opt) + 1) - (size_t
)(const void *)(nbpu_opt) == 1) ? __builtin_strcmp (nbpu_opt,
"auto") : (- (__extension__ ({ const unsigned char *__s2 = (
const unsigned char *) (const char *) (nbpu_opt); int __result
= (((const unsigned char *) (const char *) ("auto"))[0] - __s2
[0]); if (__s2_len > 0 && __result == 0) { __result
= (((const unsigned char *) (const char *) ("auto"))[1] - __s2
[1]); if (__s2_len > 1 && __result == 0) { __result
= (((const unsigned char *) (const char *) ("auto"))[2] - __s2
[2]); if (__s2_len > 2 && __result == 0) __result =
(((const unsigned char *) (const char *) ("auto"))[3] - __s2
[3]); } } __result; })))) : __builtin_strcmp (nbpu_opt, "auto"
)))); }) : strncmp (nbpu_opt, "auto", 4))) == 0) || bForceUseGPU;

1134

1135

/* Detect hardware, gather information. This is an operation that is

1136

* global for this process (MPI rank). */

1137

hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU);

1138

1139

1140

snew(state, 1)(state) = save_calloc("state", "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1140, (1), sizeof(*(state)));

1141

if (SIMMASTER(cr)(((((cr)->nodeid == 0) || !((cr)->nnodes > 1)) &&
((cr)->duty & (1<<0))) || !((cr)->nnodes >
1)))

←

Taking true branch

→

1142

{

1143

/* Read (nearly) all data required for the simulation */

1144

read_tpx_state(ftp2fn(efTPX, nfile, fnm), inputrec, state, NULL((void*)0), mtop);

1145

1146

if (inputrec->cutoff_scheme != ecutsVERLET &&

1147

((Flags & MD_TESTVERLET(1<<22)) || getenv("GMX_VERLET_SCHEME") != NULL((void*)0)))

1148

{

1149

convert_to_verlet_scheme(fplog, inputrec, mtop, det(state->box));

1150

}

1151

1152

if (inputrec->cutoff_scheme == ecutsVERLET)

←

Taking true branch

→

1153

{

1154

/* Here the master rank decides if all ranks will use GPUs */

1155

bUseGPU = (hwinfo->gpu_info.ncuda_dev_compatible > 0 ||

1156

getenv("GMX_EMULATE_GPU") != NULL((void*)0));

1157

1158

/* TODO add GPU kernels for this and replace this check by:

1159

* (bUseGPU && (ir->vdwtype == evdwPME &&

1160

* ir->ljpme_combination_rule == eljpmeLB))

1161

* update the message text and the content of nbnxn_acceleration_supported.

1162

1163

if (bUseGPU &&

1164

!nbnxn_acceleration_supported(fplog, cr, inputrec, bUseGPU))

1165

{

1166

/* Fallback message printed by nbnxn_acceleration_supported */

1167

if (bForceUseGPU)

1168

{

1169

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1169, "GPU acceleration requested, but not supported with the given input settings");

1170

}

1171

bUseGPU = FALSE0;

1172

}

1173

1174

prepare_verlet_scheme(fplog, cr,

1175

inputrec, nstlist_cmdline, mtop, state->box,

1176

bUseGPU);

1177

}

1178

else

1179

{

1180

if (nstlist_cmdline > 0)

1181

{

1182

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1182, "Can not set nstlist with the group cut-off scheme");

1183

}

1184

1185

if (hwinfo->gpu_info.ncuda_dev_compatible > 0)

1186

{

1187

md_print_warn(cr, fplog,

1188

"NOTE: GPU(s) found, but the current simulation can not use GPUs\n"

1189

" To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"

1190

" (for quick performance testing you can use the -testverlet option)\n");

1191

}

1192

1193

if (bForceUseGPU)

1194

{

1195

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1195, "GPU requested, but can't be used without cutoff-scheme=Verlet");

1196

}

1197

1198

#ifdef GMX_TARGET_BGQ

1199

md_print_warn(cr, fplog,

1200

"NOTE: There is no SIMD implementation of the group scheme kernels on\n"

1201

" BlueGene/Q. You will observe better performance from using the\n"

1202

" Verlet cut-off scheme.\n");

1203

#endif

1204

}

1205

}

1206

1207

/* Check and update the hardware options for internal consistency */

1208

check_and_update_hw_opt_1(hw_opt, SIMMASTER(cr)(((((cr)->nodeid == 0) || !((cr)->nnodes > 1)) &&
((cr)->duty & (1<<0))) || !((cr)->nnodes >
1)));

1209

1210

/* Early check for externally set process affinity. */

1211

gmx_check_thread_affinity_set(fplog, cr,

1212

hw_opt, hwinfo->nthreads_hw_avail, FALSE0);

1213

if (SIMMASTER(cr)(((((cr)->nodeid == 0) || !((cr)->nnodes > 1)) &&
((cr)->duty & (1<<0))) || !((cr)->nnodes >
1)))

←

Taking true branch

→

1214

{

1215

1216

#ifdef GMX_THREAD_MPI

1217

if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)

1218

{

1219

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1219, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME nodes");

1220

}

1221

#endif

1222

1223

if (hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp &&

1224

cr->npmenodes <= 0)

1225

{

1226

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1226, "You need to explicitly specify the number of PME nodes (-npme) when using different number of OpenMP threads for PP and PME nodes");

1227

}

1228

}

1229

1230

#ifdef GMX_THREAD_MPI

1231

if (SIMMASTER(cr)(((((cr)->nodeid == 0) || !((cr)->nnodes > 1)) &&
((cr)->duty & (1<<0))) || !((cr)->nnodes >
1)))

←

Taking true branch

→

1232

{

1233

/* Since the master knows the cut-off scheme, update hw_opt for this.

1234

* This is done later for normal MPI and also once more with tMPI

1235

* for all tMPI ranks.

1236

1237

check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);

1238

1239

/* NOW the threads will be started: */

1240

hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,

1241

hw_opt,

1242

inputrec, mtop,

1243

cr, fplog);

1244

if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)

1245

{

1246

hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;

1247

}

1248

1249

if (hw_opt->nthreads_tmpi > 1)

←

Taking true branch

→

1250

{

1251

/* now start the threads. */

1252

cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,

1253

oenv, bVerbose, bCompact, nstglobalcomm,

1254

ddxyz, dd_node_order, rdd, rconstr,

1255

dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,

1256

nbpu_opt, nstlist_cmdline,

1257

nsteps_cmdline, nstepout, resetstep, nmultisim,

1258

repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,

1259

cpt_period, max_hours, deviceOptions,

1260

Flags);

1261

/* the main thread continues here with a new cr. We don't deallocate

1262

the old cr because other threads may still be reading it. */

1263

if (cr == NULL((void*)0))

←

Assuming 'cr' is not equal to null

→

←

Taking false branch

→

1264

{

1265

gmx_comm("Failed to spawn threads")_gmx_error("comm", "Failed to spawn threads", "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1265);

1266

}

1267

}

1268

}

1269

#endif

1270

/* END OF CAUTION: cr is now reliable */

1271

1272

/* g_membed initialisation *

1273

* Because we change the mtop, init_membed is called before the init_parallel *

1274

* (in case we ever want to make it run in parallel) */

1275

if (opt2bSet("-membed", nfile, fnm))

←

Taking false branch

→

1276

{

1277

if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))

1278

{

1279

fprintf(stderrstderr, "Initializing membed");

1280

}

1281

membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period);

1282

}

1283

1284

if (PAR(cr)((cr)->nnodes > 1))

←

Taking false branch

→

1285

{

1286

/* now broadcast everything to the non-master nodes/threads: */

1287

init_parallel(cr, inputrec, mtop);

1288

}

1289

if (fplog != NULL((void*)0))

←

Assuming 'fplog' is equal to null

→

←

Taking false branch

→

1290

{

1291

pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE0);

1292

}

1293

1294

/* now make sure the state is initialized and propagated */

1295

set_state_entries(state, inputrec);

1296

1297

/* A parallel command line option consistency check that we can

1298

only do after any threads have started. */

1299

if (!PAR(cr)((cr)->nnodes > 1) &&

←

Taking false branch

→

1300

(ddxyz[XX0] > 1 || ddxyz[YY1] > 1 || ddxyz[ZZ2] > 1 || cr->npmenodes > 0))

1301

{

1302

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1302,

1303

"The -dd or -npme option request a parallel simulation, "

1304

#ifndef GMX_MPI

1305

"but %s was compiled without threads or MPI enabled"

1306

#else

1307

#ifdef GMX_THREAD_MPI

1308

"but the number of threads (option -nt) is 1"

1309

#else

1310

"but %s was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"

1311

#endif

1312

#endif

1313

, ShortProgram()

1314

);

1315

}

1316

1317

if ((Flags & MD_RERUN(1<<4)) &&

1318

(EI_ENERGY_MINIMIZATION(inputrec->eI)((inputrec->eI) == eiSteep || (inputrec->eI) == eiCG ||
(inputrec->eI) == eiLBFGS) || eiNM == inputrec->eI))

1319

{

1320

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1320, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");

1321

}

1322

1323

if (can_use_allvsall(inputrec, TRUE1, cr, fplog) && DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes >
1)))

1324

{

1325

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1325, "All-vs-all loops do not work with domain decomposition, use a single MPI rank");

1326

}

1327

1328

if (!(EEL_PME(inputrec->coulombtype)((inputrec->coulombtype) == eelPME || (inputrec->coulombtype
) == eelPMESWITCH || (inputrec->coulombtype) == eelPMEUSER
|| (inputrec->coulombtype) == eelPMEUSERSWITCH || (inputrec
->coulombtype) == eelP3M_AD) || EVDW_PME(inputrec->vdwtype)((inputrec->vdwtype) == evdwPME)))

←

Taking true branch

→

1329

{

1330

if (cr->npmenodes > 0)

←

Taking false branch

→

1331

{

1332

gmx_fatal_collective(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1332, cr, NULL((void*)0),

1333

"PME nodes are requested, but the system does not use PME electrostatics or LJ-PME");

1334

}

1335

1336

cr->npmenodes = 0;

1337

}

1338

1339

#ifdef GMX_FAHCORE

1340

if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))

1341

{

1342

fcRegisterSteps(inputrec->nsteps, inputrec->init_step);

1343

}

1344

#endif

1345

1346

/* NMR restraints must be initialized before load_checkpoint,

1347

* since with time averaging the history is added to t_state.

1348

* For proper consistency check we therefore need to extend

1349

* t_state here.

1350

* So the PME-only nodes (if present) will also initialize

1351

* the distance restraints.

1352

1353

snew(fcd, 1)(fcd) = save_calloc("fcd", "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1353, (1), sizeof(*(fcd)));

1354

1355

/* This needs to be called before read_checkpoint to extend the state */

1356

init_disres(fplog, mtop, inputrec, cr, fcd, state, repl_ex_nst > 0);

←

Assuming 'repl_ex_nst' is <= 0

→

1357

1358

init_orires(fplog, mtop, state->x, inputrec, cr, &(fcd->orires),

1359

state);

1360

1361

if (DEFORM(*inputrec)((*inputrec).deform[0][0] != 0 || (*inputrec).deform[1][1] !=
0 || (*inputrec).deform[2][2] != 0 || (*inputrec).deform[1][
0] != 0 || (*inputrec).deform[2][0] != 0 || (*inputrec).deform
[2][1] != 0))

←

Taking false branch

→

1362

{

1363

/* Store the deform reference box before reading the checkpoint */

1364

if (SIMMASTER(cr)(((((cr)->nodeid == 0) || !((cr)->nnodes > 1)) &&
((cr)->duty & (1<<0))) || !((cr)->nnodes >
1)))

1365

{

1366

copy_mat(state->box, box);

1367

}

1368

if (PAR(cr)((cr)->nnodes > 1))

1369

{

1370

gmx_bcast(sizeof(box), box, cr);

1371

}

1372

/* Because we do not have the update struct available yet

1373

* in which the reference values should be stored,

1374

* we store them temporarily in static variables.

1375

* This should be thread safe, since they are only written once

1376

* and with identical values.

1377

1378

tMPI_Thread_mutex_lock(&deform_init_box_mutex);

1379

deform_init_init_step_tpx = inputrec->init_step;

1380

copy_mat(box, deform_init_box_tpx);

1381

tMPI_Thread_mutex_unlock(&deform_init_box_mutex);

1382

}

1383

1384

if (opt2bSet("-cpi", nfile, fnm))

←

Taking false branch

→

1385

{

1386

/* Check if checkpoint file exists before doing continuation.

1387

* This way we can use identical input options for the first and subsequent runs...

1388

1389

if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) )

1390

{

1391

load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog,

1392

cr, ddxyz,

1393

inputrec, state, &bReadEkin,

1394

(Flags & MD_APPENDFILES(1<<15)),

1395

(Flags & MD_APPENDFILESSET(1<<21)));

1396

1397

if (bReadEkin)

1398

{

1399

Flags |= MD_READ_EKIN(1<<17);

1400

}

1401

}

1402

}

1403

1404

if (((MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)) || (Flags & MD_SEPPOT(1<<7))) && (Flags & MD_APPENDFILES(1<<15)))

1405

#ifdef GMX_THREAD_MPI

1406

/* With thread MPI only the master node/thread exists in mdrun.c,

1407

* therefore non-master nodes need to open the "seppot" log file here.

1408

1409

|| (!MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)) && (Flags & MD_SEPPOT(1<<7)))

1410

#endif

1411

)

1412

{

1413

gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr, !(Flags & MD_SEPPOT(1<<7)),

1414

Flags, &fplog);

1415

}

1416

1417

/* override nsteps with value from cmdline */

1418

override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);

1419

1420

if (SIMMASTER(cr)(((((cr)->nodeid == 0) || !((cr)->nnodes > 1)) &&
((cr)->duty & (1<<0))) || !((cr)->nnodes >
1)))

←

Taking true branch

→

1421

{

1422

copy_mat(state->box, box);

1423

}

1424

1425

if (PAR(cr)((cr)->nnodes > 1))

←

Taking false branch

→

1426

{

1427

gmx_bcast(sizeof(box), box, cr);

1428

}

1429

1430

/* Essential dynamics */

1431

if (opt2bSet("-ei", nfile, fnm))

←

Taking false branch

→

1432

{

1433

/* Open input and output files, allocate space for ED data structure */

1434

ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr);

1435

}

1436

1437

if (PAR(cr)((cr)->nnodes > 1) && !(EI_TPI(inputrec->eI)((inputrec->eI) == eiTPI || (inputrec->eI) == eiTPIC) ||

1438

inputrec->eI == eiNM))

1439

{

1440

cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr,

1441

dddlb_opt, dlb_scale,

1442

ddcsx, ddcsy, ddcsz,

1443

mtop, inputrec,

1444

box, state->x,

1445

&ddbox, &npme_major, &npme_minor);

1446

1447

make_dd_communicators(fplog, cr, dd_node_order);

1448

1449

/* Set overallocation to avoid frequent reallocation of arrays */

1450

set_over_alloc_dd(TRUE1);

1451

}

1452

else

1453

{

1454

/* PME, if used, is done on all nodes with 1D decomposition */

1455

cr->npmenodes = 0;

1456

cr->duty = (DUTY_PP(1<<0) | DUTY_PME(1<<1));

1457

npme_major = 1;

1458

npme_minor = 1;

1459

1460

if (inputrec->ePBC == epbcSCREW)

←

Taking false branch

→

1461

{

1462

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1462,

1463

"pbc=%s is only implemented with domain decomposition",

1464

epbc_names[inputrec->ePBC]);

1465

}

1466

}

1467

1468

if (PAR(cr)((cr)->nnodes > 1))

←

Taking false branch

→

1469

{

1470

/* After possible communicator splitting in make_dd_communicators.

1471

* we can set up the intra/inter node communication.

1472

1473

gmx_setup_nodecomm(fplog, cr);

1474

}

1475

1476

/* Initialize per-physical-node MPI process/thread ID and counters. */

1477

gmx_init_intranode_counters(cr);

1478

1479

#ifdef GMX_MPI

1480

md_print_info(cr, fplog, "Using %d MPI %s\n",

1481

cr->nnodes,

1482

#ifdef GMX_THREAD_MPI

1483

cr->nnodes == 1 ? "thread" : "threads"

←

'?' condition is false

→

1484

#else

1485

cr->nnodes == 1 ? "process" : "processes"

1486

#endif

1487

);

1488

fflush(stderrstderr);

1489

#endif

1490

1491

/* Check and update hw_opt for the cut-off scheme */

1492

check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);

1493

1494

gmx_omp_nthreads_init(fplog, cr,

1495

hwinfo->nthreads_hw_avail,

1496

hw_opt->nthreads_omp,

1497

hw_opt->nthreads_omp_pme,

1498

(cr->duty & DUTY_PP(1<<0)) == 0,

1499

inputrec->cutoff_scheme == ecutsVERLET);

1500

1501

if (PAR(cr)((cr)->nnodes > 1))

←

Taking false branch

→

1502

{

1503

/* The master rank decided on the use of GPUs,

1504

* broadcast this information to all ranks.

1505

*/

1506

gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr);

1507

}

1508

1509

if (bUseGPU)

←

Taking false branch

→

1510

{

1511

if (cr->npmenodes == -1)

1512

{

1513

/* Don't automatically use PME-only nodes with GPUs */

1514

cr->npmenodes = 0;

1515

}

1516

1517

/* Select GPU id's to use */

1518

gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU,

1519

&hw_opt->gpu_opt);

1520

}

1521

else

1522

{

1523

/* Ignore (potentially) manually selected GPUs */

1524

hw_opt->gpu_opt.ncuda_dev_use = 0;

1525

}

1526

1527

/* check consistency across ranks of things like SIMD

1528

* support and number of GPUs selected */

1529

gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU);

1530

1531

if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes >
1)))

1532

{

1533

/* When we share GPUs over ranks, we need to know this for the DLB */

1534

dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt);

1535

}

1536

1537

/* getting number of PP/PME threads

1538

PME: env variable should be read only on one node to make sure it is

1539

identical everywhere;

1540

*/

1541

/* TODO nthreads_pp is only used for pinning threads.

1542

* This is a temporary solution until we have a hw topology library.

1543

*/

1544

nthreads_pp = gmx_omp_nthreads_get(emntNonbonded);

1545

nthreads_pme = gmx_omp_nthreads_get(emntPME);

1546

1547

wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme);

1548

1549

if (PAR(cr)((cr)->nnodes > 1))

←

Taking false branch

→

1550

{

1551

/* Master synchronizes its value of reset_counters with all nodes

1552

* including PME only nodes */

1553

reset_counters = wcycle_get_reset_counters(wcycle);

1554

gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);

1555

wcycle_set_reset_counters(wcycle, reset_counters);

1556

}

1557

1558

snew(nrnb, 1)(nrnb) = save_calloc("nrnb", "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1558, (1), sizeof(*(nrnb)));

1559

if (cr->duty & DUTY_PP(1<<0))

←

Taking false branch

→

1560

{

1561

bcast_state(cr, state);

1562

1563

/* Initiate forcerecord */

1564

fr = mk_forcerec();

1565

fr->hwinfo = hwinfo;

1566

fr->gpu_opt = &hw_opt->gpu_opt;

1567

init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box,

1568

opt2fn("-table", nfile, fnm),

1569

opt2fn("-tabletf", nfile, fnm),

1570

opt2fn("-tablep", nfile, fnm),

1571

opt2fn("-tableb", nfile, fnm),

1572

nbpu_opt,

1573

FALSE0,

1574

pforce);

1575

1576

/* version for PCA_NOT_READ_NODE (see md.c) */

1577

/*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,

1578

"nofile","nofile","nofile","nofile",FALSE,pforce);

1579

*/

1580

fr->bSepDVDL = ((Flags & MD_SEPPOT(1<<7)) == MD_SEPPOT(1<<7));

1581

1582

/* Initialize QM-MM */

1583

if (fr->bQMMM)

1584

{

1585

init_QMMMrec(cr, mtop, inputrec, fr);

1586

}

1587

1588

/* Initialize the mdatoms structure.

1589

* mdatoms is not filled with atom data,

1590

* as this can not be done now with domain decomposition.

1591

*/

1592

mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO);

1593

1594

/* Initialize the virtual site communication */

1595

vsite = init_vsite(mtop, cr, FALSE0);

1596

1597

calc_shifts(box, fr->shift_vec);

1598

1599

/* With periodic molecules the charge groups should be whole at start up

1600

* and the virtual sites should not be far from their proper positions.

1601

*/

1602

if (!inputrec->bContinuation && MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)) &&

1603

!(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))

1604

{

1605

/* Make molecules whole at start of run */

1606

if (fr->ePBC != epbcNONE)

1607

{

1608

do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x);

1609

}

1610

if (vsite)

1611

{

1612

/* Correct initial vsite positions are required

1613

* for the initial distribution in the domain decomposition

1614

* and for the initial shell prediction.

1615

*/

1616

construct_vsites_mtop(vsite, mtop, state->x);

1617

}

1618

}

1619

1620

if (EEL_PME(fr->eeltype)((fr->eeltype) == eelPME || (fr->eeltype) == eelPMESWITCH
|| (fr->eeltype) == eelPMEUSER || (fr->eeltype) == eelPMEUSERSWITCH
|| (fr->eeltype) == eelP3M_AD) || EVDW_PME(fr->vdwtype)((fr->vdwtype) == evdwPME))

1621

{

1622

ewaldcoeff_q = fr->ewaldcoeff_q;

1623

ewaldcoeff_lj = fr->ewaldcoeff_lj;

1624

pmedata = &fr->pmedata;

1625

}

1626

else

1627

{

1628

pmedata = NULL((void*)0);

1629

}

1630

}

1631

else

1632

{

1633

/* This is a PME only node */

1634

1635

/* We don't need the state */

1636

done_state(state);

1637

1638

ewaldcoeff_q = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);

1639

ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);

1640

snew(pmedata, 1)(pmedata) = save_calloc("pmedata", "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1640, (1), sizeof(*(pmedata)));

1641

}

1642

1643

if (hw_opt->thread_affinity != threadaffOFF)

←

Taking false branch

→

1644

{

1645

/* Before setting affinity, check whether the affinity has changed

1646

* - which indicates that probably the OpenMP library has changed it

1647

* since we first checked).

1648

*/

1649

gmx_check_thread_affinity_set(fplog, cr,

1650

hw_opt, hwinfo->nthreads_hw_avail, TRUE1);

1651

1652

/* Set the CPU affinity */

1653

gmx_set_thread_affinity(fplog, cr, hw_opt, hwinfo);

1654

}

1655

1656

/* Initiate PME if necessary,

1657

* either on all nodes or on dedicated PME nodes only. */

1658

if (EEL_PME(inputrec->coulombtype)((inputrec->coulombtype) == eelPME || (inputrec->coulombtype
) == eelPMESWITCH || (inputrec->coulombtype) == eelPMEUSER
|| (inputrec->coulombtype) == eelPMEUSERSWITCH || (inputrec
->coulombtype) == eelP3M_AD) || EVDW_PME(inputrec->vdwtype)((inputrec->vdwtype) == evdwPME))

1659

{

1660

if (mdatoms)

←

Taking false branch

→

1661

{

1662

nChargePerturbed = mdatoms->nChargePerturbed;

1663

if (EVDW_PME(inputrec->vdwtype)((inputrec->vdwtype) == evdwPME))

1664

{

1665

nTypePerturbed = mdatoms->nTypePerturbed;

1666

}

1667

}

1668

if (cr->npmenodes > 0)

←

Taking false branch

→

1669

{

1670

/* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/

1671

gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);

1672

gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);

1673

}

1674

1675

if (cr->duty & DUTY_PME(1<<1))

←

Taking true branch

→

1676

{

1677

status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec,

1678

mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed,

←

'?' condition is true

→

1679

(Flags & MD_REPRODUCIBLE(1<<13)), nthreads_pme);

1680

if (status != 0)

←

Assuming 'status' is equal to 0

→

←

Taking false branch

→

1681

{

1682

gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1682, "Error %d initializing PME", status);

1683

}

1684

}

1685

}

1686

1687

1688

if (integrator[inputrec->eI].func == do_md)

←

Taking false branch

→

1689

{

1690

/* Turn on signal handling on all nodes */

1691

/*

1692

* (A user signal from the PME nodes (if any)

1693

* is communicated to the PP nodes.

1694

*/

1695

signal_handler_install();

1696

}

1697

1698

if (cr->duty & DUTY_PP(1<<0))

←

Taking true branch

→

1699

{

1700

/* Assumes uniform use of the number of OpenMP threads */

1701

walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));

1702

1703

if (inputrec->ePull != epullNO)

←

Taking false branch

→

1704

{

1705

/* Initialize pull code */

1706

init_pull(fplog, inputrec, nfile, fnm, mtop, cr, oenv, inputrec->fepvals->init_lambda,

1707

EI_DYNAMICS(inputrec->eI)(((inputrec->eI) == eiMD || ((inputrec->eI) == eiVV || (
inputrec->eI) == eiVVAK)) || ((inputrec->eI) == eiSD1 ||
(inputrec->eI) == eiSD2) || (inputrec->eI) == eiBD) && MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)), Flags);

1708

}

1709

1710

if (inputrec->bRot)

←

Taking false branch

→

1711

{

1712

/* Initialize enforced rotation code */

1713

init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv,

1714

bVerbose, Flags);

1715

}

1716

1717

if (inputrec->eSwapCoords != eswapNO)

←

Taking false branch

→

1718

{

1719

/* Initialize ion swapping code */

1720

init_swapcoords(fplog, bVerbose, inputrec, opt2fn_master("-swap", nfile, fnm, cr),

1721

mtop, state->x, state->box, &state->swapstate, cr, oenv, Flags);

1722

}

1723

1724

constr = init_constraints(fplog, mtop, inputrec, ed, state, cr);

1725

1726

if (DOMAINDECOMP(cr)(((cr)->dd != ((void*)0)) && ((cr)->nnodes >
1)))

←

Taking true branch

→

1727

{

1728

dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,

1729

Flags & MD_DDBONDCHECK(1<<10), fr->cginfo_mb);

←

Access to field 'cginfo_mb' results in a dereference of a null pointer (loaded from variable 'fr')

1730

1731

set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, &ddbox);

1732

1733

setup_dd_grid(fplog, cr->dd);

1734

}

1735

1736

/* Now do whatever the user wants us to do (how flexible...) */

1737

integrator[inputrec->eI].func(fplog, cr, nfile, fnm,

1738

oenv, bVerbose, bCompact,

1739

nstglobalcomm,

1740

vsite, constr,

1741

nstepout, inputrec, mtop,

1742

fcd, state,

1743

mdatoms, nrnb, wcycle, ed, fr,

1744

repl_ex_nst, repl_ex_nex, repl_ex_seed,

1745

membed,

1746

cpt_period, max_hours,

1747

deviceOptions,

1748

imdport,

1749

Flags,

1750

walltime_accounting);

1751

1752

if (inputrec->ePull != epullNO)

1753

{

1754

finish_pull(inputrec->pull);

1755

}

1756

1757

if (inputrec->bRot)

1758

{

1759

finish_rot(inputrec->rot);

1760

}

1761

1762

}

1763

else

1764

{

1765

/* do PME only */

1766

walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));

1767

gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec);

1768

}

1769

1770

wallcycle_stop(wcycle, ewcRUN);

1771

1772

/* Finish up, write some stuff

1773

* if rerunMD, don't write last frame again

1774

*/

1775

finish_run(fplog, cr,

1776

inputrec, nrnb, wcycle, walltime_accounting,

1777

fr != NULL((void*)0) && fr->nbv != NULL((void*)0) && fr->nbv->bUseGPU ?

1778

nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL((void*)0),

1779

1780

1781

1782

/* Free GPU memory and context */

1783

free_gpu_resources(fr, cr);

1784

1785

if (opt2bSet("-membed", nfile, fnm))

1786

{

1787

sfree(membed)save_free("membed", "/home/alexxy/Develop/gromacs/src/programs/mdrun/runner.c"
, 1787, (membed));

1788

}

1789

1790

gmx_hardware_info_free(hwinfo);

1791

1792

/* Does what it says */

1793

print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());

1794

walltime_accounting_destroy(walltime_accounting);

1795

1796

/* Close logfile already here if we were appending to it */

1797

if (MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)) && (Flags & MD_APPENDFILES(1<<15)))

1798

{

1799

gmx_log_close(fplog);

1800

}

1801

1802

rc = (int)gmx_get_stop_condition();

1803

1804

#ifdef GMX_THREAD_MPI

1805

/* we need to join all threads. The sub-threads join when they

1806

exit this function, but the master thread needs to be told to

1807

wait for that. */

1808

if (PAR(cr)((cr)->nnodes > 1) && MASTER(cr)(((cr)->nodeid == 0) || !((cr)->nnodes > 1)))

1809

{

1810

tMPI_Finalize();

1811

}

1812

#endif

1813

1814

return rc;

1815

}