/home/alexxy/Develop/gromacs/src/gromacs/mdlib/perf

Bug Summary

File:	gromacs/mdlib/perf_est.c
Location:	line 417, column 48
Description:	Branch condition evaluates to a garbage value

Annotated Source Code

* This file is part of the GROMACS molecular simulation package.

* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,

* and including many others, as listed in the AUTHORS file in the

* top-level source directory and at http://www.gromacs.org.

* GROMACS is free software; you can redistribute it and/or

* modify it under the terms of the GNU Lesser General Public License

* as published by the Free Software Foundation; either version 2.1

* of the License, or (at your option) any later version.

* GROMACS is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

* Lesser General Public License for more details.

* You should have received a copy of the GNU Lesser General Public

* License along with GROMACS; if not, see

* http://www.gnu.org/licenses, or write to the Free Software Foundation,

* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

* If you want to redistribute modifications to GROMACS, please

* consider that scientific software is very special. Version

* control is crucial - bugs must be traceable. We will be happy to

* consider code for inclusion in the official distribution, but

* derived work must not be called official GROMACS. Details are found

* in the README & COPYING files - if they are missing, get the

* official version at http://www.gromacs.org.

* To help us fund GROMACS development, we humbly ask that you cite

* the research papers on the package. Check out http://www.gromacs.org.

#ifdef HAVE_CONFIG_H1

#include <config.h>

#endif

#include <math.h>

#include "perf_est.h"

#include "physics.h"

#include "gromacs/math/vec.h"

#include "mtop_util.h"

#include "types/commrec.h"

#include "nbnxn_search.h"

#include "nbnxn_consts.h"

/* Computational cost of bonded, non-bonded and PME calculations.

* This will be machine dependent.

* The numbers here are accurate for Intel Core2 and AMD Athlon 64

* in single precision. In double precision PME mesh is slightly cheaper,

* although not so much that the numbers need to be adjusted.

/* Cost of a pair interaction in the "group" cut-off scheme */

#define C_GR_FQ1.5 1.5

#define C_GR_QLJ_CUT1.5 1.5

#define C_GR_QLJ_TAB2.0 2.0

#define C_GR_LJ_CUT1.0 1.0

#define C_GR_LJ_TAB1.75 1.75

/* Cost of 1 water with one Q/LJ atom */

#define C_GR_QLJW_CUT2.0 2.0

#define C_GR_QLJW_TAB2.25 2.25

/* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */

#define C_GR_QW1.75 1.75

/* Cost of a pair interaction in the "Verlet" cut-off scheme, QEXP is Ewald */

#define C_VT_LJ0.30 0.30

#define C_VT_QRF_LJ0.40 0.40

#define C_VT_QRF0.30 0.30

#define C_VT_QEXP_LJ0.55 0.55

#define C_VT_QEXP0.50 0.50

/* Extra cost for expensive LJ interaction, e.g. pot-switch or LJ-PME */

#define C_VT_LJEXP_ADD0.20 0.20

/* Cost of PME, with all components running with SSE instructions */

/* Cost of particle reordering and redistribution */

#define C_PME_REDIST12.0 12.0

/* Cost of q spreading and force interpolation per charge (mainly memory) */

#define C_PME_SPREAD0.30 0.30

/* Cost of fft's, will be multiplied with N log(N) */

#define C_PME_FFT0.20 0.20

/* Cost of pme_solve, will be multiplied with N */

#define C_PME_SOLVE0.50 0.50

/* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */

#define C_BOND5.0 5.0

int n_bonded_dx(gmx_mtop_t *mtop, gmx_bool bExcl)

{

int mb, nmol, ftype, ndxb, ndx_excl;

int ndx;

gmx_moltype_t *molt;

/* Count the number of pbc_rvec_sub calls required for bonded interactions.

100

* This number is also roughly proportional to the computational cost.

101

102

ndx = 0;

103

ndx_excl = 0;

104

for (mb = 0; mb < mtop->nmolblock; mb++)

105

{

106

molt = &mtop->moltype[mtop->molblock[mb].type];

107

nmol = mtop->molblock[mb].nmol;

108

for (ftype = 0; ftype < F_NRE; ftype++)

109

{

110

if (interaction_function[ftype].flags & IF_BOND1)

111

{

112

switch (ftype)

113

{

114

case F_POSRES:

115

case F_FBPOSRES: ndxb = 1; break;

116

case F_CONNBONDS: ndxb = 0; break;

117

default: ndxb = NRAL(ftype)(interaction_function[(ftype)].nratoms) - 1; break;

118

}

119

ndx += nmol*ndxb*molt->ilist[ftype].nr/(1 + NRAL(ftype)(interaction_function[(ftype)].nratoms));

120

}

121

}

122

if (bExcl)

123

{

124

ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2;

125

}

126

else

127

{

128

ndx_excl = 0;

129

}

130

}

131

132

if (debug)

133

{

134

fprintf(debug, "ndx bonded %d exclusions %d\n", ndx, ndx_excl);

135

}

136

137

ndx += ndx_excl;

138

139

return ndx;

140

}

141

142

static void pp_group_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box,

143

int *nq_tot, int *nlj_tot,

144

double *cost_pp,

145

gmx_bool *bChargePerturbed, gmx_bool *bTypePerturbed)

146

{

147

t_atom *atom;

148

int mb, nmol, atnr, cg, a, a0, ncqlj, ncq, nclj;

149

gmx_bool bBHAM, bLJcut, bWater, bQ, bLJ;

150

int nw, nqlj, nq, nlj;

151

float fq, fqlj, flj, fljtab, fqljw, fqw;

152

t_iparams *iparams;

153

gmx_moltype_t *molt;

154

155

bBHAM = (mtop->ffparams.functype[0] == F_BHAM);

156

157

bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);

158

159

/* Computational cost of bonded, non-bonded and PME calculations.

160

* This will be machine dependent.

161

* The numbers here are accurate for Intel Core2 and AMD Athlon 64

162

* in single precision. In double precision PME mesh is slightly cheaper,

163

* although not so much that the numbers need to be adjusted.

164

165

fq = C_GR_FQ1.5;

166

fqlj = (bLJcut ? C_GR_QLJ_CUT1.5 : C_GR_QLJ_TAB2.0);

167

flj = (bLJcut ? C_GR_LJ_CUT1.0 : C_GR_LJ_TAB1.75);

168

/* Cost of 1 water with one Q/LJ atom */

169

fqljw = (bLJcut ? C_GR_QLJW_CUT2.0 : C_GR_QLJW_TAB2.25);

170

/* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */

171

fqw = C_GR_QW1.75;

172

173

iparams = mtop->ffparams.iparams;

174

atnr = mtop->ffparams.atnr;

175

nw = 0;

176

nqlj = 0;

177

nq = 0;

178

nlj = 0;

179

*bChargePerturbed = FALSE0;

180

for (mb = 0; mb < mtop->nmolblock; mb++)

181

{

182

molt = &mtop->moltype[mtop->molblock[mb].type];

183

atom = molt->atoms.atom;

184

nmol = mtop->molblock[mb].nmol;

185

a = 0;

186

for (cg = 0; cg < molt->cgs.nr; cg++)

187

{

188

bWater = !bBHAM;

189

ncqlj = 0;

190

ncq = 0;

191

nclj = 0;

192

a0 = a;

193

while (a < molt->cgs.index[cg+1])

194

{

195

bQ = (atom[a].q != 0 || atom[a].qB != 0);

196

bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||

197

iparams[(atnr+1)*atom[a].type].lj.c12 != 0);

198

if (atom[a].q != atom[a].qB)

199

{

200

*bChargePerturbed = TRUE1;

201

}

202

if (atom[a].type != atom[a].typeB)

203

{

204

*bTypePerturbed = TRUE1;

205

}

206

/* This if this atom fits into water optimization */

207

if (!((a == a0 && bQ && bLJ) ||

208

(a == a0+1 && bQ && !bLJ) ||

209

(a == a0+2 && bQ && !bLJ && atom[a].q == atom[a-1].q) ||

210

(a == a0+3 && !bQ && bLJ)))

211

{

212

bWater = FALSE0;

213

}

214

if (bQ && bLJ)

215

{

216

ncqlj++;

217

}

218

else

219

{

220

if (bQ)

221

{

222

ncq++;

223

}

224

if (bLJ)

225

{

226

nclj++;

227

}

228

}

229

a++;

230

}

231

if (bWater)

232

{

233

nw += nmol;

234

}

235

else

236

{

237

nqlj += nmol*ncqlj;

238

nq += nmol*ncq;

239

nlj += nmol*nclj;

240

}

241

}

242

}

243

244

*nq_tot = nq + nqlj + nw*3;

245

*nlj_tot = nlj + nqlj + nw;

246

247

if (debug)

248

{

249

fprintf(debug, "nw %d nqlj %d nq %d nlj %d\n", nw, nqlj, nq, nlj);

250

}

251

252

/* For the PP non-bonded cost it is (unrealistically) assumed

253

* that all atoms are distributed homogeneously in space.

254

* Factor 3 is used because a water molecule has 3 atoms

255

* (and TIP4P effectively has 3 interactions with (water) atoms)).

256

257

*cost_pp = 0.5*(fqljw*nw*nqlj +

258

fqw *nw*(3*nw + nq) +

259

fqlj *nqlj*nqlj +

260

fq *nq*(3*nw + nqlj + nq) +

261

flj *nlj*(nw + nqlj + nlj))

262

*4/3*M_PI3.14159265358979323846*ir->rlist*ir->rlist*ir->rlist/det(box);

263

}

264

265

static void pp_verlet_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box,

266

int *nq_tot, int *nlj_tot,

267

double *cost_pp,

268

gmx_bool *bChargePerturbed, gmx_bool *bTypePerturbed)

269

{

270

t_atom *atom;

271

int mb, nmol, atnr, cg, a, a0, nqlj, nq, nlj;

272

gmx_bool bQRF;

273

t_iparams *iparams;

274

gmx_moltype_t *molt;

275

real r_eff;

276

double c_qlj, c_q, c_lj;

277

double nat;

278

/* Conversion factor for reference vs SIMD kernel performance.

279

* The factor is about right for SSE2/4, but should be 2 higher for AVX256.

280

281

#ifdef GMX_DOUBLE

282

const real nbnxn_refkernel_fac = 4.0;

283

#else

284

const real nbnxn_refkernel_fac = 8.0;

285

#endif

286

287

bQRF = (EEL_RF(ir->coulombtype)((ir->coulombtype) == eelRF || (ir->coulombtype) == eelGRF
|| (ir->coulombtype) == eelRF_NEC || (ir->coulombtype)
== eelRF_ZERO ) || ir->coulombtype == eelCUT);

288

289

iparams = mtop->ffparams.iparams;

290

atnr = mtop->ffparams.atnr;

291

nqlj = 0;

292

nq = 0;

293

*bChargePerturbed = FALSE0;

294

for (mb = 0; mb < mtop->nmolblock; mb++)

←

Loop condition is false. Execution continues on line 325

→

295

{

296

molt = &mtop->moltype[mtop->molblock[mb].type];

297

atom = molt->atoms.atom;

298

nmol = mtop->molblock[mb].nmol;

299

a = 0;

300

for (a = 0; a < molt->atoms.nr; a++)

301

{

302

if (atom[a].q != 0 || atom[a].qB != 0)

303

{

304

if (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||

305

iparams[(atnr+1)*atom[a].type].lj.c12 != 0)

306

{

307

nqlj += nmol;

308

}

309

else

310

{

311

nq += nmol;

312

}

313

}

314

if (atom[a].q != atom[a].qB)

315

{

316

*bChargePerturbed = TRUE1;

317

}

318

if (atom[a].type != atom[a].typeB)

319

{

320

*bTypePerturbed = TRUE1;

321

}

322

}

323

}

324

325

nlj = mtop->natoms - nqlj - nq;

326

327

*nq_tot = nqlj + nq;

328

*nlj_tot = nqlj + nlj;

329

330

/* Effective cut-off for cluster pair list of 4x4 atoms */

331

r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE4, mtop->natoms/det(box));

Calling 'det'

Returning from 'det'

332

333

if (debug)

←

Assuming 'debug' is null

→

←

Taking false branch

→

334

{

335

fprintf(debug, "nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n",

336

nqlj, nq, nlj, ir->rlist, r_eff);

337

}

338

339

/* Determine the cost per pair interaction */

340

c_qlj = (bQRF ? C_VT_QRF_LJ0.40 : C_VT_QEXP_LJ0.55);

←

'?' condition is false

→

341

c_q = (bQRF ? C_VT_QRF0.30 : C_VT_QEXP0.50);

←

'?' condition is false

→

342

c_lj = C_VT_LJ0.30;

343

if (ir->vdw_modifier == eintmodPOTSWITCH || EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME))

344

{

345

c_qlj += C_VT_LJEXP_ADD0.20;

346

c_lj += C_VT_LJEXP_ADD0.20;

347

}

348

if (EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME) && ir->ljpme_combination_rule == eljpmeLB)

←

Taking false branch

→

349

{

350

/* We don't have LJ-PME LB comb. rule kernels, we use slow kernels */

351

c_qlj *= nbnxn_refkernel_fac;

352

c_q *= nbnxn_refkernel_fac;

353

c_lj *= nbnxn_refkernel_fac;

354

}

355

356

/* For the PP non-bonded cost it is (unrealistically) assumed

357

* that all atoms are distributed homogeneously in space.

358

359

/* Convert mtop->natoms to double to avoid int overflow */

360

nat = mtop->natoms;

361

*cost_pp = 0.5*nat*(nqlj*c_qlj + nq*c_q + nlj*c_lj)

362

*4/3*M_PI3.14159265358979323846*r_eff*r_eff*r_eff/det(box);

Calling 'det'

Returning from 'det'

363

}

364

365

float pme_load_estimate(gmx_mtop_t *mtop, t_inputrec *ir, matrix box)

366

{

367

t_atom *atom;

368

int mb, nmol, atnr, cg, a, a0, nq_tot, nlj_tot, f;

369

gmx_bool bBHAM, bLJcut, bChargePerturbed, bTypePerturbed;

'bTypePerturbed' declared without an initial value

→

370

gmx_bool bWater, bQ, bLJ;

371

double cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme;

372

float ratio;

373

t_iparams *iparams;

374

gmx_moltype_t *molt;

375

376

/* Computational cost of bonded, non-bonded and PME calculations.

377

* This will be machine dependent.

378

* The numbers here are accurate for Intel Core2 and AMD Athlon 64

379

* in single precision. In double precision PME mesh is slightly cheaper,

380

* although not so much that the numbers need to be adjusted.

381

382

383

iparams = mtop->ffparams.iparams;

384

atnr = mtop->ffparams.atnr;

385

386

cost_bond = C_BOND5.0*n_bonded_dx(mtop, TRUE1);

387

388

if (ir->cutoff_scheme == ecutsGROUP)

←

Taking false branch

→

389

{

390

pp_group_load(mtop, ir, box,

391

&nq_tot, &nlj_tot, &cost_pp,

392

&bChargePerturbed, &bTypePerturbed);

393

}

394

else

395

{

396

pp_verlet_load(mtop, ir, box,

←

Calling 'pp_verlet_load'

→

←

Returning from 'pp_verlet_load'

→

397

&nq_tot, &nlj_tot, &cost_pp,

398

&bChargePerturbed, &bTypePerturbed);

399

}

400

401

cost_redist = 0;

402

cost_spread = 0;

403

cost_fft = 0;

404

cost_solve = 0;

405

406

if (EEL_PME(ir->coulombtype)((ir->coulombtype) == eelPME || (ir->coulombtype) == eelPMESWITCH
|| (ir->coulombtype) == eelPMEUSER || (ir->coulombtype
) == eelPMEUSERSWITCH || (ir->coulombtype) == eelP3M_AD))

←

Taking false branch

→

407

{

408

f = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1);

409

cost_redist += C_PME_REDIST12.0*nq_tot;

410

cost_spread += f*C_PME_SPREAD0.30*nq_tot*pow(ir->pme_order, 3);

411

cost_fft += f*C_PME_FFT0.20*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);

412

cost_solve += f*C_PME_SOLVE0.50*ir->nkx*ir->nky*ir->nkz;

413

}

414

415

if (EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME))

←

Taking true branch

→

416

{

417

f = ((ir->efep != efepNO && bTypePerturbed) ? 2 : 1);

←

Branch condition evaluates to a garbage value

418

if (ir->ljpme_combination_rule == eljpmeLB)

419

{

420

/* LB combination rule: we have 7 mesh terms */

421

f *= 7;

422

}

423

cost_redist += C_PME_REDIST12.0*nlj_tot;

424

cost_spread += f*C_PME_SPREAD0.30*nlj_tot*pow(ir->pme_order, 3);

425

cost_fft += f*C_PME_FFT0.20*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);

426

cost_solve += f*C_PME_SOLVE0.50*ir->nkx*ir->nky*ir->nkz;

427

}

428

429

cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;

430

431

ratio = cost_pme/(cost_bond + cost_pp + cost_pme);

432

433

if (debug)

434

{

435

fprintf(debug,

436

"cost_bond %f\n"

437

"cost_pp %f\n"

438

"cost_redist %f\n"

439

"cost_spread %f\n"

440

"cost_fft %f\n"

441

"cost_solve %f\n",

442

cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve);

443

444

fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio);

445

}

446

447

return ratio;

448

}