1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief This file contains function definitions necessary for
40  * managing the offload of long-ranged PME work to a separate MPI rank,
41  * for computing energies and forces (Coulomb and LJ).
42  *
43  * \author Berk Hess <hess@kth.se>
44  * \ingroup module_ewald
45  */
46
47 #include "gmxpre.h"
48
49 #include "config.h"
50
51 #include <math.h>
52 #include <stdio.h>
53 #include <string.h>
54
55 #include "gromacs/domdec/domdec.h"
56 #include "gromacs/domdec/domdec_struct.h"
57 #include "gromacs/ewald/pme.h"
58 #include "gromacs/gmxlib/network.h"
59 #include "gromacs/math/vec.h"
60 #include "gromacs/mdlib/gmx_omp_nthreads.h"
61 #include "gromacs/mdlib/sighandler.h"
62 #include "gromacs/mdtypes/commrec.h"
63 #include "gromacs/mdtypes/md_enums.h"
64 #include "gromacs/utility/fatalerror.h"
65 #include "gromacs/utility/gmxmpi.h"
66 #include "gromacs/utility/smalloc.h"
67
68 #include "pme-internal.h"
69
70 /*! \brief MPI Tags used to separate communication of different types of quantities */
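/* The first six values double as MPI tags for the per-type parameter messages
 * and match, in order, the PP_PME_CHARGE ... PP_PME_SIGMAB flag bits below;
 * gmx_pme_recv_coeffs_coords() relies on this by pairing tag q with flag
 * (PP_PME_CHARGE<<q). eCommType_NR counts only these six parameter types. */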
71 enum {
72     eCommType_ChargeA, eCommType_ChargeB, eCommType_SQRTC6A, eCommType_SQRTC6B,
73     eCommType_SigmaA, eCommType_SigmaB, eCommType_NR, eCommType_COORD,
74     eCommType_CNB
75 };
76
77 //@{
78 /*! \brief Flags used to coordinate PP-PME communication and computation phases
79  *
80  * Some parts of the code (gmx_pme_send_q, gmx_pme_recv_q_x) assume
81  * that the first six flags are exactly in this order.
82  */
83
84 #define PP_PME_CHARGE         (1<<0)
85 #define PP_PME_CHARGEB        (1<<1)
86 #define PP_PME_SQRTC6         (1<<2)
87 #define PP_PME_SQRTC6B        (1<<3)
88 #define PP_PME_SIGMA          (1<<4)
89 #define PP_PME_SIGMAB         (1<<5)
90 #define PP_PME_COORD          (1<<6)
91 #define PP_PME_ENER_VIR       (1<<9)
92 #define PP_PME_FINISH         (1<<10)
93 #define PP_PME_SWITCHGRID     (1<<11)
94 #define PP_PME_RESETCOUNTERS  (1<<12)
95
96 #define PME_PP_SIGSTOP        (1<<0)
97 #define PME_PP_SIGSTOPNSS     (1<<1)
98 //@}
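/* Example: with PME electrostatics and free-energy perturbation,
 * gmx_pme_send_parameters() sets (PP_PME_CHARGE | PP_PME_CHARGEB), while each
 * step gmx_pme_send_coordinates() sends PP_PME_COORD, plus PP_PME_ENER_VIR
 * when energies and the virial are requested. */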
99
100 /*! \brief Master PP-PME communication data structure */
101 struct gmx_pme_pp {
102 #if GMX_MPI
103     MPI_Comm     mpi_comm_mysim; /**< MPI communicator for this simulation */
104 #endif
105     int          nnode;          /**< The number of PP nodes to communicate with */
106     int         *node;           /**< The PP node ranks                          */
107     int          node_peer;      /**< The peer PP node rank                      */
108     int         *nat;            /**< The number of atoms for each PP node       */
109     //@{
110     /**< Vectors of A- and B-state parameters to be transferred to the PME ranks */
111     real        *chargeA;
112     real        *chargeB;
113     real        *sqrt_c6A;
114     real        *sqrt_c6B;
115     real        *sigmaA;
116     real        *sigmaB;
117     //@}
118     rvec        *x;             /**< Vector of atom coordinates to transfer to PME ranks */
119     rvec        *f;             /**< Vector of atom forces received from PME ranks */
120     int          nalloc;        /**< Allocation size of transfer vectors (>= \p nat) */
121 #if GMX_MPI
122     //@{
123     /**< Vectors of MPI objects used for non-blocking communication with the (possibly multiple) PP ranks attached to this PME rank */
124     MPI_Request *req;
125     MPI_Status  *stat;
126     //@}
127 #endif
128 };
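/* One gmx_pme_pp struct lives on each PME-only rank; the transfer vectors
 * above hold the data of all attached PP ranks concatenated in the order
 * given by the node[] array. */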
129
130 /*! \brief Helper struct for PP-PME communication of parameters */
131 struct gmx_pme_comm_n_box_t {
132     int             natoms;     /**< Number of atoms */
133     matrix          box;        /**< Box */
134     int             maxshift_x; /**< Maximum shift in x direction */
135     int             maxshift_y; /**< Maximum shift in y direction */
136     real            lambda_q;   /**< Free-energy lambda for electrostatics */
137     real            lambda_lj;  /**< Free-energy lambda for Lennard-Jones */
138     unsigned int    flags;      /**< Control flags */
139     gmx_int64_t     step;       /**< MD integration step number */
140     //@{
141     /*! \brief Used in PME grid tuning */
142     ivec            grid_size;
143     real            ewaldcoeff_q;
144     real            ewaldcoeff_lj;
145     //@}
146 };
147
148 /*! \brief Helper struct for PP-PME communication of virial and energy */
149 typedef struct {
150     //@{
151     /*! \brief Virial, energy, and derivative of potential w.r.t. lambda for charge and Lennard-Jones */
152     matrix          vir_q;
153     matrix          vir_lj;
154     real            energy_q;
155     real            energy_lj;
156     real            dvdlambda_q;
157     real            dvdlambda_lj;
158     //@}
159     float           cycles;     /**< Counter of CPU cycles used */
160     gmx_stop_cond_t stop_cond;  /**< Flag used in responding to an external signal to terminate */
161 } gmx_pme_comm_vir_ene_t;
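/* Both gmx_pme_comm_n_box_t and gmx_pme_comm_vir_ene_t are transferred as raw
 * bytes (MPI_BYTE over sizeof(struct)); this is safe because all ranks of a
 * simulation run the same binary and thus agree on the struct layout. */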
162
163 gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr)
164 {
165     struct gmx_pme_pp *pme_pp;
166
167     snew(pme_pp, 1);
168
169 #if GMX_MPI
170     int rank;
171
172     pme_pp->mpi_comm_mysim = cr->mpi_comm_mysim;
173     MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
174     get_pme_ddnodes(cr, rank, &pme_pp->nnode, &pme_pp->node, &pme_pp->node_peer);
175     snew(pme_pp->nat, pme_pp->nnode);
176     snew(pme_pp->req, eCommType_NR*pme_pp->nnode);
177     snew(pme_pp->stat, eCommType_NR*pme_pp->nnode);
178     pme_pp->nalloc       = 0;
179 #else
180     GMX_UNUSED_VALUE(cr);
181 #endif
182
183     return pme_pp;
184 }
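
/* A rough sketch, not the actual driver code, of how a PME-only rank is
 * expected to use this module:
 *
 *   gmx_pme_pp_t pme_pp = gmx_pme_pp_init(cr);
 *   int          ret;
 *   do
 *   {
 *       ret = gmx_pme_recv_coeffs_coords(pme_pp, &natoms, ...);
 *       // for pmerecvqxX: run the PME calculation, then
 *       gmx_pme_send_force_vir_ener(pme_pp, f, vir_q, energy_q, ...);
 *   }
 *   while (ret != pmerecvqxFINISH);
 */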
185
186 /*! \brief Block to wait for communication to PME ranks to complete
187  *
188  * This should be faster with a real non-blocking MPI implementation */
189 /* #define GMX_PME_DELAYED_WAIT */
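/* When GMX_PME_DELAYED_WAIT is defined, the blocking wait moves from the end
 * of gmx_pme_send_coeffs_coords() to the start of the next send and to
 * gmx_pme_receive_f(); by default we wait before returning from the send. */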
190
191 static void gmx_pme_send_coeffs_coords_wait(gmx_domdec_t gmx_unused *dd)
192 {
193 #if GMX_MPI
194     if (dd->nreq_pme)
195     {
196         MPI_Waitall(dd->nreq_pme, dd->req_pme, MPI_STATUSES_IGNORE);
197         dd->nreq_pme = 0;
198     }
199 #endif
200 }
201
202 /*! \brief Send data to PME ranks */
203 static void gmx_pme_send_coeffs_coords(t_commrec *cr, unsigned int flags,
204                                        real gmx_unused *chargeA, real gmx_unused *chargeB,
205                                        real gmx_unused *c6A, real gmx_unused *c6B,
206                                        real gmx_unused *sigmaA, real gmx_unused *sigmaB,
207                                        matrix box, rvec gmx_unused *x,
208                                        real lambda_q, real lambda_lj,
209                                        int maxshift_x, int maxshift_y,
210                                        gmx_int64_t step)
211 {
212     gmx_domdec_t         *dd;
213     gmx_pme_comm_n_box_t *cnb;
214     int                   n;
215
216     dd = cr->dd;
217     n  = dd->nat_home;
218
219     if (debug)
220     {
221         fprintf(debug, "PP rank %d sending to PME rank %d: %d%s%s%s%s\n",
222                 cr->sim_nodeid, dd->pme_nodeid, n,
223                 (flags & PP_PME_CHARGE) ? " charges" : "",
224                 (flags & PP_PME_SQRTC6) ? " sqrtC6" : "",
225                 (flags & PP_PME_SIGMA)  ? " sigma" : "",
226                 (flags & PP_PME_COORD)  ? " coordinates" : "");
227     }
228
229 #ifdef GMX_PME_DELAYED_WAIT
230     /* We cannot use cnb until pending communication has finished */
231     gmx_pme_send_coeffs_coords_wait(dd);
232 #endif
233
234     if (dd->pme_receive_vir_ener)
235     {
236         /* Peer PP node: communicate all data */
237         if (dd->cnb == nullptr)
238         {
239             snew(dd->cnb, 1);
240         }
241         cnb = dd->cnb;
242
243         cnb->flags      = flags;
244         cnb->natoms     = n;
245         cnb->maxshift_x = maxshift_x;
246         cnb->maxshift_y = maxshift_y;
247         cnb->lambda_q   = lambda_q;
248         cnb->lambda_lj  = lambda_lj;
249         cnb->step       = step;
250         if (flags & PP_PME_COORD)
251         {
252             copy_mat(box, cnb->box);
253         }
254 #if GMX_MPI
255         MPI_Isend(cnb, sizeof(*cnb), MPI_BYTE,
256                   dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim,
257                   &dd->req_pme[dd->nreq_pme++]);
258 #endif
259     }
260     else if (flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
261     {
262 #if GMX_MPI
263         /* Communicate only the number of atoms */
264         MPI_Isend(&n, sizeof(n), MPI_BYTE,
265                   dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim,
266                   &dd->req_pme[dd->nreq_pme++]);
267 #endif
268     }
269
270 #if GMX_MPI
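    /* Each requested parameter vector, and the coordinates when present, is
     * sent as its own non-blocking message, tagged with its eCommType value. */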
271     if (n > 0)
272     {
273         if (flags & PP_PME_CHARGE)
274         {
275             MPI_Isend(chargeA, n*sizeof(real), MPI_BYTE,
276                       dd->pme_nodeid, eCommType_ChargeA, cr->mpi_comm_mysim,
277                       &dd->req_pme[dd->nreq_pme++]);
278         }
279         if (flags & PP_PME_CHARGEB)
280         {
281             MPI_Isend(chargeB, n*sizeof(real), MPI_BYTE,
282                       dd->pme_nodeid, eCommType_ChargeB, cr->mpi_comm_mysim,
283                       &dd->req_pme[dd->nreq_pme++]);
284         }
285         if (flags & PP_PME_SQRTC6)
286         {
287             MPI_Isend(c6A, n*sizeof(real), MPI_BYTE,
288                       dd->pme_nodeid, eCommType_SQRTC6A, cr->mpi_comm_mysim,
289                       &dd->req_pme[dd->nreq_pme++]);
290         }
291         if (flags & PP_PME_SQRTC6B)
292         {
293             MPI_Isend(c6B, n*sizeof(real), MPI_BYTE,
294                       dd->pme_nodeid, eCommType_SQRTC6B, cr->mpi_comm_mysim,
295                       &dd->req_pme[dd->nreq_pme++]);
296         }
297         if (flags & PP_PME_SIGMA)
298         {
299             MPI_Isend(sigmaA, n*sizeof(real), MPI_BYTE,
300                       dd->pme_nodeid, eCommType_SigmaA, cr->mpi_comm_mysim,
301                       &dd->req_pme[dd->nreq_pme++]);
302         }
303         if (flags & PP_PME_SIGMAB)
304         {
305             MPI_Isend(sigmaB, n*sizeof(real), MPI_BYTE,
306                       dd->pme_nodeid, eCommType_SigmaB, cr->mpi_comm_mysim,
307                       &dd->req_pme[dd->nreq_pme++]);
308         }
309         if (flags & PP_PME_COORD)
310         {
311             MPI_Isend(x[0], n*sizeof(rvec), MPI_BYTE,
312                       dd->pme_nodeid, eCommType_COORD, cr->mpi_comm_mysim,
313                       &dd->req_pme[dd->nreq_pme++]);
314         }
315     }
316
317 #ifndef GMX_PME_DELAYED_WAIT
318     /* Wait for the data to arrive */
319     /* This wait could be skipped, as we are sure x and q will not be modified
320      * before the next call to gmx_pme_send_x_q or gmx_pme_receive_f.
321      */
322     gmx_pme_send_coeffs_coords_wait(dd);
323 #endif
324 #endif
325 }
326
327 void gmx_pme_send_parameters(t_commrec *cr,
328                              const interaction_const_t *ic,
329                              gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
330                              real *chargeA, real *chargeB,
331                              real *sqrt_c6A, real *sqrt_c6B,
332                              real *sigmaA, real *sigmaB,
333                              int maxshift_x, int maxshift_y)
334 {
335     unsigned int flags = 0;
336
337     if (EEL_PME(ic->eeltype))
338     {
339         flags |= PP_PME_CHARGE;
340     }
341     if (EVDW_PME(ic->vdwtype))
342     {
343         flags |= (PP_PME_SQRTC6 | PP_PME_SIGMA);
344     }
345     if (bFreeEnergy_q || bFreeEnergy_lj)
346     {
347         /* Assumes that the B state flags are in the bits just above
348          * the ones for the A state. */
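        /* For example, PP_PME_CHARGE (1<<0) also turns on PP_PME_CHARGEB (1<<1). */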
349         flags |= (flags << 1);
350     }
351
352     gmx_pme_send_coeffs_coords(cr, flags,
353                                chargeA, chargeB,
354                                sqrt_c6A, sqrt_c6B, sigmaA, sigmaB,
355                                nullptr, nullptr, 0, 0, maxshift_x, maxshift_y, -1);
356 }
357
358 void gmx_pme_send_coordinates(t_commrec *cr, matrix box, rvec *x,
359                               real lambda_q, real lambda_lj,
360                               gmx_bool bEnerVir,
361                               gmx_int64_t step)
362 {
363     unsigned int flags = PP_PME_COORD;
364     if (bEnerVir)
365     {
366         flags |= PP_PME_ENER_VIR;
367     }
368     gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
369                                box, x, lambda_q, lambda_lj, 0, 0, step);
370 }
371
372 void gmx_pme_send_finish(t_commrec *cr)
373 {
374     unsigned int flags = PP_PME_FINISH;
375
376     gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, 0, 0, 0, -1);
377 }
378
379 void gmx_pme_send_switchgrid(t_commrec gmx_unused *cr,
380                              ivec gmx_unused       grid_size,
381                              real gmx_unused       ewaldcoeff_q,
382                              real gmx_unused       ewaldcoeff_lj)
383 {
384 #if GMX_MPI
385     gmx_pme_comm_n_box_t cnb;
386
387     /* Only let one PP node signal each PME node */
388     if (cr->dd->pme_receive_vir_ener)
389     {
390         cnb.flags = PP_PME_SWITCHGRID;
391         copy_ivec(grid_size, cnb.grid_size);
392         cnb.ewaldcoeff_q  = ewaldcoeff_q;
393         cnb.ewaldcoeff_lj = ewaldcoeff_lj;
394
395         /* We send this uncommon message as a blocking send to simplify the code */
396         MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
397                  cr->dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim);
398     }
399 #endif
400 }
401
402 void gmx_pme_send_resetcounters(t_commrec gmx_unused *cr, gmx_int64_t gmx_unused step)
403 {
404 #if GMX_MPI
405     gmx_pme_comm_n_box_t cnb;
406
407     /* Only let one PP node signal each PME node */
408     if (cr->dd->pme_receive_vir_ener)
409     {
410         cnb.flags = PP_PME_RESETCOUNTERS;
411         cnb.step  = step;
412
413         /* We send this uncommon message as a blocking send to simplify the code */
414         MPI_Send(&cnb, sizeof(cnb), MPI_BYTE,
415                  cr->dd->pme_nodeid, eCommType_CNB, cr->mpi_comm_mysim);
416     }
417 #endif
418 }
419
420 int gmx_pme_recv_coeffs_coords(struct gmx_pme_pp *pme_pp,
421                                int               *natoms,
422                                real             **chargeA,
423                                real             **chargeB,
424                                real             **sqrt_c6A,
425                                real             **sqrt_c6B,
426                                real             **sigmaA,
427                                real             **sigmaB,
428                                matrix             box,
429                                rvec             **x,
430                                rvec             **f,
431                                int               *maxshift_x,
432                                int               *maxshift_y,
433                                real              *lambda_q,
434                                real              *lambda_lj,
435                                gmx_bool          *bEnerVir,
436                                gmx_int64_t       *step,
437                                ivec               grid_size,
438                                real              *ewaldcoeff_q,
439                                real              *ewaldcoeff_lj)
440 {
441     int status = -1;
442     int nat    = 0;
443
444 #if GMX_MPI
445     unsigned int flags    = 0;
446     int          messages = 0;
447
448     do
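    /* Keep receiving command/parameter messages until one of them carries
     * coordinates, a finish request, a grid switch or a counter reset. */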
449     {
450         gmx_pme_comm_n_box_t cnb;
451         cnb.flags = 0;
452
453         /* Receive the send count, box and time step from the peer PP node */
454         MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE,
455                  pme_pp->node_peer, eCommType_CNB,
456                  pme_pp->mpi_comm_mysim, MPI_STATUS_IGNORE);
457
458         /* We accumulate all received flags */
459         flags |= cnb.flags;
460
461         *step  = cnb.step;
462
463         if (debug)
464         {
465             fprintf(debug, "PME only rank receiving:%s%s%s%s%s\n",
466                     (cnb.flags & PP_PME_CHARGE)        ? " charges" : "",
467                     (cnb.flags & PP_PME_COORD )        ? " coordinates" : "",
468                     (cnb.flags & PP_PME_FINISH)        ? " finish" : "",
469                     (cnb.flags & PP_PME_SWITCHGRID)    ? " switch grid" : "",
470                     (cnb.flags & PP_PME_RESETCOUNTERS) ? " reset counters" : "");
471         }
472
473         if (cnb.flags & PP_PME_FINISH)
474         {
475             status = pmerecvqxFINISH;
476         }
477
478         if (cnb.flags & PP_PME_SWITCHGRID)
479         {
480             /* Special case, receive the new parameters and return */
481             copy_ivec(cnb.grid_size, grid_size);
482             *ewaldcoeff_q  = cnb.ewaldcoeff_q;
483             *ewaldcoeff_lj = cnb.ewaldcoeff_lj;
484
485             status         = pmerecvqxSWITCHGRID;
486         }
487
488         if (cnb.flags & PP_PME_RESETCOUNTERS)
489         {
490             /* Special case, receive the step (set above) and return */
491             status = pmerecvqxRESETCOUNTERS;
492         }
493
494         if (cnb.flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
495         {
496             /* Receive the send counts from the other PP nodes */
497             for (int sender = 0; sender < pme_pp->nnode; sender++)
498             {
499                 if (pme_pp->node[sender] == pme_pp->node_peer)
500                 {
501                     pme_pp->nat[sender] = cnb.natoms;
502                 }
503                 else
504                 {
505                     MPI_Irecv(&(pme_pp->nat[sender]), sizeof(pme_pp->nat[0]),
506                               MPI_BYTE,
507                               pme_pp->node[sender], eCommType_CNB,
508                               pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
509                 }
510             }
511             MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
512             messages = 0;
513
514             nat = 0;
515             for (int sender = 0; sender < pme_pp->nnode; sender++)
516             {
517                 nat += pme_pp->nat[sender];
518             }
519
520             if (nat > pme_pp->nalloc)
521             {
522                 pme_pp->nalloc = over_alloc_dd(nat);
523                 if (cnb.flags & PP_PME_CHARGE)
524                 {
525                     srenew(pme_pp->chargeA, pme_pp->nalloc);
526                 }
527                 if (cnb.flags & PP_PME_CHARGEB)
528                 {
529                     srenew(pme_pp->chargeB, pme_pp->nalloc);
530                 }
531                 if (cnb.flags & PP_PME_SQRTC6)
532                 {
533                     srenew(pme_pp->sqrt_c6A, pme_pp->nalloc);
534                 }
535                 if (cnb.flags & PP_PME_SQRTC6B)
536                 {
537                     srenew(pme_pp->sqrt_c6B, pme_pp->nalloc);
538                 }
539                 if (cnb.flags & PP_PME_SIGMA)
540                 {
541                     srenew(pme_pp->sigmaA, pme_pp->nalloc);
542                 }
543                 if (cnb.flags & PP_PME_SIGMAB)
544                 {
545                     srenew(pme_pp->sigmaB, pme_pp->nalloc);
546                 }
547                 srenew(pme_pp->x, pme_pp->nalloc);
548                 srenew(pme_pp->f, pme_pp->nalloc);
549             }
550
551             /* maxshift is sent when the charges are sent */
552             *maxshift_x = cnb.maxshift_x;
553             *maxshift_y = cnb.maxshift_y;
554
555             /* Receive the charges in place */
556             for (int q = 0; q < eCommType_NR; q++)
557             {
558                 real *charge_pp = nullptr;
559
560                 if (!(cnb.flags & (PP_PME_CHARGE<<q)))
561                 {
562                     continue;
563                 }
564                 switch (q)
565                 {
566                     case eCommType_ChargeA: charge_pp = pme_pp->chargeA;  break;
567                     case eCommType_ChargeB: charge_pp = pme_pp->chargeB;  break;
568                     case eCommType_SQRTC6A: charge_pp = pme_pp->sqrt_c6A; break;
569                     case eCommType_SQRTC6B: charge_pp = pme_pp->sqrt_c6B; break;
570                     case eCommType_SigmaA:  charge_pp = pme_pp->sigmaA;   break;
571                     case eCommType_SigmaB:  charge_pp = pme_pp->sigmaB;   break;
572                     default: gmx_incons("Wrong eCommType");
573                 }
574                 nat = 0;
575                 for (int sender = 0; sender < pme_pp->nnode; sender++)
576                 {
577                     if (pme_pp->nat[sender] > 0)
578                     {
579                         MPI_Irecv(charge_pp+nat,
580                                   pme_pp->nat[sender]*sizeof(real),
581                                   MPI_BYTE,
582                                   pme_pp->node[sender], q,
583                                   pme_pp->mpi_comm_mysim,
584                                   &pme_pp->req[messages++]);
585                         nat += pme_pp->nat[sender];
586                         if (debug)
587                         {
588                             fprintf(debug, "Received from PP rank %d: %d %s\n",
589                                     pme_pp->node[sender], pme_pp->nat[sender],
590                                     (q == eCommType_ChargeA ||
591                                      q == eCommType_ChargeB) ? "charges" : "params");
592                         }
593                     }
594                 }
595             }
596         }
597
598         if (cnb.flags & PP_PME_COORD)
599         {
600             /* The box, FE flag and lambda are sent along with
601              * the coordinates */
602             copy_mat(cnb.box, box);
603             *lambda_q       = cnb.lambda_q;
604             *lambda_lj      = cnb.lambda_lj;
605             *bEnerVir       = (cnb.flags & PP_PME_ENER_VIR);
606             *step           = cnb.step;
607
608             /* Receive the coordinates in place */
609             nat = 0;
610             for (int sender = 0; sender < pme_pp->nnode; sender++)
611             {
612                 if (pme_pp->nat[sender] > 0)
613                 {
614                     MPI_Irecv(pme_pp->x[nat], pme_pp->nat[sender]*sizeof(rvec),
615                               MPI_BYTE,
616                               pme_pp->node[sender], eCommType_COORD,
617                               pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
618                     nat += pme_pp->nat[sender];
619                     if (debug)
620                     {
621                         fprintf(debug, "Received from PP rank %d: %d "
622                                 "coordinates\n",
623                                 pme_pp->node[sender], pme_pp->nat[sender]);
624                     }
625                 }
626             }
627
628             status = pmerecvqxX;
629         }
630
631         /* Wait for the coordinates and/or charges to arrive */
632         MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
633         messages = 0;
634     }
635     while (status == -1);
636 #else
637     GMX_UNUSED_VALUE(box);
638     GMX_UNUSED_VALUE(maxshift_x);
639     GMX_UNUSED_VALUE(maxshift_y);
640     GMX_UNUSED_VALUE(lambda_q);
641     GMX_UNUSED_VALUE(lambda_lj);
642     GMX_UNUSED_VALUE(bEnerVir);
643     GMX_UNUSED_VALUE(step);
644     GMX_UNUSED_VALUE(grid_size);
645     GMX_UNUSED_VALUE(ewaldcoeff_q);
646     GMX_UNUSED_VALUE(ewaldcoeff_lj);
647
648     status = pmerecvqxX;
649 #endif
650
651     if (status == pmerecvqxX)
652     {
653         *natoms   = nat;
654         *chargeA  = pme_pp->chargeA;
655         *chargeB  = pme_pp->chargeB;
656         *sqrt_c6A = pme_pp->sqrt_c6A;
657         *sqrt_c6B = pme_pp->sqrt_c6B;
658         *sigmaA   = pme_pp->sigmaA;
659         *sigmaB   = pme_pp->sigmaB;
660         *x        = pme_pp->x;
661         *f        = pme_pp->f;
662     }
663
664     return status;
665 }
666 /*! \brief Receive virial and energy from PME rank */
667 static void receive_virial_energy(t_commrec *cr,
668                                   matrix vir_q, real *energy_q,
669                                   matrix vir_lj, real *energy_lj,
670                                   real *dvdlambda_q, real *dvdlambda_lj,
671                                   float *pme_cycles)
672 {
673     gmx_pme_comm_vir_ene_t cve;
674
675     if (cr->dd->pme_receive_vir_ener)
676     {
677         if (debug)
678         {
679             fprintf(debug,
680                     "PP rank %d receiving from PME rank %d: virial and energy\n",
681                     cr->sim_nodeid, cr->dd->pme_nodeid);
682         }
683 #if GMX_MPI
684         MPI_Recv(&cve, sizeof(cve), MPI_BYTE, cr->dd->pme_nodeid, 1, cr->mpi_comm_mysim,
685                  MPI_STATUS_IGNORE);
686 #else
687         memset(&cve, 0, sizeof(cve));
688 #endif
689
690         m_add(vir_q, cve.vir_q, vir_q);
691         m_add(vir_lj, cve.vir_lj, vir_lj);
692         *energy_q      = cve.energy_q;
693         *energy_lj     = cve.energy_lj;
694         *dvdlambda_q  += cve.dvdlambda_q;
695         *dvdlambda_lj += cve.dvdlambda_lj;
696         *pme_cycles    = cve.cycles;
697
698         if (cve.stop_cond != gmx_stop_cond_none)
699         {
700             gmx_set_stop_condition(cve.stop_cond);
701         }
702     }
703     else
704     {
705         *energy_q   = 0;
706         *energy_lj  = 0;
707         *pme_cycles = 0;
708     }
709 }
710
711 void gmx_pme_receive_f(t_commrec *cr,
712                        rvec f[], matrix vir_q, real *energy_q,
713                        matrix vir_lj, real *energy_lj,
714                        real *dvdlambda_q, real *dvdlambda_lj,
715                        float *pme_cycles)
716 {
717 #ifdef GMX_PME_DELAYED_WAIT
718     /* Wait for the x request to finish */
719     gmx_pme_send_coeffs_coords_wait(cr->dd);
720 #endif
721
722     int natoms = cr->dd->nat_home;
723
724     if (natoms > cr->dd->pme_recv_f_alloc)
725     {
726         cr->dd->pme_recv_f_alloc = over_alloc_dd(natoms);
727         srenew(cr->dd->pme_recv_f_buf, cr->dd->pme_recv_f_alloc);
728     }
729
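    /* The PME forces are received into a separate buffer and added to f below,
     * so the forces already on this PP rank are not overwritten. */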
730 #if GMX_MPI
731     MPI_Recv(cr->dd->pme_recv_f_buf[0],
732              natoms*sizeof(rvec), MPI_BYTE,
733              cr->dd->pme_nodeid, 0, cr->mpi_comm_mysim,
734              MPI_STATUS_IGNORE);
735 #endif
736
737     int nt = gmx_omp_nthreads_get_simple_rvec_task(emntDefault, natoms);
738
739     /* Note that we would like to avoid this conditional by putting it
740      * into the omp pragma instead, but then we still take the full
741      * omp parallel for overhead (at least with gcc5).
742      */
743     if (nt == 1)
744     {
745         for (int i = 0; i < natoms; i++)
746         {
747             rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
748         }
749     }
750     else
751     {
752 #pragma omp parallel for num_threads(nt) schedule(static)
753         for (int i = 0; i < natoms; i++)
754         {
755             rvec_inc(f[i], cr->dd->pme_recv_f_buf[i]);
756         }
757     }
758
759     receive_virial_energy(cr, vir_q, energy_q, vir_lj, energy_lj, dvdlambda_q, dvdlambda_lj, pme_cycles);
760 }
761
762 void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,
763                                  rvec gmx_unused *f,
764                                  matrix vir_q, real energy_q,
765                                  matrix vir_lj, real energy_lj,
766                                  real dvdlambda_q, real dvdlambda_lj,
767                                  float cycles)
768 {
769 #if GMX_MPI
770     gmx_pme_comm_vir_ene_t cve;
771     int                    messages, ind_start, ind_end;
772     cve.cycles = cycles;
773
774     /* Now the evaluated forces have to be transferred to the PP nodes */
775     messages = 0;
776     ind_end  = 0;
777     for (int receiver = 0; receiver < pme_pp->nnode; receiver++)
778     {
779         ind_start = ind_end;
780         ind_end   = ind_start + pme_pp->nat[receiver];
781         if (MPI_Isend(f[ind_start], (ind_end-ind_start)*sizeof(rvec), MPI_BYTE,
782                       pme_pp->node[receiver], 0,
783                       pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]) != 0)
784         {
785             gmx_comm("MPI_Isend failed in do_pmeonly");
786         }
787     }
788
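    /* The force messages above use tag 0, matching the MPI_Recv in
     * gmx_pme_receive_f(); the virial/energy message below uses tag 1,
     * matching receive_virial_energy(). */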
789     /* Send the virial and energy to the peer PP node */
790     copy_mat(vir_q, cve.vir_q);
791     copy_mat(vir_lj, cve.vir_lj);
792     cve.energy_q     = energy_q;
793     cve.energy_lj    = energy_lj;
794     cve.dvdlambda_q  = dvdlambda_q;
795     cve.dvdlambda_lj = dvdlambda_lj;
796     /* check for the signals to send back to a PP node */
797     cve.stop_cond = gmx_get_stop_condition();
798
799     cve.cycles = cycles;
800
801     if (debug)
802     {
803         fprintf(debug, "PME rank sending to PP rank %d: virial and energy\n",
804                 pme_pp->node_peer);
805     }
806     MPI_Isend(&cve, sizeof(cve), MPI_BYTE,
807               pme_pp->node_peer, 1,
808               pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
809
810     /* Wait for the forces to arrive */
811     MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
812 #else
813     gmx_call("MPI not enabled");
814     GMX_UNUSED_VALUE(pme_pp);
815     GMX_UNUSED_VALUE(f);
816     GMX_UNUSED_VALUE(vir_q);
817     GMX_UNUSED_VALUE(energy_q);
818     GMX_UNUSED_VALUE(vir_lj);
819     GMX_UNUSED_VALUE(energy_lj);
820     GMX_UNUSED_VALUE(dvdlambda_q);
821     GMX_UNUSED_VALUE(dvdlambda_lj);
822     GMX_UNUSED_VALUE(cycles);
823 #endif
824 }