2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 * \brief This file defines functions for managing threading of listed
41 * \author Mark Abraham <mark.j.abraham@gmail.com>
42 * \ingroup module_listed-forces
46 #include "manage-threading.h"
52 #include "gromacs/listed-forces/bonded.h"
53 #include "gromacs/utility/fatalerror.h"
54 #include "gromacs/utility/smalloc.h"
56 //! Divides listed interactions over threads
57 static void divide_bondeds_over_threads(t_idef *idef, int nthreads)
64 idef->nthreads = nthreads;
66 if (F_NRE*(nthreads+1) > idef->il_thread_division_nalloc)
68 idef->il_thread_division_nalloc = F_NRE*(nthreads+1);
69 snew(idef->il_thread_division, idef->il_thread_division_nalloc);
72 for (ftype = 0; ftype < F_NRE; ftype++)
74 if (ftype_is_bonded_potential(ftype))
76 nat1 = interaction_function[ftype].nratoms + 1;
78 for (t = 0; t <= nthreads; t++)
80 /* Divide the interactions equally over the threads.
81 * When the different types of bonded interactions
82 * are distributed roughly equally over the threads,
83 * this should lead to well localized output into
84 * the force buffer on each thread.
85 * If this is not the case, a more advanced scheme
86 * (not implemented yet) will do better.
88 il_nr_thread = (((idef->il[ftype].nr/nat1)*t)/nthreads)*nat1;
90 /* Ensure that distance restraint pairs with the same label
91 * end up on the same thread.
92 * This is slighlty tricky code, since the next for iteration
93 * may have an initial il_nr_thread lower than the final value
94 * in the previous iteration, but this will anyhow be increased
95 * to the approriate value again by this while loop.
97 while (ftype == F_DISRES &&
99 il_nr_thread < idef->il[ftype].nr &&
100 idef->iparams[idef->il[ftype].iatoms[il_nr_thread]].disres.label ==
101 idef->iparams[idef->il[ftype].iatoms[il_nr_thread-nat1]].disres.label)
103 il_nr_thread += nat1;
106 idef->il_thread_division[ftype*(nthreads+1)+t] = il_nr_thread;
112 //! Construct a reduction mask for which interaction was computed on which thread
114 calc_bonded_reduction_mask(const t_idef *idef,
119 int ftype, nb, nat1, nb0, nb1, i, a;
123 for (ftype = 0; ftype < F_NRE; ftype++)
125 if (ftype_is_bonded_potential(ftype))
127 nb = idef->il[ftype].nr;
130 nat1 = interaction_function[ftype].nratoms + 1;
132 /* Divide this interaction equally over the threads.
133 * This is not stored: should match division in calc_bonds.
135 nb0 = idef->il_thread_division[ftype*(nt+1)+t];
136 nb1 = idef->il_thread_division[ftype*(nt+1)+t+1];
138 for (i = nb0; i < nb1; i += nat1)
140 for (a = 1; a < nat1; a++)
142 mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
153 /*! \brief We divide the force array in a maximum of 32 blocks.
154 * Minimum force block reduction size is thus 2^6=64.
156 const int maxBlockBits = 32;
158 void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
163 assert(fr->nthreads >= 1);
165 /* Divide the bonded interaction over the threads */
166 divide_bondeds_over_threads(idef, fr->nthreads);
168 if (fr->nthreads == 1)
176 while (fr->natoms_force > (int)(maxBlockBits*(1U<<fr->red_ashift)))
182 fprintf(debug, "bonded force buffer block atom shift %d bits\n",
186 /* Determine to which blocks each thread's bonded force calculation
187 * contributes. Store this is a mask for each thread.
189 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
190 for (t = 1; t < fr->nthreads; t++)
192 fr->f_t[t].red_mask =
193 calc_bonded_reduction_mask(idef, fr->red_ashift, t, fr->nthreads);
196 /* Determine the maximum number of blocks we need to reduce over */
199 for (t = 0; t < fr->nthreads; t++)
202 for (b = 0; b < maxBlockBits; b++)
204 if (fr->f_t[t].red_mask & (1U<<b))
206 fr->red_nblock = std::max(fr->red_nblock, b+1);
212 fprintf(debug, "thread %d flags %x count %d\n",
213 t, fr->f_t[t].red_mask, c);
219 fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
220 fr->red_nblock, 1<<fr->red_ashift);
221 fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
222 ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
223 ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));