Fixing copyright issues and code contributors
[alexxy/gromacs.git] / src / gmxlib / gmx_lapack / slarrvx.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
5  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
6  * others, as listed in the AUTHORS file in the top-level source
7  * directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 #include <math.h>
36
37 #include <types/simple.h>
38
39 #include "gmx_blas.h"
40 #include "gmx_lapack.h"
41 #include "lapack_limits.h"
42
43
44 void
45 F77_FUNC(slarrvx,SLARRVX)(int *n, 
46         float *d__, 
47         float *l, 
48         int *isplit,
49         int *m, 
50         float *w,
51         int *iblock, 
52         int *indexw, 
53         float *gersch, 
54         float *tol, 
55         float *z__, 
56         int *ldz, 
57         int *isuppz, 
58         float *work, 
59         int *iwork, 
60         int *info)
61 {
62     int z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6;
63     float d__1, d__2;
64     float c_b5 = 0.;
65     int c__1 = 1;
66     int c__2 = 2;
67
68     int i__, j, k, p, q;
69     int im, in;
70     float gap, eps, tmp;
71     int zto;
72     float ztz;
73     int iend, jblk;
74     int wend, iter, temp[1], ktot;
75     int itmp1, itmp2;
76     int indld;
77     float sigma;
78     int ndone, iinfo, iindr;
79     float resid;
80     int nomgs;
81     int nclus;
82     int zfrom, iindc1, iindc2;
83     float lambda;
84     int ibegin;
85     int indgap, indlld;
86     float mingma;
87     int oldien, oldncl, wbegin;
88     float relgap;
89     int oldcls;
90     int ndepth, inderr, iindwk;
91     int newcls, oldfst;
92     float minrgp=0.0;
93     int indwrk, oldlst;
94     float reltol;
95     int newfrs, newftt, parity;
96     float mgstol, nrminv, rqcorr;
97     int newlst, newsiz;
98
99
100     --d__;
101     --l;
102     --isplit;
103     --w;
104     --iblock;
105     --indexw;
106     --gersch;
107     z_dim1 = *ldz;
108     z_offset = 1 + z_dim1;
109     z__ -= z_offset;
110     --isuppz;
111     --work;
112     --iwork;
113
114     inderr = *n;
115     indld = *n << 1;
116     indlld = *n * 3;
117     indgap = *n << 2;
118     indwrk = *n * 5 + 1;
119
120     iindr = *n;
121     iindc1 = *n << 1;
122     iindc2 = *n * 3;
123     iindwk = (*n << 2) + 1;
124
125     eps = GMX_FLOAT_EPS;
126
127     i__1 = *n << 1;
128     for (i__ = 1; i__ <= i__1; ++i__) {
129         iwork[i__] = 0;
130     }
131     F77_FUNC(slaset,SLASET)("Full", n, m, &c_b5, &c_b5, &z__[z_offset], ldz);
132     mgstol = eps * 100.;
133
134     ibegin = 1;
135     wbegin = 1;
136     i__1 = iblock[*m];
137     for (jblk = 1; jblk <= i__1; ++jblk) {
138         iend = isplit[jblk];
139
140         wend = wbegin - 1;
141 L171:
142         if (wend < *m) {
143             if (iblock[wend + 1] == jblk) {
144                 ++wend;
145                 goto L171;
146             }
147         }
148         if (wend < wbegin) {
149             ibegin = iend + 1;
150             continue;
151         }
152
153         if (ibegin == iend) {
154             z__[ibegin + wbegin * z_dim1] = 1.;
155             isuppz[(wbegin << 1) - 1] = ibegin;
156             isuppz[wbegin * 2] = ibegin;
157             ibegin = iend + 1;
158             wbegin = wend + 1;
159             continue;
160         }
161         oldien = ibegin - 1;
162         in = iend - oldien;
163         d__1 = .001, d__2 = 1. / (float) in;
164         reltol = (d__1<d__2) ? d__1 : d__2;
165         im = wend - wbegin + 1;
166         F77_FUNC(scopy,SCOPY)(&im, &w[wbegin], &c__1, &work[1], &c__1);
167         i__2 = im - 1;
168         for (i__ = 1; i__ <= i__2; ++i__) {
169             work[inderr + i__] = eps * fabs(work[i__]);
170             work[indgap + i__] = work[i__ + 1] - work[i__];
171         }
172         work[inderr + im] = eps * fabs(work[im]);
173         d__2 = fabs(work[im]);
174         work[indgap + im] = (d__2>eps) ? d__2 : eps;
175         ndone = 0;
176
177         ndepth = 0;
178         parity = 1;
179         nclus = 1;
180         iwork[iindc1 + 1] = 1;
181         iwork[iindc1 + 2] = im;
182
183 L40:
184         if (ndone < im) {
185             oldncl = nclus;
186             nclus = 0;
187             parity = 1 - parity;
188             if (parity == 0) {
189                 oldcls = iindc1;
190                 newcls = iindc2;
191             } else {
192                 oldcls = iindc2;
193                 newcls = iindc1;
194             }
195             i__2 = oldncl;
196             for (i__ = 1; i__ <= i__2; ++i__) {
197
198                 j = oldcls + (i__ << 1);
199                 oldfst = iwork[j - 1];
200                 oldlst = iwork[j];
201                 if (ndepth > 0) {
202                     j = wbegin + oldfst - 1;
203                     F77_FUNC(scopy,SCOPY)(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin]
204                             , &c__1);
205                     i__3 = in - 1;
206                     F77_FUNC(scopy,SCOPY)(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[
207                             ibegin], &c__1);
208                     F77_FUNC(slaset,SLASET)("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j 
209                             * z_dim1], ldz);
210                 }
211                 k = ibegin;
212                 i__3 = in - 1;
213                 for (j = 1; j <= i__3; ++j) {
214                     tmp = d__[k] * l[k];
215                     work[indld + j] = tmp;
216                     work[indlld + j] = tmp * l[k];
217                     ++k;
218                 }
219                 if (ndepth > 0) {
220
221                     p = indexw[wbegin - 1 + oldfst];
222                     q = indexw[wbegin - 1 + oldlst];
223                     d__1 = eps * 4.;
224                     i__3 = p - oldfst;
225                     F77_FUNC(slarrbx,SLARRBX)(&in, &d__[ibegin], &l[ibegin], &work[indld + 1], &
226                             work[indlld + 1], &p, &q, &reltol, &d__1, &i__3, &
227                             work[1], &work[indgap + 1], &work[inderr + 1], &
228                             work[indwrk + in], &iwork[iindwk], &iinfo);
229                 }
230                 newfrs = oldfst;
231                 i__3 = oldlst;
232                 for (j = oldfst; j <= i__3; ++j) {
233                     if (j == oldlst || work[indgap + j] >= 
234                         reltol * fabs(work[j])) {
235                         newlst = j;
236                     } else {
237
238                         relgap = work[indgap + j] / fabs(work[j]);
239                         if (j == newfrs) {
240                             minrgp = relgap;
241                         } else {
242                             minrgp = (minrgp<relgap) ? minrgp : relgap;
243                         }
244                         continue;
245                     }
246                     newsiz = newlst - newfrs + 1;
247                     newftt = wbegin + newfrs - 1;
248                     nomgs = newsiz == 1 || newsiz > 1 || minrgp < mgstol;
249                     if (newsiz > 1 && nomgs) {
250
251                         F77_FUNC(slarrfx,SLARRFX)(&in, &d__[ibegin], &l[ibegin], &work[indld + 
252                                 1], &work[indlld + 1], &newfrs, &newlst, &
253                                 work[1], &sigma, &z__[ibegin + newftt * 
254                                 z_dim1], &z__[ibegin + (newftt + 1) * z_dim1],
255                                  &work[indwrk], info);
256                         if (*info == 0) {
257                             tmp = eps * fabs(sigma);
258                             i__4 = newlst;
259                             for (k = newfrs; k <= i__4; ++k) {
260                                 work[k] -= sigma;
261                                 d__1 = work[indgap + k];
262                                 work[indgap + k] = (d__1>tmp) ? d__1 : tmp;
263                                 work[inderr + k] += tmp;
264                             }
265                             ++nclus;
266                             k = newcls + (nclus << 1);
267                             iwork[k - 1] = newfrs;
268                             iwork[k] = newlst;
269                         } else {
270                             *info = 0;
271                             if (minrgp >= mgstol) {
272                                 nomgs = 0;
273                             } else {
274
275                                 work[indwrk] = d__[ibegin];
276                                 i__4 = in - 1;
277                                 for (k = 1; k <= i__4; ++k) {
278                                     work[indwrk + k] = d__[ibegin + k] + work[
279                                             indlld + k];
280                                 }
281                                 i__4 = newsiz;
282                                 for (k = 1; k <= i__4; ++k) {
283                                     iwork[iindwk + k - 1] = 1;
284                                 }
285                                 i__4 = newlst;
286                                 for (k = newfrs; k <= i__4; ++k) {
287                                     isuppz[2*(oldien + k) - 1] = 1;
288                                     isuppz[(oldien + k) * 2] = in;
289                                 }
290                                 temp[0] = in;
291                                 F77_FUNC(sstein,SSTEIN)(&in, &work[indwrk], &work[indld + 1], 
292                                         &newsiz, &work[newfrs], &iwork[iindwk]
293                                         , temp, &z__[ibegin + newftt * z_dim1]
294                                         , ldz, &work[indwrk + in], &iwork[
295                                         iindwk + in], &iwork[iindwk + (in*2)], &iinfo);
296                                 if (iinfo != 0) {
297                                     *info = 2;
298                                     return;
299                                 }
300                                 ndone += newsiz;
301                             }
302                         }
303                     } else {
304                         ktot = newftt;
305                         i__4 = newlst;
306                         for (k = newfrs; k <= i__4; ++k) {
307                             iter = 0;
308 L90:
309                             lambda = work[k];
310
311                             F77_FUNC(slar1vx,SLAR1VX)(&in, &c__1, &in, &lambda, &d__[ibegin], &
312                                     l[ibegin], &work[indld + 1], &work[indlld 
313                                     + 1], &w[wbegin + k - 1], &gersch[(oldien 
314                                     << 1) + 1], &z__[ibegin + ktot * z_dim1], 
315                                     &ztz, &mingma, &iwork[iindr + ktot], &
316                                     isuppz[(ktot << 1) - 1], &work[indwrk]);
317                             tmp = 1. / ztz;
318                             nrminv = sqrt(tmp);
319                             resid = fabs(mingma) * nrminv;
320                             rqcorr = mingma * tmp;
321                             if (k == in) {
322                                 gap = work[indgap + k - 1];
323                             } else if (k == 1) {
324                                 gap = work[indgap + k];
325                             } else {
326                                 d__1 = work[indgap + k - 1], d__2 = work[
327                                         indgap + k];
328                                 gap = (d__1<d__2) ? d__1 : d__2;
329                             }
330                             ++iter;
331                             if (resid > *tol * gap && fabs(rqcorr) > eps * 4. *
332                                      fabs(lambda)) {
333                                 work[k] = lambda + rqcorr;
334                                 if (iter < 8) {
335                                     goto L90;
336                                 }
337                             }
338                             iwork[ktot] = 1;
339                             if (newsiz == 1) {
340                                 ++ndone;
341                             }
342                             zfrom = isuppz[(ktot << 1) - 1];
343                             zto = isuppz[ktot * 2];
344                             i__5 = zto - zfrom + 1;
345                             F77_FUNC(sscal,SSCAL)(&i__5, &nrminv, &z__[ibegin + zfrom - 1 + 
346                                     ktot * z_dim1], &c__1);
347                             ++ktot;
348                         }
349                         if (newsiz > 1) {
350                             itmp1 = isuppz[(newftt << 1) - 1];
351                             itmp2 = isuppz[newftt * 2];
352                             ktot = oldien + newlst;
353                             i__4 = ktot;
354                             for (p = newftt + 1; p <= i__4; ++p) {
355                                 i__5 = p - 1;
356                                 for (q = newftt; q <= i__5; ++q) {
357                                     tmp = -F77_FUNC(sdot,SDOT)(&in, &z__[ibegin + p * 
358                                             z_dim1], &c__1, &z__[ibegin + q * 
359                                             z_dim1], &c__1);
360                                     F77_FUNC(saxpy,SAXPY)(&in, &tmp, &z__[ibegin + q * 
361                                             z_dim1], &c__1, &z__[ibegin + p * 
362                                             z_dim1], &c__1);
363                                 }
364                                 tmp = 1. / F77_FUNC(snrm2,SNRM2)(&in, &z__[ibegin + p * 
365                                         z_dim1], &c__1);
366                                 F77_FUNC(sscal,SSCAL)(&in, &tmp, &z__[ibegin + p * z_dim1], &
367                                         c__1);
368                                 i__5 = itmp1, i__6 = isuppz[(p << 1) - 1];
369                                 itmp1 = (i__5<i__6) ? i__5 : i__6;
370                                 i__5 = itmp2, i__6 = isuppz[p * 2];
371                                 itmp2 = (i__5>i__6) ? i__5 : i__6;
372                             }
373                             i__4 = ktot;
374                             for (p = newftt; p <= i__4; ++p) {
375                                 isuppz[(p << 1) - 1] = itmp1;
376                                 isuppz[p * 2] = itmp2;
377                             }
378                             ndone += newsiz;
379                         }
380                     }
381                     newfrs = j + 1;
382                 }
383             }
384             ++ndepth;
385             goto L40;
386         }
387         j = wbegin << 1;
388         i__2 = wend;
389         for (i__ = wbegin; i__ <= i__2; ++i__) {
390             isuppz[j - 1] += oldien;
391             isuppz[j] += oldien;
392             j += 2;
393
394         }
395         ibegin = iend + 1;
396         wbegin = wend + 1;
397     }
398
399     return;
400
401