2 #include "gromacs/utility/real.h"
5 #include "../gmx_blas.h"
6 #include "../gmx_lapack.h"
7 #include "lapack_limits.h"
10 F77_FUNC(sgesdd,SGESDD)(const char *jobz,
25 int a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
31 int idum[1], ierr, itau;
32 int minmn, wrkbl, itaup, itauq, mnthr;
35 int wntqn, wntqo, wntqs;
38 int minwrk, ldwrku, maxwrk, ldwkvt;
39 float smlnum,minval, safemin;
48 a_offset = 1 + a_dim1;
52 u_offset = 1 + u_dim1;
55 vt_offset = 1 + vt_dim1;
61 minmn = (*m < *n) ? *m : *n;
62 mnthr = (int) (minmn * 11. / 6.);
63 wntqa = (*jobz=='a' || *jobz=='A');
64 wntqs = (*jobz=='s' || *jobz=='S');
65 wntqas = wntqa || wntqs;
66 wntqn = (*jobz=='o' || *jobz=='O');
67 wntqo = (*jobz=='n' || *jobz=='N');
71 lquery = *lwork == -1;
73 if (*info == 0 && *m > 0 && *n > 0) {
79 bdspac = *n * 3 * *n + (*n << 2);
85 i__1 = wrkbl, i__2 = bdspac + *n;
86 maxwrk = (i__1 > i__2) ? i__1 : i__2;
91 i__1 = wrkbl, i__2 = *n + (*m << 5);
92 wrkbl = (i__1 > i__2) ? i__1 : i__2;
93 i__1 = wrkbl, i__2 = bdspac + *n * 3;
94 wrkbl = (i__1 > i__2) ? i__1 : i__2;
95 maxwrk = wrkbl + *n * *n;
96 minwrk = bdspac + *n * *n + *n * 3;
100 wrkbl = *n * 3 + (*m + *n*32);
102 i__1 = wrkbl, i__2 = bdspac + *n * 3;
103 maxwrk = (i__1 > i__2) ? i__1 : i__2;
104 minwrk = *n * 3 + ((*m > bdspac) ? *m : bdspac);
106 i__1 = maxwrk, i__2 = bdspac + *n * 3;
107 maxwrk = (i__1 > i__2) ? i__1 : i__2;
108 minwrk = *n * 3 + ((*m > bdspac) ? *m : bdspac);
116 bdspac = *m * 3 * *m + (*m*4);
122 i__1 = wrkbl, i__2 = bdspac + *m;
123 maxwrk = (i__1 > i__2) ? i__1 : i__2;
124 minwrk = bdspac + *m;
128 i__1 = wrkbl, i__2 = *m + (*n*32);
129 wrkbl = (i__1 > i__2) ? i__1 : i__2;
131 i__1 = wrkbl, i__2 = bdspac + *m * 3;
132 wrkbl = (i__1 > i__2) ? i__1 : i__2;
133 maxwrk = wrkbl + *m * *m;
134 minwrk = bdspac + *m * *m + *m * 3;
137 wrkbl = *m * 3 + (*m + *n*32);
139 i__1 = wrkbl, i__2 = bdspac + *m * 3;
140 maxwrk = (i__1 > i__2) ? i__1 : i__2;
141 minwrk = *m * 3 + ((*m > bdspac) ? *m : bdspac);
143 i__1 = wrkbl, i__2 = bdspac + *m * 3;
144 maxwrk = (i__1 > i__2) ? i__1 : i__2;
145 minwrk = *m * 3 + ((*m > bdspac) ? *m : bdspac);
149 work[1] = (float) maxwrk;
157 if (*m == 0 || *n == 0) {
164 minval = GMX_FLOAT_MIN;
165 safemin = minval / eps;
166 smlnum = sqrt(safemin) / eps;
169 bignum = 1. / smlnum;
172 anrm = F77_FUNC(slange,SLANGE)("M", m, n, &a[a_offset], lda, dum);
174 if (anrm > 0. && anrm < smlnum) {
176 F77_FUNC(slascl,SLASCL)("G",&c__0,&c__0,&anrm,&smlnum,m,n,&a[a_offset],lda,&ierr);
177 } else if (anrm > bignum) {
179 F77_FUNC(slascl,SLASCL)("G",&c__0,&c__0,&anrm,&bignum,m,n,&a[a_offset],lda,&ierr);
190 i__1 = *lwork - nwork + 1;
191 F77_FUNC(sgeqrf,SGEQRF)(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
196 F77_FUNC(slaset,SLASET)("L", &i__1, &i__2, &zero, &zero, &a[a_dim1 + 2],
203 i__1 = *lwork - nwork + 1;
204 F77_FUNC(sgebrd,SGEBRD)(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
205 itauq], &work[itaup], &work[nwork], &i__1, &ierr);
208 F77_FUNC(sbdsdc,SBDSDC)("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1,
209 dum, idum, &work[nwork], &iwork[1], info);
215 itau = iu + ldwrku * *n;
218 i__1 = *lwork - nwork + 1;
219 F77_FUNC(sgeqrf,SGEQRF)(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
221 F77_FUNC(slacpy,SLACPY)("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
223 i__1 = *lwork - nwork + 1;
224 F77_FUNC(sorgqr,SORGQR)(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork],
229 F77_FUNC(slaset,SLASET)("L", &i__1, &i__2, &zero, &zero, &a[a_dim1 + 2],
236 i__1 = *lwork - nwork + 1;
237 F77_FUNC(sgebrd,SGEBRD)(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
238 itauq], &work[itaup], &work[nwork], &i__1, &ierr);
240 F77_FUNC(sbdsdc,SBDSDC)("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[
241 vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
244 i__1 = *lwork - nwork + 1;
245 F77_FUNC(sormbr,SORMBR)("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[
246 itauq], &work[iu], &ldwrku, &work[nwork], &i__1, &
248 i__1 = *lwork - nwork + 1;
249 F77_FUNC(sormbr,SORMBR)("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
250 itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
253 F77_FUNC(sgemm,SGEMM)("N", "N", m, n, n, &one, &u[u_offset], ldu, &work[iu]
254 , &ldwrku, &zero, &a[a_offset], lda);
256 F77_FUNC(slacpy,SLACPY)("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
266 i__1 = *lwork - nwork + 1;
267 F77_FUNC(sgebrd,SGEBRD)(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
268 work[itaup], &work[nwork], &i__1, &ierr);
271 F77_FUNC(sbdsdc,SBDSDC)("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1,
272 dum, idum, &work[nwork], &iwork[1], info);
275 F77_FUNC(slaset,SLASET)("F", m, m, &zero, &zero, &u[u_offset], ldu);
276 F77_FUNC(sbdsdc,SBDSDC)("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
277 vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
282 F77_FUNC(slaset,SLASET)("F", &i__1, &i__2, &zero, &one, &u[*n + 1 + (*n +
285 i__1 = *lwork - nwork + 1;
286 F77_FUNC(sormbr,SORMBR)("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
287 itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
288 i__1 = *lwork - nwork + 1;
289 F77_FUNC(sormbr,SORMBR)("P", "R", "T", n, n, m, &a[a_offset], lda, &work[
290 itaup], &vt[vt_offset],ldvt,&work[nwork],&i__1,&ierr);
304 i__1 = *lwork - nwork + 1;
305 F77_FUNC(sgelqf,SGELQF)(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
310 F77_FUNC(slaset,SLASET)("U", &i__1, &i__2, &zero, &zero, &a[(a_dim1*2) +
317 i__1 = *lwork - nwork + 1;
318 F77_FUNC(sgebrd,SGEBRD)(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
319 itauq], &work[itaup], &work[nwork], &i__1, &ierr);
322 F77_FUNC(sbdsdc,SBDSDC)("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1,
323 dum, idum, &work[nwork], &iwork[1], info);
330 itau = ivt + ldwkvt * *m;
333 i__1 = *lwork - nwork + 1;
334 F77_FUNC(sgelqf,SGELQF)(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
336 F77_FUNC(slacpy,SLACPY)("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
338 i__1 = *lwork - nwork + 1;
339 F77_FUNC(sorglq,SORGLQ)(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[
340 nwork], &i__1, &ierr);
344 F77_FUNC(slaset,SLASET)("U", &i__1, &i__2, &zero, &zero, &a[(a_dim1*2) +
351 i__1 = *lwork - nwork + 1;
352 F77_FUNC(sgebrd,SGEBRD)(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
353 itauq], &work[itaup], &work[nwork], &i__1, &ierr);
355 F77_FUNC(sbdsdc,SBDSDC)("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
356 work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1]
359 i__1 = *lwork - nwork + 1;
360 F77_FUNC(sormbr,SORMBR)("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[
361 itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
362 i__1 = *lwork - nwork + 1;
363 F77_FUNC(sormbr,SORMBR)("P", "R", "T", m, m, m, &a[a_offset], lda, &work[
364 itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, &
367 F77_FUNC(sgemm,SGEMM)("N", "N", m, n, m, &one, &work[ivt], &ldwkvt, &vt[
368 vt_offset], ldvt, &zero, &a[a_offset], lda);
370 F77_FUNC(slacpy,SLACPY)("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
381 i__1 = *lwork - nwork + 1;
382 F77_FUNC(sgebrd,SGEBRD)(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
383 work[itaup], &work[nwork], &i__1, &ierr);
386 F77_FUNC(sbdsdc,SBDSDC)("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1,
387 dum, idum, &work[nwork], &iwork[1], info);
389 F77_FUNC(slaset,SLASET)("F", n, n, &zero, &zero, &vt[vt_offset], ldvt);
390 F77_FUNC(sbdsdc,SBDSDC)("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
391 vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
396 F77_FUNC(slaset,SLASET)("F", &i__1, &i__2, &zero, &one, &vt[*m + 1 + (*m +
397 1) * vt_dim1], ldvt);
399 i__1 = *lwork - nwork + 1;
400 F77_FUNC(sormbr,SORMBR)("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
401 itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
402 i__1 = *lwork - nwork + 1;
403 F77_FUNC(sormbr,SORMBR)("P", "R", "T", n, n, m, &a[a_offset], lda, &work[
404 itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
414 F77_FUNC(slascl,SLASCL)("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
418 F77_FUNC(slascl,SLASCL)("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
423 work[1] = (float) maxwrk;