ergo
template_lapack_latrd.h
Go to the documentation of this file.
1 /* Ergo, version 3.4, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2014 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LATRD_HEADER
36 #define TEMPLATE_LAPACK_LATRD_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal *
41  a, const integer *lda, Treal *e, Treal *tau, Treal *w,
42  const integer *ldw)
43 {
44 /* -- LAPACK auxiliary routine (version 3.0) --
45  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
46  Courant Institute, Argonne National Lab, and Rice University
47  October 31, 1992
48 
49 
50  Purpose
51  =======
52 
53  DLATRD reduces NB rows and columns of a real symmetric matrix A to
54  symmetric tridiagonal form by an orthogonal similarity
55  transformation Q' * A * Q, and returns the matrices V and W which are
56  needed to apply the transformation to the unreduced part of A.
57 
58  If UPLO = 'U', DLATRD reduces the last NB rows and columns of a
59  matrix, of which the upper triangle is supplied;
60  if UPLO = 'L', DLATRD reduces the first NB rows and columns of a
61  matrix, of which the lower triangle is supplied.
62 
63  This is an auxiliary routine called by DSYTRD.
64 
65  Arguments
66  =========
67 
68  UPLO (input) CHARACTER
69  Specifies whether the upper or lower triangular part of the
70  symmetric matrix A is stored:
71  = 'U': Upper triangular
72  = 'L': Lower triangular
73 
74  N (input) INTEGER
75  The order of the matrix A.
76 
77  NB (input) INTEGER
78  The number of rows and columns to be reduced.
79 
80  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
81  On entry, the symmetric matrix A. If UPLO = 'U', the leading
82  n-by-n upper triangular part of A contains the upper
83  triangular part of the matrix A, and the strictly lower
84  triangular part of A is not referenced. If UPLO = 'L', the
85  leading n-by-n lower triangular part of A contains the lower
86  triangular part of the matrix A, and the strictly upper
87  triangular part of A is not referenced.
88  On exit:
89  if UPLO = 'U', the last NB columns have been reduced to
90  tridiagonal form, with the diagonal elements overwriting
91  the diagonal elements of A; the elements above the diagonal
92  with the array TAU, represent the orthogonal matrix Q as a
93  product of elementary reflectors;
94  if UPLO = 'L', the first NB columns have been reduced to
95  tridiagonal form, with the diagonal elements overwriting
96  the diagonal elements of A; the elements below the diagonal
97  with the array TAU, represent the orthogonal matrix Q as a
98  product of elementary reflectors.
99  See Further Details.
100 
101  LDA (input) INTEGER
102  The leading dimension of the array A. LDA >= (1,N).
103 
104  E (output) DOUBLE PRECISION array, dimension (N-1)
105  If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
106  elements of the last NB columns of the reduced matrix;
107  if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
108  the first NB columns of the reduced matrix.
109 
110  TAU (output) DOUBLE PRECISION array, dimension (N-1)
111  The scalar factors of the elementary reflectors, stored in
112  TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
113  See Further Details.
114 
115  W (output) DOUBLE PRECISION array, dimension (LDW,NB)
116  The n-by-nb matrix W required to update the unreduced part
117  of A.
118 
119  LDW (input) INTEGER
120  The leading dimension of the array W. LDW >= max(1,N).
121 
122  Further Details
123  ===============
124 
125  If UPLO = 'U', the matrix Q is represented as a product of elementary
126  reflectors
127 
128  Q = H(n) H(n-1) . . . H(n-nb+1).
129 
130  Each H(i) has the form
131 
132  H(i) = I - tau * v * v'
133 
134  where tau is a real scalar, and v is a real vector with
135  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
136  and tau in TAU(i-1).
137 
138  If UPLO = 'L', the matrix Q is represented as a product of elementary
139  reflectors
140 
141  Q = H(1) H(2) . . . H(nb).
142 
143  Each H(i) has the form
144 
145  H(i) = I - tau * v * v'
146 
147  where tau is a real scalar, and v is a real vector with
148  v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
149  and tau in TAU(i).
150 
151  The elements of the vectors v together form the n-by-nb matrix V
152  which is needed, with W, to apply the transformation to the unreduced
153  part of the matrix, using a symmetric rank-2k update of the form:
154  A := A - V*W' - W*V'.
155 
156  The contents of A on exit are illustrated by the following examples
157  with n = 5 and nb = 2:
158 
159  if UPLO = 'U': if UPLO = 'L':
160 
161  ( a a a v4 v5 ) ( d )
162  ( a a v4 v5 ) ( 1 d )
163  ( a 1 v5 ) ( v1 1 a )
164  ( d 1 ) ( v1 v2 a a )
165  ( d ) ( v1 v2 a a a )
166 
167  where d denotes a diagonal element of the reduced matrix, a denotes
168  an element of the original matrix that is unchanged, and vi denotes
169  an element of the vector defining H(i).
170 
171  =====================================================================
172 
173 
174  Quick return if possible
175 
176  Parameter adjustments */
177  /* Table of constant values */
178  Treal c_b5 = -1.;
179  Treal c_b6 = 1.;
180  integer c__1 = 1;
181  Treal c_b16 = 0.;
182 
183  /* System generated locals */
184  integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
185  /* Local variables */
186  integer i__;
187  Treal alpha;
188  integer iw;
189 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
190 #define w_ref(a_1,a_2) w[(a_2)*w_dim1 + a_1]
191 
192 
193  a_dim1 = *lda;
194  a_offset = 1 + a_dim1 * 1;
195  a -= a_offset;
196  --e;
197  --tau;
198  w_dim1 = *ldw;
199  w_offset = 1 + w_dim1 * 1;
200  w -= w_offset;
201 
202  /* Function Body */
203  if (*n <= 0) {
204  return 0;
205  }
206 
207  if (template_blas_lsame(uplo, "U")) {
208 
209 /* Reduce last NB columns of upper triangle */
210 
211  i__1 = *n - *nb + 1;
212  for (i__ = *n; i__ >= i__1; --i__) {
213  iw = i__ - *n + *nb;
214  if (i__ < *n) {
215 
216 /* Update A(1:i,i) */
217 
218  i__2 = *n - i__;
219  template_blas_gemv("No transpose", &i__, &i__2, &c_b5, &a_ref(1, i__ + 1),
220  lda, &w_ref(i__, iw + 1), ldw, &c_b6, &a_ref(1, i__),
221  &c__1);
222  i__2 = *n - i__;
223  template_blas_gemv("No transpose", &i__, &i__2, &c_b5, &w_ref(1, iw + 1),
224  ldw, &a_ref(i__, i__ + 1), lda, &c_b6, &a_ref(1, i__),
225  &c__1);
226  }
227  if (i__ > 1) {
228 
229 /* Generate elementary reflector H(i) to annihilate
230  A(1:i-2,i) */
231 
232  i__2 = i__ - 1;
233  template_lapack_larfg(&i__2, &a_ref(i__ - 1, i__), &a_ref(1, i__), &c__1, &
234  tau[i__ - 1]);
235  e[i__ - 1] = a_ref(i__ - 1, i__);
236  a_ref(i__ - 1, i__) = 1.;
237 
238 /* Compute W(1:i-1,i) */
239 
240  i__2 = i__ - 1;
241  template_blas_symv("Upper", &i__2, &c_b6, &a[a_offset], lda, &a_ref(1,
242  i__), &c__1, &c_b16, &w_ref(1, iw), &c__1);
243  if (i__ < *n) {
244  i__2 = i__ - 1;
245  i__3 = *n - i__;
246  template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &w_ref(1, iw + 1)
247  , ldw, &a_ref(1, i__), &c__1, &c_b16, &w_ref(i__
248  + 1, iw), &c__1);
249  i__2 = i__ - 1;
250  i__3 = *n - i__;
251  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__
252  + 1), lda, &w_ref(i__ + 1, iw), &c__1, &c_b6, &
253  w_ref(1, iw), &c__1);
254  i__2 = i__ - 1;
255  i__3 = *n - i__;
256  template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &a_ref(1, i__ +
257  1), lda, &a_ref(1, i__), &c__1, &c_b16, &w_ref(
258  i__ + 1, iw), &c__1);
259  i__2 = i__ - 1;
260  i__3 = *n - i__;
261  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(1, iw
262  + 1), ldw, &w_ref(i__ + 1, iw), &c__1, &c_b6, &
263  w_ref(1, iw), &c__1);
264  }
265  i__2 = i__ - 1;
266  template_blas_scal(&i__2, &tau[i__ - 1], &w_ref(1, iw), &c__1);
267  i__2 = i__ - 1;
268  alpha = tau[i__ - 1] * -.5 * template_blas_dot(&i__2, &w_ref(1, iw), &
269  c__1, &a_ref(1, i__), &c__1);
270  i__2 = i__ - 1;
271  template_blas_axpy(&i__2, &alpha, &a_ref(1, i__), &c__1, &w_ref(1, iw), &
272  c__1);
273  }
274 
275 /* L10: */
276  }
277  } else {
278 
279 /* Reduce first NB columns of lower triangle */
280 
281  i__1 = *nb;
282  for (i__ = 1; i__ <= i__1; ++i__) {
283 
284 /* Update A(i:n,i) */
285 
286  i__2 = *n - i__ + 1;
287  i__3 = i__ - 1;
288  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__, 1), lda, &
289  w_ref(i__, 1), ldw, &c_b6, &a_ref(i__, i__), &c__1);
290  i__2 = *n - i__ + 1;
291  i__3 = i__ - 1;
292  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__, 1), ldw, &
293  a_ref(i__, 1), lda, &c_b6, &a_ref(i__, i__), &c__1);
294  if (i__ < *n) {
295 
296 /* Generate elementary reflector H(i) to annihilate
297  A(i+2:n,i)
298 
299  Computing MIN */
300  i__2 = i__ + 2;
301  i__3 = *n - i__;
302  template_lapack_larfg(&i__3, &a_ref(i__ + 1, i__), &a_ref(minMACRO(i__2,*n), i__)
303  , &c__1, &tau[i__]);
304  e[i__] = a_ref(i__ + 1, i__);
305  a_ref(i__ + 1, i__) = 1.;
306 
307 /* Compute W(i+1:n,i) */
308 
309  i__2 = *n - i__;
310  template_blas_symv("Lower", &i__2, &c_b6, &a_ref(i__ + 1, i__ + 1), lda, &
311  a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(i__ + 1,
312  i__), &c__1);
313  i__2 = *n - i__;
314  i__3 = i__ - 1;
315  template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &w_ref(i__ + 1, 1),
316  ldw, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1,
317  i__), &c__1);
318  i__2 = *n - i__;
319  i__3 = i__ - 1;
320  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 1)
321  , lda, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1,
322  i__), &c__1);
323  i__2 = *n - i__;
324  i__3 = i__ - 1;
325  template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &a_ref(i__ + 1, 1),
326  lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1,
327  i__), &c__1);
328  i__2 = *n - i__;
329  i__3 = i__ - 1;
330  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__ + 1, 1)
331  , ldw, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1,
332  i__), &c__1);
333  i__2 = *n - i__;
334  template_blas_scal(&i__2, &tau[i__], &w_ref(i__ + 1, i__), &c__1);
335  i__2 = *n - i__;
336  alpha = tau[i__] * -.5 * template_blas_dot(&i__2, &w_ref(i__ + 1, i__), &
337  c__1, &a_ref(i__ + 1, i__), &c__1);
338  i__2 = *n - i__;
339  template_blas_axpy(&i__2, &alpha, &a_ref(i__ + 1, i__), &c__1, &w_ref(i__
340  + 1, i__), &c__1);
341  }
342 
343 /* L20: */
344  }
345  }
346 
347  return 0;
348 
349 /* End of DLATRD */
350 
351 } /* dlatrd_ */
352 
353 #undef w_ref
354 #undef a_ref
355 
356 
357 #endif
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:41
#define a_ref(a_1, a_2)
#define w_ref(a_1, a_2)
int integer
Definition: template_blas_common.h:38
#define minMACRO(a, b)
Definition: template_blas_common.h:44
int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal *a, const integer *lda, Treal *e, Treal *tau, Treal *w, const integer *ldw)
Definition: template_lapack_latrd.h:40
int template_lapack_larfg(const integer *n, Treal *alpha, Treal *x, const integer *incx, Treal *tau)
Definition: template_lapack_larfg.h:41
int template_blas_gemv(const char *trans, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, const Treal *beta, Treal *y, const integer *incy)
Definition: template_blas_gemv.h:41
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_axpy.h:41
int template_blas_symv(const char *uplo, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, const Treal *beta, Treal *y, const integer *incy)
Definition: template_blas_symv.h:40
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:44
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition: template_blas_dot.h:41