ergo
gemm_sse.h
Go to the documentation of this file.
1 /* Ergo, version 3.4, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2014 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 #ifndef GEMM_SSE_H
28 #define GEMM_SSE_H
29 #include <stdexcept>
30 #include "mm_kernel_inner_sse2_A.h"
31 #include "mm_kernel_outer_A.h"
32 
33 
/** Worker template behind the type-specific gemm_sse() entry points.
 *
 * After validating the sizes, the routine packs C, A and B (in that
 * order) into the caller-supplied work buffers, calls MM_outer::exec on
 * the packed data and finally unpacks C_packed back into C.
 * Presumably this accumulates the product A*B into C, since C is packed
 * before and unpacked after exec -- confirm against MM_outer::exec.
 *
 * @tparam real      Element type of the matrices.
 * @tparam regType   Register type paired with real (see the typedef
 *                   hints at the top of the body).
 * @tparam m_kernel  Kernel size; m must equal m_kernel*m_block.
 * @tparam n_kernel  Kernel size; n must equal n_kernel*n_block.
 * @tparam k_kernel  Kernel size; k must equal k_kernel.
 * @tparam m_block   Block count multiplied with m_kernel in the m check.
 * @tparam n_block   Block count multiplied with n_kernel in the n check.
 *
 * @param A         Input passed to the A packer with dimensions (m, k).
 * @param B         Input passed to the B packer with dimensions (k, n).
 * @param C         Passed to the C packer with dimensions (m, n) and
 *                  overwritten by the final unpack.
 * @param m,n,k     Problem dimensions; only the exact sizes fixed by the
 *                  template arguments are accepted.
 * @param A_packed  Work buffer receiving the packed copy of A.
 * @param B_packed  Work buffer receiving the packed copy of B.
 * @param C_packed  Work buffer receiving the packed copy of C.
 * @param ap_size   Size of A_packed; must be at least
 *                  MM_outer::Pack_type_A::size_packed.
 * @param bp_size   Size of B_packed; must be at least
 *                  MM_outer::Pack_type_B::size_packed.
 * @param cp_size   Size of C_packed; must be at least
 *                  MM_outer::Pack_type_C::size_packed.
 *
 * @throws std::runtime_error if any of the dimension or buffer size
 *         checks fails.
 */
template<typename real, typename regType,
         int m_kernel, int n_kernel, int k_kernel,
         int m_block, int n_block>
  static void gemm_sse(real const * const A,
                       real const * const B,
                       real * C,
                       size_t const m,
                       size_t const n,
                       size_t const k,
                       real * A_packed,
                       real * B_packed,
                       real * C_packed,
                       size_t const ap_size,
                       size_t const bp_size,
                       size_t const cp_size) {
  // typedef double real; typedef __m128d regType;
  // typedef float real; typedef __m128 regType;
  // NOTE(review): MM_outer is used below but its declaration is not
  // visible in this listing (the line numbering skips two lines here).
  // Presumably it is a typedef built from the kernel templates in the two
  // included mm_kernel_* headers -- confirm against the original file.

  // Only the exact problem size fixed by the template arguments is accepted.
  if (m != m_kernel*m_block)
    throw std::runtime_error("Error in gemm_sse(...): m != m_kernel*m_block");
  if (n != n_kernel*n_block)
    throw std::runtime_error("Error in gemm_sse(...): n != n_kernel*n_block");
  if (k != k_kernel)
    throw std::runtime_error("Error in gemm_sse(...): k != k_kernel");
  // Each work buffer must be able to hold the packed form of its matrix.
  if (ap_size < MM_outer::Pack_type_A::size_packed)
    throw std::runtime_error("Error in gemm_sse(...): "
                             "ap_size < MM_outer::Pack_type_A::size_packed");
  if (bp_size < MM_outer::Pack_type_B::size_packed)
    throw std::runtime_error("Error in gemm_sse(...): "
                             "bp_size < MM_outer::Pack_type_B::size_packed");
  if (cp_size < MM_outer::Pack_type_C::size_packed)
    throw std::runtime_error("Error in gemm_sse(...): "
                             "cp_size < MM_outer::Pack_type_C::size_packed");
  // Copy the operands into the packed work buffers (C is packed as well,
  // so its current contents are available to the kernel).
  MM_outer::Pack_type_C::template pack<Ordering_col_wise>( C, C_packed, m, n);
  MM_outer::Pack_type_A::template pack<Ordering_col_wise>( A, A_packed, m, k);
  MM_outer::Pack_type_B::template pack<Ordering_col_wise>( B, B_packed, k, n);
  // exec receives the addresses of the A and B buffer pointers and the
  // C buffer pointer itself.
  MM_outer::exec(&A_packed, &B_packed, C_packed);
  // Write the packed result back into the caller's C.
  MM_outer::Pack_type_C::template unpack<Ordering_col_wise>(C, C_packed, m, n);
}
74 
/** Generic gemm_sse() entry point, used for any element type that has no
 *  explicit specialization.
 *
 * None of the arguments are inspected; the parameter list only mirrors
 * the one of the specializations so that all element types share a
 * single call syntax.
 *
 * @throws std::runtime_error unconditionally.
 */
template<typename real>
static void gemm_sse(real const * const A, real const * const B, real * C,
                     size_t const m, size_t const n, size_t const k,
                     real * A_packed, real * B_packed, real * C_packed,
                     size_t const ap_size, size_t const bp_size,
                     size_t const cp_size)
{
  // There is no generic implementation: report the unsupported type.
  throw std::runtime_error("gemm_sse not implemented for chosen real type.");
}
90 
91 template<>
92 void gemm_sse(double const * const A,
93  double const * const B,
94  double * C,
95  size_t const m,
96  size_t const n,
97  size_t const k,
98  double * A_packed,
99  double * B_packed,
100  double * C_packed,
101  size_t const ap_size,
102  size_t const bp_size,
103  size_t const cp_size) {
104  gemm_sse<double, __m128d, 4, 4, 32, 8, 8>
105  (A, B, C, m, n, k,
106  A_packed, B_packed, C_packed, ap_size, bp_size, cp_size);
107 }
108 
109 template<>
110 void gemm_sse(float const * const A,
111  float const * const B,
112  float * C,
113  size_t const m,
114  size_t const n,
115  size_t const k,
116  float * A_packed,
117  float * B_packed,
118  float * C_packed,
119  size_t const ap_size,
120  size_t const bp_size,
121  size_t const cp_size) {
122  gemm_sse<float, __m128, 8, 4, 32, 4, 8>
123  (A, B, C, m, n, k,
124  A_packed, B_packed, C_packed, ap_size, bp_size, cp_size);
125 }
126 
127 #endif
#define A
Matrix multiplication template for architectures with SSE2 or higher and compilers that support C++ i...
Definition: mm_kernel_inner_sse2_A.h:61
ergo_real real
Definition: cubature_rules.h:33
#define B
Template for matrix matrix multiplication that wraps around a kernel given as template argument...
Definition: mm_kernel_outer_A.h:45
static void gemm_sse(real const *const A, real const *const B, real *C, size_t const m, size_t const n, size_t const k, real *A_packed, real *B_packed, real *C_packed, size_t const ap_size, size_t const bp_size, size_t const cp_size)
Definition: gemm_sse.h:37