// ViennaCL - The Vienna Computing Library 1.7.0
// Free open-source GPU-accelerated linear algebra and solver library.
// File: viennacl/linalg/host_based/matrix_operations.hpp
1 #ifndef VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/scalar.hpp"
27 #include "viennacl/vector.hpp"
29 #include "viennacl/tools/tools.hpp"
33 #include "viennacl/traits/size.hpp"
39 #include "viennacl/linalg/prod.hpp"
40 
41 namespace viennacl
42 {
43 namespace linalg
44 {
45 namespace host_based
46 {
47 
48 //
49 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
50 //
51 
52 template<typename DestNumericT, typename SrcNumericT>
54 {
55  assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
56 
57  DestNumericT * data_A = detail::extract_raw_pointer<DestNumericT>(mat1);
58  SrcNumericT const * data_B = detail::extract_raw_pointer<SrcNumericT>(mat2);
59 
60  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
61  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
64  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
65  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
66  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
67  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
68 
69  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
70  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
73  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
74  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);
75 
76  if (mat1.row_major())
77  {
78  detail::matrix_array_wrapper<DestNumericT, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
79  detail::matrix_array_wrapper<SrcNumericT const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
80 
81 #ifdef VIENNACL_WITH_OPENMP
82  #pragma omp parallel for
83 #endif
84  for (long row = 0; row < static_cast<long>(A_size1); ++row)
85  for (vcl_size_t col = 0; col < A_size2; ++col)
86  wrapper_A(row, col) = static_cast<DestNumericT>(wrapper_B(row, col));
87  }
88  else
89  {
90  detail::matrix_array_wrapper<DestNumericT, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
91  detail::matrix_array_wrapper<SrcNumericT const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
92 
93 #ifdef VIENNACL_WITH_OPENMP
94  #pragma omp parallel for
95 #endif
96  for (long col = 0; col < static_cast<long>(A_size2); ++col)
97  for (vcl_size_t row = 0; row < A_size1; ++row)
98  wrapper_A(row, col) = static_cast<DestNumericT>(wrapper_B(row, col));
99  }
100 }
101 
102 
103 
104 template<typename NumericT,
105  typename SizeT, typename DistanceT>
108 {
109  const NumericT * temp_proxy = detail::extract_raw_pointer<NumericT>(proxy.lhs());
110  NumericT * temp = detail::extract_raw_pointer<NumericT>(temp_trans);
111 
112  vcl_size_t proxy_int_size1=proxy.lhs().internal_size1();
113  vcl_size_t proxy_int_size2=proxy.lhs().internal_size2();
114  vcl_size_t temp_int_size1=temp_trans.internal_size1();
115  vcl_size_t temp_int_size2=temp_trans.internal_size2();
116 
117 #ifdef VIENNACL_WITH_OPENMP
118  #pragma omp parallel for
119 #endif
120  for (long i2 = 0; i2 < static_cast<long>(proxy_int_size1*proxy_int_size2); ++i2)
121  {
122  vcl_size_t row = vcl_size_t(i2) / proxy_int_size2;
123  vcl_size_t col = vcl_size_t(i2) % proxy_int_size2;
124 
125  if (row < proxy.lhs().size1() && col < proxy.lhs().size2())
126  {
127  if (proxy.lhs().row_major())
128  {
129  vcl_size_t pos = row_major::mem_index(proxy.lhs().start1() + proxy.lhs().stride1() * row,
130  proxy.lhs().start2() + proxy.lhs().stride2() * col,
131  proxy_int_size1, proxy_int_size2);
132  vcl_size_t new_pos = row_major::mem_index(temp_trans.start2() + temp_trans.stride2() * col,
133  temp_trans.start1() + temp_trans.stride1() * row, temp_int_size1,
134  temp_int_size2);
135  temp[new_pos] = temp_proxy[pos];
136  }
137  else
138  {
139  vcl_size_t pos = column_major::mem_index(proxy.lhs().start1() + proxy.lhs().stride1() * row,
140  proxy.lhs().start2() + proxy.lhs().stride2() * col, proxy_int_size1,
141  proxy_int_size2);
142  vcl_size_t new_pos = column_major::mem_index(temp_trans.start2() + temp_trans.stride2() * col,
143  temp_trans.start1() + temp_trans.stride1() * row, temp_int_size1,
144  temp_int_size2);
145  temp[new_pos] = temp_proxy[pos];
146  }
147  }
148  }
149 }
150 
151 template<typename NumericT, typename ScalarT1>
153  matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha)
154 {
155  assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
156 
157  typedef NumericT value_type;
158 
159  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
160  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
161 
162  value_type data_alpha = alpha;
163  if (flip_sign_alpha)
164  data_alpha = -data_alpha;
165 
166  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
167  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
168  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
169  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
170  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
171  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
172  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
173  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
174 
175  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
176  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
177  vcl_size_t B_inc1 = viennacl::traits::stride1(mat2);
178  vcl_size_t B_inc2 = viennacl::traits::stride2(mat2);
179  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
180  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);
181 
182  if (mat1.row_major())
183  {
184  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
185  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
186 
187  if (reciprocal_alpha)
188  {
189 #ifdef VIENNACL_WITH_OPENMP
190  #pragma omp parallel for
191 #endif
192  for (long row = 0; row < static_cast<long>(A_size1); ++row)
193  for (vcl_size_t col = 0; col < A_size2; ++col)
194  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
195  }
196  else
197  {
198 #ifdef VIENNACL_WITH_OPENMP
199  #pragma omp parallel for
200 #endif
201  for (long row = 0; row < static_cast<long>(A_size1); ++row)
202  for (vcl_size_t col = 0; col < A_size2; ++col)
203  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
204  }
205  }
206  else
207  {
208  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
209  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
210 
211  if (reciprocal_alpha)
212  {
213 #ifdef VIENNACL_WITH_OPENMP
214  #pragma omp parallel for
215 #endif
216  for (long col = 0; col < static_cast<long>(A_size2); ++col)
217  for (vcl_size_t row = 0; row < A_size1; ++row)
218  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
219  }
220  else
221  {
222 #ifdef VIENNACL_WITH_OPENMP
223  #pragma omp parallel for
224 #endif
225  for (long col = 0; col < static_cast<long>(A_size2); ++col)
226  for (vcl_size_t row = 0; row < A_size1; ++row)
227  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
228  }
229  }
230 }
231 
232 
233 template<typename NumericT,
234  typename ScalarT1, typename ScalarT2>
236  matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
237  matrix_base<NumericT> const & mat3, ScalarT2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta)
238 {
239  assert(mat1.row_major() == mat2.row_major() && mat1.row_major() == mat3.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
240 
241  typedef NumericT value_type;
242 
243  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
244  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
245  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);
246 
247  value_type data_alpha = alpha;
248  if (flip_sign_alpha)
249  data_alpha = -data_alpha;
250 
251  value_type data_beta = beta;
252  if (flip_sign_beta)
253  data_beta = -data_beta;
254 
255  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
256  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
257  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
258  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
259  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
260  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
261  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
262  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
263 
264  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
265  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
266  vcl_size_t B_inc1 = viennacl::traits::stride1(mat2);
267  vcl_size_t B_inc2 = viennacl::traits::stride2(mat2);
268  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
269  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);
270 
271  vcl_size_t C_start1 = viennacl::traits::start1(mat3);
272  vcl_size_t C_start2 = viennacl::traits::start2(mat3);
273  vcl_size_t C_inc1 = viennacl::traits::stride1(mat3);
274  vcl_size_t C_inc2 = viennacl::traits::stride2(mat3);
275  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(mat3);
276  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(mat3);
277 
278  if (mat1.row_major())
279  {
280  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
281  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
282  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
283 
284  if (reciprocal_alpha && reciprocal_beta)
285  {
286 #ifdef VIENNACL_WITH_OPENMP
287  #pragma omp parallel for
288 #endif
289  for (long row = 0; row < static_cast<long>(A_size1); ++row)
290  for (vcl_size_t col = 0; col < A_size2; ++col)
291  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
292  }
293  else if (reciprocal_alpha && !reciprocal_beta)
294  {
295 #ifdef VIENNACL_WITH_OPENMP
296  #pragma omp parallel for
297 #endif
298  for (long row = 0; row < static_cast<long>(A_size1); ++row)
299  for (vcl_size_t col = 0; col < A_size2; ++col)
300  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
301  }
302  else if (!reciprocal_alpha && reciprocal_beta)
303  {
304 #ifdef VIENNACL_WITH_OPENMP
305  #pragma omp parallel for
306 #endif
307  for (long row = 0; row < static_cast<long>(A_size1); ++row)
308  for (vcl_size_t col = 0; col < A_size2; ++col)
309  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
310  }
311  else if (!reciprocal_alpha && !reciprocal_beta)
312  {
313 #ifdef VIENNACL_WITH_OPENMP
314  #pragma omp parallel for
315 #endif
316  for (long row = 0; row < static_cast<long>(A_size1); ++row)
317  for (vcl_size_t col = 0; col < A_size2; ++col)
318  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
319  }
320  }
321  else
322  {
323  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
324  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
325  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
326 
327  if (reciprocal_alpha && reciprocal_beta)
328  {
329 #ifdef VIENNACL_WITH_OPENMP
330  #pragma omp parallel for
331 #endif
332  for (long col = 0; col < static_cast<long>(A_size2); ++col)
333  for (vcl_size_t row = 0; row < A_size1; ++row)
334  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
335  }
336  else if (reciprocal_alpha && !reciprocal_beta)
337  {
338 #ifdef VIENNACL_WITH_OPENMP
339  #pragma omp parallel for
340 #endif
341  for (long col = 0; col < static_cast<long>(A_size2); ++col)
342  for (vcl_size_t row = 0; row < A_size1; ++row)
343  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
344  }
345  else if (!reciprocal_alpha && reciprocal_beta)
346  {
347 #ifdef VIENNACL_WITH_OPENMP
348  #pragma omp parallel for
349 #endif
350  for (long col = 0; col < static_cast<long>(A_size2); ++col)
351  for (vcl_size_t row = 0; row < A_size1; ++row)
352  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
353  }
354  else if (!reciprocal_alpha && !reciprocal_beta)
355  {
356 #ifdef VIENNACL_WITH_OPENMP
357  #pragma omp parallel for
358 #endif
359  for (long col = 0; col < static_cast<long>(A_size2); ++col)
360  for (vcl_size_t row = 0; row < A_size1; ++row)
361  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
362  }
363  }
364 
365 }
366 
367 
368 template<typename NumericT,
369  typename ScalarT1, typename ScalarT2>
371  matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
372  matrix_base<NumericT> const & mat3, ScalarT2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta)
373 {
374  assert(mat1.row_major() == mat2.row_major() && mat1.row_major() == mat3.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
375 
376  typedef NumericT value_type;
377 
378  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
379  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
380  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);
381 
382  value_type data_alpha = alpha;
383  if (flip_sign_alpha)
384  data_alpha = -data_alpha;
385 
386  value_type data_beta = beta;
387  if (flip_sign_beta)
388  data_beta = -data_beta;
389 
390  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
391  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
392  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
393  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
394  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
395  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
396  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
397  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
398 
399  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
400  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
401  vcl_size_t B_inc1 = viennacl::traits::stride1(mat2);
402  vcl_size_t B_inc2 = viennacl::traits::stride2(mat2);
403  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
404  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);
405 
406  vcl_size_t C_start1 = viennacl::traits::start1(mat3);
407  vcl_size_t C_start2 = viennacl::traits::start2(mat3);
408  vcl_size_t C_inc1 = viennacl::traits::stride1(mat3);
409  vcl_size_t C_inc2 = viennacl::traits::stride2(mat3);
410  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(mat3);
411  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(mat3);
412 
413  if (mat1.row_major())
414  {
415  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
416  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
417  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
418 
419  if (reciprocal_alpha && reciprocal_beta)
420  {
421 #ifdef VIENNACL_WITH_OPENMP
422  #pragma omp parallel for
423 #endif
424  for (long row = 0; row < static_cast<long>(A_size1); ++row)
425  for (vcl_size_t col = 0; col < A_size2; ++col)
426  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
427  }
428  else if (reciprocal_alpha && !reciprocal_beta)
429  {
430 #ifdef VIENNACL_WITH_OPENMP
431  #pragma omp parallel for
432 #endif
433  for (long row = 0; row < static_cast<long>(A_size1); ++row)
434  for (vcl_size_t col = 0; col < A_size2; ++col)
435  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
436  }
437  else if (!reciprocal_alpha && reciprocal_beta)
438  {
439 #ifdef VIENNACL_WITH_OPENMP
440  #pragma omp parallel for
441 #endif
442  for (long row = 0; row < static_cast<long>(A_size1); ++row)
443  for (vcl_size_t col = 0; col < A_size2; ++col)
444  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
445  }
446  else if (!reciprocal_alpha && !reciprocal_beta)
447  {
448 #ifdef VIENNACL_WITH_OPENMP
449  #pragma omp parallel for
450 #endif
451  for (long row = 0; row < static_cast<long>(A_size1); ++row)
452  for (vcl_size_t col = 0; col < A_size2; ++col)
453  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
454  }
455  }
456  else
457  {
458  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
459  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
460  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
461 
462  if (reciprocal_alpha && reciprocal_beta)
463  {
464 #ifdef VIENNACL_WITH_OPENMP
465  #pragma omp parallel for
466 #endif
467  for (long col = 0; col < static_cast<long>(A_size2); ++col)
468  for (vcl_size_t row = 0; row < A_size1; ++row)
469  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
470  }
471  else if (reciprocal_alpha && !reciprocal_beta)
472  {
473 #ifdef VIENNACL_WITH_OPENMP
474  #pragma omp parallel for
475 #endif
476  for (long col = 0; col < static_cast<long>(A_size2); ++col)
477  for (vcl_size_t row = 0; row < A_size1; ++row)
478  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
479  }
480  else if (!reciprocal_alpha && reciprocal_beta)
481  {
482 #ifdef VIENNACL_WITH_OPENMP
483  #pragma omp parallel for
484 #endif
485  for (long col = 0; col < static_cast<long>(A_size2); ++col)
486  for (vcl_size_t row = 0; row < A_size1; ++row)
487  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
488  }
489  else if (!reciprocal_alpha && !reciprocal_beta)
490  {
491 #ifdef VIENNACL_WITH_OPENMP
492  #pragma omp parallel for
493 #endif
494  for (long col = 0; col < static_cast<long>(A_size2); ++col)
495  for (vcl_size_t row = 0; row < A_size1; ++row)
496  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
497  }
498  }
499 
500 }
501 
502 
503 
504 
505 template<typename NumericT>
506 void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false)
507 {
508  typedef NumericT value_type;
509 
510  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
511  value_type alpha = static_cast<value_type>(s);
512 
513  vcl_size_t A_start1 = viennacl::traits::start1(mat);
514  vcl_size_t A_start2 = viennacl::traits::start2(mat);
519  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
520  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
521 
522  if (mat.row_major())
523  {
524  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
525 
526 #ifdef VIENNACL_WITH_OPENMP
527  #pragma omp parallel for
528 #endif
529  for (long row = 0; row < static_cast<long>(A_size1); ++row)
530  for (vcl_size_t col = 0; col < A_size2; ++col)
531  wrapper_A(static_cast<vcl_size_t>(row), col) = alpha;
532  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
533  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha;
534  }
535  else
536  {
537  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
538 
539 #ifdef VIENNACL_WITH_OPENMP
540  #pragma omp parallel for
541 #endif
542  for (long col = 0; col < static_cast<long>(A_size2); ++col)
543  for (vcl_size_t row = 0; row < A_size1; ++row)
544  wrapper_A(row, static_cast<vcl_size_t>(col)) = alpha;
545  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
546  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha;
547  }
548 }
549 
550 
551 
552 template<typename NumericT>
554 {
555  typedef NumericT value_type;
556 
557  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
558  value_type alpha = static_cast<value_type>(s);
559 
560  vcl_size_t A_start1 = viennacl::traits::start1(mat);
561  vcl_size_t A_start2 = viennacl::traits::start2(mat);
564  vcl_size_t A_size1 = viennacl::traits::size1(mat);
565  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
566  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
567  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
568 
569  if (mat.row_major())
570  {
571  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
572 
573 #ifdef VIENNACL_WITH_OPENMP
574  #pragma omp parallel for
575 #endif
576  for (long row = 0; row < static_cast<long>(A_size1); ++row)
577  wrapper_A(row, row) = alpha;
578  }
579  else
580  {
581  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
582 
583 #ifdef VIENNACL_WITH_OPENMP
584  #pragma omp parallel for
585 #endif
586  for (long row = 0; row < static_cast<long>(A_size1); ++row)
587  wrapper_A(row, row) = alpha;
588  }
589 }
590 
591 template<typename NumericT>
593 {
594  typedef NumericT value_type;
595 
596  value_type *data_A = detail::extract_raw_pointer<value_type>(mat);
597  value_type const *data_vec = detail::extract_raw_pointer<value_type>(vec);
598 
599  vcl_size_t A_start1 = viennacl::traits::start1(mat);
600  vcl_size_t A_start2 = viennacl::traits::start2(mat);
603  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
604  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
605  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
606  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
607 
608  vcl_size_t v_start = viennacl::traits::start(vec);
610  vcl_size_t v_size = viennacl::traits::size(vec);
611 
612  vcl_size_t row_start = 0;
613  vcl_size_t col_start = 0;
614 
615  if (k >= 0)
616  col_start = static_cast<vcl_size_t>(k);
617  else
618  row_start = static_cast<vcl_size_t>(-k);
619 
620  matrix_assign(mat, NumericT(0));
621 
622  if (mat.row_major())
623  {
624  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
625 
626  for (vcl_size_t i = 0; i < v_size; ++i)
627  wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
628  }
629  else
630  {
631  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
632 
633  for (vcl_size_t i = 0; i < v_size; ++i)
634  wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
635  }
636 }
637 
638 template<typename NumericT>
640 {
641  typedef NumericT value_type;
642 
643  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
644  value_type * data_vec = detail::extract_raw_pointer<value_type>(vec);
645 
646  vcl_size_t A_start1 = viennacl::traits::start1(mat);
647  vcl_size_t A_start2 = viennacl::traits::start2(mat);
650  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
651  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
652  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
653  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
654 
655  vcl_size_t v_start = viennacl::traits::start(vec);
657  vcl_size_t v_size = viennacl::traits::size(vec);
658 
659  vcl_size_t row_start = 0;
660  vcl_size_t col_start = 0;
661 
662  if (k >= 0)
663  col_start = static_cast<vcl_size_t>(k);
664  else
665  row_start = static_cast<vcl_size_t>(-k);
666 
667  if (mat.row_major())
668  {
669  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
670 
671  for (vcl_size_t i = 0; i < v_size; ++i)
672  data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
673  }
674  else
675  {
676  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
677 
678  for (vcl_size_t i = 0; i < v_size; ++i)
679  data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
680  }
681 }
682 
683 template<typename NumericT>
684 void matrix_row(const matrix_base<NumericT> & mat, unsigned int i, vector_base<NumericT> & vec)
685 {
686  typedef NumericT value_type;
687 
688  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
689  value_type * data_vec = detail::extract_raw_pointer<value_type>(vec);
690 
691  vcl_size_t A_start1 = viennacl::traits::start1(mat);
692  vcl_size_t A_start2 = viennacl::traits::start2(mat);
695  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
696  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
697  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
698  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
699 
700  vcl_size_t v_start = viennacl::traits::start(vec);
702  vcl_size_t v_size = viennacl::traits::size(vec);
703 
704  if (mat.row_major())
705  {
706  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
707 
708  for (vcl_size_t j = 0; j < v_size; ++j)
709  data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
710  }
711  else
712  {
713  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
714 
715  for (vcl_size_t j = 0; j < v_size; ++j)
716  data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
717  }
718 }
719 
720 template<typename NumericT>
721 void matrix_column(const matrix_base<NumericT> & mat, unsigned int j, vector_base<NumericT> & vec)
722 {
723  typedef NumericT value_type;
724 
725  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
726  value_type * data_vec = detail::extract_raw_pointer<value_type>(vec);
727 
728  vcl_size_t A_start1 = viennacl::traits::start1(mat);
729  vcl_size_t A_start2 = viennacl::traits::start2(mat);
732  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
733  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
734  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
735  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
736 
737  vcl_size_t v_start = viennacl::traits::start(vec);
739  vcl_size_t v_size = viennacl::traits::size(vec);
740 
741  if (mat.row_major())
742  {
743  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
744 
745  for (vcl_size_t i = 0; i < v_size; ++i)
746  data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
747  }
748  else
749  {
750  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
751 
752  for (vcl_size_t i = 0; i < v_size; ++i)
753  data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
754  }
755 }
756 
757 //
759 //
760 
761 // Binary operations A = B .* C and A = B ./ C
762 
// Element-wise binary operation on dense host matrices: A = B .* C or
// A = B ./ C, with the concrete scalar operation selected via OpT.
// NOTE(review): this rendered listing is missing original source lines
// 769-770 (the function signature taking matrix_base<NumericT> & A and the
// matrix_expression proxy), 775 (the OpFunctor typedef used below), and
// 783-784 (the A_inc1/A_inc2 stride declarations) -- confirm against the
// original matrix_operations.hpp before editing.
768 template<typename NumericT, typename OpT>
771 {
772  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));
773 
774  typedef NumericT value_type;
776 
// Raw host pointers: A is written; B (= proxy.lhs()) and C (= proxy.rhs()) are read only.
777  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
778  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
779  value_type const * data_C = detail::extract_raw_pointer<value_type>(proxy.rhs());
780 
// Submatrix geometry of A: start offsets, logical sizes, and padded
// (internal) sizes used by the index computations of the wrappers below.
781  vcl_size_t A_start1 = viennacl::traits::start1(A);
782  vcl_size_t A_start2 = viennacl::traits::start2(A);
785  vcl_size_t A_size1 = viennacl::traits::size1(A);
786  vcl_size_t A_size2 = viennacl::traits::size2(A);
787  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
788  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
789 
// Corresponding geometry (including strides) for the two read operands:
790  vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs());
791  vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs());
792  vcl_size_t B_inc1 = viennacl::traits::stride1(proxy.lhs());
793  vcl_size_t B_inc2 = viennacl::traits::stride2(proxy.lhs());
794  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy.lhs());
795  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy.lhs());
796 
797  vcl_size_t C_start1 = viennacl::traits::start1(proxy.rhs());
798  vcl_size_t C_start2 = viennacl::traits::start2(proxy.rhs());
799  vcl_size_t C_inc1 = viennacl::traits::stride1(proxy.rhs());
800  vcl_size_t C_inc2 = viennacl::traits::stride2(proxy.rhs());
801  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(proxy.rhs());
802  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(proxy.rhs());
803 
// Dispatch once on the (asserted common) storage layout, then run a dense
// 2D loop with the slowest-varying index outermost so the inner loop walks
// memory contiguously.
804  if (A.row_major())
805  {
806  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
807  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
808  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
809 
810 #ifdef VIENNACL_WITH_OPENMP
811  #pragma omp parallel for
812 #endif
// 'row' is a signed long because classic OpenMP worksharing loops require
// a signed induction variable.
813  for (long row = 0; row < static_cast<long>(A_size1); ++row)
814  for (vcl_size_t col = 0; col < A_size2; ++col)
815  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
// The commented-out lines below are the original un-wrapped index
// formulation, kept as reference documentation for the wrapper calls above:
816  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
817  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha
818  // + data_C[index_generator_C::mem_index(row * C_inc1 + C_start1, col * C_inc2 + C_start2, C_internal_size1, C_internal_size2)] * beta;
819  }
820  else
821  {
822  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
823  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
824  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
825 
826 #ifdef VIENNACL_WITH_OPENMP
827  #pragma omp parallel for
828 #endif
// Column-major storage: parallelize over columns instead of rows.
829  for (long col = 0; col < static_cast<long>(A_size2); ++col)
830  for (vcl_size_t row = 0; row < A_size1; ++row)
831  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
832 
833  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
834  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha
835  // + data_C[index_generator_C::mem_index(row * C_inc1 + C_start1, col * C_inc2 + C_start2, C_internal_size1, C_internal_size2)] * beta;
836  }
837 }
838 
839 // Unary operations
840 
841 // A = op(B)
// Element-wise unary operation on dense host matrices: A = op(B), with the
// scalar operation selected via OpT (B is proxy.lhs(); for unary expressions
// lhs and rhs refer to the same operand, hence the rhs() layout check).
// NOTE(review): this rendered listing is missing original source lines
// 843-844 (the function signature), 849 (the OpFunctor typedef used below)
// and 856-857 (the A_inc1/A_inc2 stride declarations) -- confirm against
// the original matrix_operations.hpp before editing.
842 template<typename NumericT, typename OpT>
845 {
846  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));
847 
848  typedef NumericT value_type;
850 
// Raw host pointers: A is written; B (= proxy.lhs()) is read only.
851  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
852  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
853 
// Submatrix geometry of A (offsets, logical sizes, padded internal sizes):
854  vcl_size_t A_start1 = viennacl::traits::start1(A);
855  vcl_size_t A_start2 = viennacl::traits::start2(A);
858  vcl_size_t A_size1 = viennacl::traits::size1(A);
859  vcl_size_t A_size2 = viennacl::traits::size2(A);
860  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
861  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
862 
// Geometry (including strides) of the read operand B:
863  vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs());
864  vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs());
865  vcl_size_t B_inc1 = viennacl::traits::stride1(proxy.lhs());
866  vcl_size_t B_inc2 = viennacl::traits::stride2(proxy.lhs());
867  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy.lhs());
868  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy.lhs());
869 
// Dispatch once on the layout, then traverse with the slowest-varying index
// in the outer (OpenMP-parallelized, signed) loop for contiguous access.
870  if (A.row_major())
871  {
872  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
873  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
874 
875 #ifdef VIENNACL_WITH_OPENMP
876  #pragma omp parallel for
877 #endif
878  for (long row = 0; row < static_cast<long>(A_size1); ++row)
879  for (vcl_size_t col = 0; col < A_size2; ++col)
880  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
881  }
882  else
883  {
884  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
885  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
886 
887 #ifdef VIENNACL_WITH_OPENMP
888  #pragma omp parallel for
889 #endif
// Column-major storage: parallelize over columns instead of rows.
890  for (long col = 0; col < static_cast<long>(A_size2); ++col)
891  for (vcl_size_t row = 0; row < A_size1; ++row)
892  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
893  }
894 }
895 
896 
897 
898 //
899 // Matrix-Vector product
900 //
901 
902 // A * x
903 
// Host matrix-vector product:
//   result = mat * vec     (trans == false)
//   result = mat^T * vec   (trans == true)
// Dimensions are checked by the dispatcher frontend (see file-level note).
// NOTE(review): this rendered listing is missing original source lines
// 926-927 (A_inc1/A_inc2 stride declarations) and 933-936 (start1/inc1 of
// vec and start2 of result) -- confirm against the original
// matrix_operations.hpp before editing.
913 template<typename NumericT>
914 void prod_impl(const matrix_base<NumericT> & mat, bool trans,
915  const vector_base<NumericT> & vec,
916  vector_base<NumericT> & result)
917 {
918  typedef NumericT value_type;
919 
// Raw host pointers: mat and vec are read only, result is written.
920  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
921  value_type const * data_x = detail::extract_raw_pointer<value_type>(vec);
922  value_type * data_result = detail::extract_raw_pointer<value_type>(result);
923 
// Submatrix geometry of mat (offsets, logical sizes, padded internal sizes):
924  vcl_size_t A_start1 = viennacl::traits::start1(mat);
925  vcl_size_t A_start2 = viennacl::traits::start2(mat);
928  vcl_size_t A_size1 = viennacl::traits::size1(mat);
929  vcl_size_t A_size2 = viennacl::traits::size2(mat);
930  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
931  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
932 
935 
// inc2 is the stride of the result vector (start2 its offset; declared in
// the elided lines above, together with start1/inc1 for vec).
937  vcl_size_t inc2 = viennacl::traits::stride(result);
938 
939  if (mat.row_major())
940  {
941  if (trans)
942  {
// Transposed product on row-major storage: walking result-entry-wise would
// stride through memory, so instead the matrix is traversed row by row
// (sequentially) and each row's contribution is scattered into result.
// The first matrix row initializes result -- no separate zeroing pass.
943  {
944  value_type temp = data_x[start1];
945  for (vcl_size_t row = 0; row < A_size2; ++row)
946  data_result[row * inc2 + start2] = data_A[viennacl::row_major::mem_index(A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
947  }
948 
949  for (vcl_size_t col = 1; col < A_size1; ++col) //run through matrix sequentially
950  {
951  value_type temp = data_x[col * inc1 + start1];
952  for (vcl_size_t row = 0; row < A_size2; ++row)
953  {
954  data_result[row * inc2 + start2] += data_A[viennacl::row_major::mem_index(col * A_inc1 + A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
955  }
956  }
957  }
958  else
959  {
// Non-transposed product on row-major storage: each result entry is an
// independent dot product of one matrix row with vec, so the row loop is
// parallelized (signed 'long' index as required by classic OpenMP).
960 #ifdef VIENNACL_WITH_OPENMP
961  #pragma omp parallel for
962 #endif
963  for (long row = 0; row < static_cast<long>(A_size1); ++row)
964  {
965  value_type temp = 0;
966  for (vcl_size_t col = 0; col < A_size2; ++col)
967  temp += data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
968 
969  data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
970  }
971  }
972  }
973  else
974  {
// Column-major storage mirrors the two cases above: the non-transposed
// product scatters column contributions sequentially (first column
// initializes result), while the transposed product parallelizes over
// independent dot products.
975  if (!trans)
976  {
977  {
978  value_type temp = data_x[start1];
979  for (vcl_size_t row = 0; row < A_size1; ++row)
980  data_result[row * inc2 + start2] = data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, A_start2, A_internal_size1, A_internal_size2)] * temp;
981  }
982  for (vcl_size_t col = 1; col < A_size2; ++col) //run through matrix sequentially
983  {
984  value_type temp = data_x[col * inc1 + start1];
985  for (vcl_size_t row = 0; row < A_size1; ++row)
986  data_result[row * inc2 + start2] += data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
987  }
988  }
989  else
990  {
991 #ifdef VIENNACL_WITH_OPENMP
992  #pragma omp parallel for
993 #endif
994  for (long row = 0; row < static_cast<long>(A_size2); ++row)
995  {
996  value_type temp = 0;
997  for (vcl_size_t col = 0; col < A_size1; ++col)
998  temp += data_A[viennacl::column_major::mem_index(col * A_inc1 + A_start1, static_cast<vcl_size_t>(row) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
999 
1000  data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
1001  }
1002  }
1003  }
1004 }
1005 
1006 
1007 
1008 //
1009 // Matrix-Matrix product
1010 //
1011 
1012 namespace detail
1013 {
1014  template<typename MatrixAccT1, typename MatrixAccT2, typename MatrixAccT3, typename NumericT>
1015  void prod(MatrixAccT1 & A, MatrixAccT2 & B, MatrixAccT3 & C,
1016  vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2,
1017  NumericT alpha, NumericT beta)
1018  {
1019  if (C_size1 == 0 || C_size2 == 0 || A_size2 == 0)
1020  return;
1021 
1022  static const vcl_size_t blocksize = 64;
1023 
1024  vcl_size_t num_blocks_C1 = (C_size1 - 1) / blocksize + 1;
1025  vcl_size_t num_blocks_C2 = (C_size2 - 1) / blocksize + 1;
1026  vcl_size_t num_blocks_A2 = (A_size2 - 1) / blocksize + 1;
1027 
1028  //
1029  // outer loop pair: Run over all blocks with indices (block_idx_i, block_idx_j) of the result matrix C:
1030  //
1031 #ifdef VIENNACL_WITH_OPENMP
1032  #pragma omp parallel for
1033 #endif
1034  for (long block_idx_i2=0; block_idx_i2<static_cast<long>(num_blocks_C1); ++block_idx_i2)
1035  {
1036  // thread-local auxiliary buffers
1037  std::vector<NumericT> buffer_A(blocksize * blocksize); // row-major
1038  std::vector<NumericT> buffer_B(blocksize * blocksize); // column-major
1039  std::vector<NumericT> buffer_C(blocksize * blocksize); // row-major
1040 
1041  vcl_size_t block_idx_i = static_cast<vcl_size_t>(block_idx_i2);
1042  for (vcl_size_t block_idx_j=0; block_idx_j<num_blocks_C2; ++block_idx_j)
1043  {
1044  // Reset block matrix:
1045  std::fill(buffer_C.begin(), buffer_C.end(), NumericT(0));
1046 
1047  vcl_size_t offset_i = block_idx_i*blocksize;
1048  vcl_size_t offset_j = block_idx_j*blocksize;
1049 
1050  // C(block_idx_i, block_idx_i) += A(block_idx_i, block_idx_k) * B(block_idx_k, block_idx_j)
1051  for (vcl_size_t block_idx_k=0; block_idx_k<num_blocks_A2; ++block_idx_k)
1052  {
1053  // flush buffers:
1054  std::fill(buffer_A.begin(), buffer_A.end(), NumericT(0));
1055  std::fill(buffer_B.begin(), buffer_B.end(), NumericT(0));
1056 
1057  vcl_size_t offset_k = block_idx_k*blocksize;
1058 
1059  // load current data:
1060  for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
1061  for (vcl_size_t k = offset_k; k < std::min(offset_k + blocksize, A_size2); ++k)
1062  buffer_A[(i - offset_i) * blocksize + (k - offset_k)] = A(i, k);
1063 
1064  for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
1065  for (vcl_size_t k = offset_k; k < std::min(offset_k + blocksize, A_size2); ++k)
1066  buffer_B[(k - offset_k) + (j - offset_j) * blocksize] = B(k, j);
1067 
1068  // multiply (this is the hot spot in terms of flops)
1069  for (vcl_size_t i = 0; i < blocksize; ++i)
1070  {
1071  NumericT const * ptrA = &(buffer_A[i*blocksize]);
1072  for (vcl_size_t j = 0; j < blocksize; ++j)
1073  {
1074  NumericT const * ptrB = &(buffer_B[j*blocksize]);
1075 
1076  NumericT temp = NumericT(0);
1077  for (vcl_size_t k = 0; k < blocksize; ++k)
1078  temp += ptrA[k] * ptrB[k]; // buffer_A[i*blocksize + k] * buffer_B[k + j*blocksize];
1079 
1080  buffer_C[i*blocksize + j] += temp;
1081  }
1082  }
1083  }
1084 
1085  // write result:
1086  if (beta > 0 || beta < 0)
1087  {
1088  for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
1089  for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
1090  C(i,j) = beta * C(i,j) + alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
1091  }
1092  else
1093  {
1094  for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
1095  for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
1096  C(i,j) = alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
1097  }
1098 
1099  } // for block j
1100  } // for block i
1101 
1102  } // prod()
1103 
1104 } // namespace detail
1105 
1111 template<typename NumericT, typename ScalarT1, typename ScalarT2 >
1112 void prod_impl(const matrix_base<NumericT> & A, bool trans_A,
1113  const matrix_base<NumericT> & B, bool trans_B,
1115  ScalarT1 alpha,
1116  ScalarT2 beta)
1117 {
1118  typedef NumericT value_type;
1119 
1120  value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
1121  value_type const * data_B = detail::extract_raw_pointer<value_type>(B);
1122  value_type * data_C = detail::extract_raw_pointer<value_type>(C);
1123 
1124  vcl_size_t A_start1 = viennacl::traits::start1(A);
1125  vcl_size_t A_start2 = viennacl::traits::start2(A);
1128  vcl_size_t A_size1 = viennacl::traits::size1(A);
1129  vcl_size_t A_size2 = viennacl::traits::size2(A);
1130  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1131  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1132 
1133  vcl_size_t B_start1 = viennacl::traits::start1(B);
1134  vcl_size_t B_start2 = viennacl::traits::start2(B);
1137  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B);
1138  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B);
1139 
1140  vcl_size_t C_start1 = viennacl::traits::start1(C);
1141  vcl_size_t C_start2 = viennacl::traits::start2(C);
1144  vcl_size_t C_size1 = viennacl::traits::size1(C);
1145  vcl_size_t C_size2 = viennacl::traits::size2(C);
1146  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C);
1147  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C);
1148 
1149  if (!trans_A && !trans_B)
1150  {
1151  if (A.row_major() && B.row_major() && C.row_major())
1152  {
1153  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1154  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1155  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1156 
1157  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1158  }
1159  else if (A.row_major() && B.row_major() && !C.row_major())
1160  {
1161  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1162  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1163  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1164 
1165  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1166  }
1167  else if (A.row_major() && !B.row_major() && C.row_major())
1168  {
1169  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1170  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1171  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1172 
1173  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1174  }
1175  else if (A.row_major() && !B.row_major() && !C.row_major())
1176  {
1177  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1178  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1179  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1180 
1181  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1182  }
1183  else if (!A.row_major() && B.row_major() && C.row_major())
1184  {
1185  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1186  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1187  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1188 
1189  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1190  }
1191  else if (!A.row_major() && B.row_major() && !C.row_major())
1192  {
1193  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1194  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1195  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1196 
1197  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1198  }
1199  else if (!A.row_major() && !B.row_major() && C.row_major())
1200  {
1201  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1202  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1203  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1204 
1205  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1206  }
1207  else
1208  {
1209  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1210  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1211  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1212 
1213  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1214  }
1215  }
1216  else if (!trans_A && trans_B)
1217  {
1218  if (A.row_major() && B.row_major() && C.row_major())
1219  {
1220  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1221  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1222  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1223 
1224  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1225  }
1226  else if (A.row_major() && B.row_major() && !C.row_major())
1227  {
1228  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1229  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1230  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1231 
1232  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1233  }
1234  else if (A.row_major() && !B.row_major() && C.row_major())
1235  {
1236  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1237  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1238  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1239 
1240  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1241  }
1242  else if (A.row_major() && !B.row_major() && !C.row_major())
1243  {
1244  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1245  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1246  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1247 
1248  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1249  }
1250  else if (!A.row_major() && B.row_major() && C.row_major())
1251  {
1252  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1253  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1254  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1255 
1256  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1257  }
1258  else if (!A.row_major() && B.row_major() && !C.row_major())
1259  {
1260  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1261  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1262  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1263 
1264  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1265  }
1266  else if (!A.row_major() && !B.row_major() && C.row_major())
1267  {
1268  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1269  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1270  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1271 
1272  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1273  }
1274  else
1275  {
1276  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1277  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1278  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1279 
1280  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1281  }
1282  }
1283  else if (trans_A && !trans_B)
1284  {
1285  if (A.row_major() && B.row_major() && C.row_major())
1286  {
1287  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1288  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1289  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1290 
1291  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1292  }
1293  else if (A.row_major() && B.row_major() && !C.row_major())
1294  {
1295  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1296  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1297  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1298 
1299  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1300  }
1301  else if (A.row_major() && !B.row_major() && C.row_major())
1302  {
1303  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1304  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1305  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1306 
1307  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1308  }
1309  else if (A.row_major() && !B.row_major() && !C.row_major())
1310  {
1311  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1312  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1313  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1314 
1315  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1316  }
1317  else if (!A.row_major() && B.row_major() && C.row_major())
1318  {
1319  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1320  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1321  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1322 
1323  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1324  }
1325  else if (!A.row_major() && B.row_major() && !C.row_major())
1326  {
1327  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1328  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1329  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1330 
1331  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1332  }
1333  else if (!A.row_major() && !B.row_major() && C.row_major())
1334  {
1335  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1336  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1337  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1338 
1339  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1340  }
1341  else
1342  {
1343  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1344  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1345  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1346 
1347  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1348  }
1349  }
1350  else if (trans_A && trans_B)
1351  {
1352  if (A.row_major() && B.row_major() && C.row_major())
1353  {
1354  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1355  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1356  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1357 
1358  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1359  }
1360  else if (A.row_major() && B.row_major() && !C.row_major())
1361  {
1362  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1363  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1364  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1365 
1366  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1367  }
1368  else if (A.row_major() && !B.row_major() && C.row_major())
1369  {
1370  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1371  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1372  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1373 
1374  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1375  }
1376  else if (A.row_major() && !B.row_major() && !C.row_major())
1377  {
1378  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1379  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1380  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1381 
1382  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1383  }
1384  else if (!A.row_major() && B.row_major() && C.row_major())
1385  {
1386  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1387  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1388  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1389 
1390  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1391  }
1392  else if (!A.row_major() && B.row_major() && !C.row_major())
1393  {
1394  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1395  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1396  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1397 
1398  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1399  }
1400  else if (!A.row_major() && !B.row_major() && C.row_major())
1401  {
1402  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1403  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1404  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1405 
1406  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1407  }
1408  else
1409  {
1410  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1411  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1412  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1413 
1414  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1415  }
1416  }
1417 }
1418 
1419 
1420 
1421 
1422 //
1424 //
1425 
1426 
// The implementation of the operation mat1 += alpha * vec1 * vec2^T, i.e. a
// scaled rank-1 update, on the single-threaded/OpenMP host backend.
// alpha is first negated if flip_sign_alpha is set, then inverted if
// reciprocal_alpha is set; len_alpha is unused on the CPU path.
// NOTE(review): this rendered listing omits original source lines 1439, 1459
// and 1462 (the function head declaring mat1, and the start offsets of
// vec1/vec2). The variables start1/start2 used below presumably come from
// viennacl::traits::start(vec1) / viennacl::traits::start(vec2) -- confirm
// against the repository source.
1438 template<typename NumericT, typename ScalarT>
1440  ScalarT const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
1441  const vector_base<NumericT> & vec1,
1442  const vector_base<NumericT> & vec2)
1443 {
1444  typedef NumericT value_type;
1445 
 // Raw host pointers: mat1 is written, vec1/vec2 are read-only.
1446  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
1447  value_type const * data_v1 = detail::extract_raw_pointer<value_type>(vec1);
1448  value_type const * data_v2 = detail::extract_raw_pointer<value_type>(vec2);
1449 
 // Submatrix layout of mat1: offsets, strides, logical and padded sizes.
1450  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
1451  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
1452  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
1453  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
1454  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
1455  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
1456  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
1457  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
1458 
 // Strides of the two vectors (their start offsets are on the omitted lines).
1460  vcl_size_t inc1 = viennacl::traits::stride(vec1);
1461 
1463  vcl_size_t inc2 = viennacl::traits::stride(vec2);
1464 
 // Preprocess alpha on the host: optional sign flip, then optional reciprocal.
1465  value_type data_alpha = alpha;
1466  if (flip_sign_alpha)
1467  data_alpha = -data_alpha;
1468  if (reciprocal_alpha)
1469  data_alpha = static_cast<value_type>(1) / data_alpha;
1470 
1471  if (mat1.row_major())
1472  {
 // Row-major: fix the row, fold alpha into v1[row] once, then sweep the
 // columns so the writes into mat1 touch memory sequentially.
1473  for (vcl_size_t row = 0; row < A_size1; ++row)
1474  {
1475  value_type value_v1 = data_alpha * data_v1[row * inc1 + start1];
1476  for (vcl_size_t col = 0; col < A_size2; ++col)
1477  data_A[viennacl::row_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2];
1478  }
1479  }
1480  else
1481  {
 // Column-major: fix the column, fold alpha into v2[col], sweep the rows.
1482  for (vcl_size_t col = 0; col < A_size2; ++col) //run through matrix sequentially
1483  {
1484  value_type value_v2 = data_alpha * data_v2[col * inc2 + start2];
1485  for (vcl_size_t row = 0; row < A_size1; ++row)
1486  data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2;
1487  }
1488  }
1489 }
1490 
1491 
// Stores the main diagonal of A in vector D and the superdiagonal in vector S
// (packing step of the bidiagonalization used by the SVD/eigenvalue routines).
// NOTE(review): the rendered listing omits original lines 1500 (function head
// naming A), 1514-1515 (A_inc1/A_inc2) and 1519-1525 (start/stride of D and S,
// plus size1/size2 of A) -- the identifiers used below presumably come from
// the usual viennacl::traits accessors; confirm against the repository source.
1499 template <typename NumericT, typename S1>
1501  vector_base<S1> & D,
1502  vector_base<S1> & S
1503  )
1504 
1505  {
1506  typedef NumericT value_type;
1507 
1508  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1509  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1510  value_type * data_S = detail::extract_raw_pointer<value_type>(S);
1511 
1512  vcl_size_t A_start1 = viennacl::traits::start1(A);
1513  vcl_size_t A_start2 = viennacl::traits::start2(A);
1516  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1517  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1518 
1522 
1526 
 // Only min(rows, cols) diagonal entries exist.
1527  vcl_size_t size = std::min(size1, size2);
1528  if (A.row_major())
1529  {
 // Iterations are independent: entry i of D/S reads only row i of A,
 // hence the worksharing loop below is race-free.
1530 #ifdef VIENNACL_WITH_OPENMP
1531  #pragma omp parallel for
1532 #endif
1533  for(long i2 = 0; i2 < long(size) - 1; i2++)
1534  {
1535  vcl_size_t i = vcl_size_t(i2);
1536  data_D[start1 + inc1 * i] = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1537  data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1538  }
 // The last diagonal entry has no superdiagonal neighbour -> handled alone.
1539  data_D[start1 + inc1 * (size-1)] = data_A[viennacl::row_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1540 
1541  }
1542  else
1543  {
 // Same packing for column-major storage; only mem_index differs.
1544 #ifdef VIENNACL_WITH_OPENMP
1545  #pragma omp parallel for
1546 #endif
1547  for(long i2 = 0; i2 < long(size) - 1; i2++)
1548  {
1549  vcl_size_t i = vcl_size_t(i2);
1550  data_D[start1 + inc1 * i] = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1551  data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1552  }
1553  data_D[start1 + inc1 * (size-1)] = data_A[viennacl::column_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1554  }
1555 
1556  }
1557 
1558 
1559 
// Public dispatcher: stores the diagonal of A in dh and the superdiagonal in
// sh.
// NOTE(review): the rendered listing omits original lines 1561 (function head
// naming A) and 1567 (the entire body) -- the body presumably forwards to
// bidiag_pack_impl(A, dh, sh); confirm against the repository source.
1560  template <typename NumericT, typename VectorType>
1562  VectorType & dh,
1563  VectorType & sh
1564  )
1565  {
1566 
1568 
1569  }
1570 
// Applies a Householder reflection from the left, A <- P * A with
// P = I - 2 v v^T, where the reflector v is stored in D. Only rows
// >= start + 1 participate in both the dot product and the update.
// NOTE(review): the rendered listing omits original lines 1578-1579 (function
// head naming A and D), 1591-1592 (A_inc1/A_inc2) and 1598-1599 (start1/inc1
// of D) -- presumably the usual viennacl::traits accessors; confirm against
// the repository source.
1577  template <typename NumericT>
1580  vcl_size_t start)
1581  {
1582  typedef NumericT value_type;
1583  NumericT ss = 0;
 // First row affected by the reflector.
1584  vcl_size_t row_start = start + 1;
1585 
1586  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1587  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1588 
1589  vcl_size_t A_start1 = viennacl::traits::start1(A);
1590  vcl_size_t A_start2 = viennacl::traits::start2(A);
1593  vcl_size_t A_size1 = viennacl::traits::size1(A);
1594  vcl_size_t A_size2 = viennacl::traits::size2(A);
1595  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1596  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1597 
1600 
1601  if (A.row_major())
1602  {
 // Column by column: ss = v^T * A(:,i) over the active rows (sequential),
 // then A(j,i) -= 2 * v[j] * ss for each active row (parallelizable).
1603  for(vcl_size_t i = 0; i < A_size2; i++)
1604  {
1605  ss = 0;
1606  for(vcl_size_t j = row_start; j < A_size1; j++)
1607  ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1608 #ifdef VIENNACL_WITH_OPENMP
1609  #pragma omp parallel for
1610 #endif
1611  for(long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
1612  data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
1613  data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
1614  (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)]* ss);
1615  }
1616  }
1617  else
1618  {
 // Same two-phase update for column-major storage; only mem_index differs.
1619  for(vcl_size_t i = 0; i < A_size2; i++)
1620  {
1621  ss = 0;
1622  for(vcl_size_t j = row_start; j < A_size1; j++)
1623  ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1624 #ifdef VIENNACL_WITH_OPENMP
1625  #pragma omp parallel for
1626 #endif
1627  for(long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
1628  data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]=
1629  data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
1630  (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)]* ss);
1631  }
1632  }
1633 
1634  }
1635 
// Applies a Householder reflection from the right, A <- A * P with
// P = I - 2 v v^T, where the reflector v is stored in D. Unlike the
// left-update above, all columns participate (j runs from 0).
// NOTE(review): the rendered listing omits original lines 1643-1644 (function
// head naming A and D), 1654-1655 (A_inc1/A_inc2) and 1661-1662 (start1/inc1
// of D) -- presumably the usual viennacl::traits accessors; confirm against
// the repository source.
1642  template <typename NumericT>
1645  {
1646  typedef NumericT value_type;
1647  NumericT ss = 0;
1648 
1649  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1650  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1651 
1652  vcl_size_t A_start1 = viennacl::traits::start1(A);
1653  vcl_size_t A_start2 = viennacl::traits::start2(A);
1656  vcl_size_t A_size1 = viennacl::traits::size1(A);
1657  vcl_size_t A_size2 = viennacl::traits::size2(A);
1658  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1659  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1660 
1663 
1664  if (A.row_major())
1665  {
 // Row by row: ss = A(i,:) * v (sequential), then
 // A(i,j) -= 2 * v[j] * ss for every column (parallelizable).
1666  for(vcl_size_t i = 0; i < A_size1; i++)
1667  {
1668  ss = 0;
1669  for(vcl_size_t j = 0; j < A_size2; j++) // ss = ss + D[j] * A(i, j)
1670  ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
1671 
 // Snapshot of the dot product: the update loop must not see its own writes.
1672  NumericT sum_Av = ss;
1673 #ifdef VIENNACL_WITH_OPENMP
1674  #pragma omp parallel for
1675 #endif
1676  for(long j = 0; j < static_cast<long>(A_size2); j++) // A(i, j) = A(i, j) - 2 * D[j] * sum_Av
1677  data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
1678  data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
1679  }
1680  }
1681  else
1682  {
 // Same two-phase update for column-major storage; only mem_index differs.
1683  for(vcl_size_t i = 0; i < A_size1; i++)
1684  {
1685  ss = 0;
1686  for(vcl_size_t j = 0; j < A_size2; j++) // ss = ss + D[j] * A(i, j)
1687  ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
1688 
1689  NumericT sum_Av = ss;
1690 #ifdef VIENNACL_WITH_OPENMP
1691  #pragma omp parallel for
1692 #endif
1693  for(long j = 0; j < static_cast<long>(A_size2); j++) // A(i, j) = A(i, j) - 2 * D[j] * sum_Av
1694  data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
1695  data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
1696  }
1697  }
1698 
1699 
1700  }
1701 
// Updates the accumulated transformation matrix Q (needed for eigenvector
// computation): Q <- Q * P. Since scaled_rank_1_update is called with
// flip_sign_alpha = 1, the effective scaling is -beta = -2, i.e.
// vcl_P accumulates vcl_P - 2 * D * D^T.
// NOTE(review): the rendered listing omits original lines 1709-1710 (function
// head naming Q and D) and 1715 (the construction of vcl_P -- presumably an
// identity matrix of order A_size1, giving P = I - 2 D D^T); confirm against
// the repository source.
1708  template <typename NumericT>
1711  vcl_size_t A_size1)
1712 
1713  {
1714  NumericT beta = 2;
 // Work on copies: Q is both an input (via Q_temp) and the output.
1716  viennacl::matrix<NumericT> Q_temp = Q;
1717  viennacl::vector<NumericT> vcl_D = D;
1718 
1719 
1720  viennacl::linalg::host_based::scaled_rank_1_update(vcl_P, beta, 1, 0, 1, vcl_D, vcl_D);
1721  Q = viennacl::linalg::prod(Q_temp, vcl_P);
1722 
1723  }
1724 
// Updates the matrix Q as part of the tql2 algorithm: for i = m-1 down to l,
// columns i and i+1 of Q are combined by a plane (Givens-type) rotation with
// coefficients tmp1[i] and tmp2[i]:
//   Q(k, i+1) = tmp2[i] * Q(k, i) + tmp1[i] * Q(k, i+1)
//   Q(k, i)   = tmp1[i] * Q(k, i) - tmp2[i] * Q(k, i+1)_old
// (tmp1/tmp2 presumably hold cosines/sines of the rotation angles -- confirm
// against the tql2 driver.)
// NOTE(review): the rendered listing omits original lines 1735 (function head
// naming Q), 1750-1751 (Q_inc1/Q_inc2), 1756 and 1759 (start1/start2 of
// tmp1/tmp2) -- presumably the usual viennacl::traits accessors; confirm
// against the repository source.
1734  template<typename NumericT>
1736  vector_base<NumericT> & tmp1,
1737  vector_base<NumericT> & tmp2,
1738  int l,
1739  int m
1740  )
1741  {
1742  typedef NumericT value_type;
1743 
1744  value_type * data_Q = detail::extract_raw_pointer<value_type>(Q);
1745  value_type * data_tmp1 = detail::extract_raw_pointer<value_type>(tmp1);
1746  value_type * data_tmp2 = detail::extract_raw_pointer<value_type>(tmp2);
1747 
1748  vcl_size_t Q_start1 = viennacl::traits::start1(Q);
1749  vcl_size_t Q_start2 = viennacl::traits::start2(Q);
1752  vcl_size_t Q_size1 = viennacl::traits::size1(Q);
1753  vcl_size_t Q_internal_size1 = viennacl::traits::internal_size1(Q);
1754  vcl_size_t Q_internal_size2 = viennacl::traits::internal_size2(Q);
1755 
1757  vcl_size_t inc1 = viennacl::traits::stride(tmp1);
1758 
1760  vcl_size_t inc2 = viennacl::traits::stride(tmp2);
1761 
1762  if (Q.row_major())
1763  {
 // Rotations must be applied in order (i descending), but within one
 // rotation every row k of Q is independent -> parallel inner loop.
1764  for( int i = m - 1; i >= l; i--)
1765  {
1766 #ifdef VIENNACL_WITH_OPENMP
1767  #pragma omp parallel for
1768 #endif
1769  for(long k = 0; k < static_cast<long>(Q_size1); k++)
1770  {
1771 
1772  // h = data_Q(k, i+1);
1773  NumericT h = data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)];
1774 
1775  // Q(k, i+1) = tmp2[i] * Q(k, i) + tmp1[i]*h;
1776  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp2[start2 + inc2 * vcl_size_t(i)] *
1777  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] + data_tmp1[start1 + inc1 * vcl_size_t(i)] * h;
1778 
1779  // Q(k, i) = tmp1[i] * Q(k, i) - tmp2[i]*h;
1780  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp1[start1 + inc1 * vcl_size_t(i)] *
1781  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] - data_tmp2[start2 + inc2 * vcl_size_t(i)]*h;
1782  }
1783  }
1784  }
1785  else // column_major
1786  {
 // Identical rotation sequence for column-major storage.
1787  for( int i = m - 1; i >= l; i--)
1788  {
1789 #ifdef VIENNACL_WITH_OPENMP
1790  #pragma omp parallel for
1791 #endif
1792  for(long k = 0; k < static_cast<long>(Q_size1); k++)
1793  {
1794 
1795  // h = data_Q(k, i+1);
1796  NumericT h = data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)];
1797 
1798  // Q(k, i+1) = tmp2[i] * Q(k, i) + tmp1[i]*h;
1799  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp2[start2 + inc2 * vcl_size_t(i)] *
1800  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] + data_tmp1[start1 + inc1 * vcl_size_t(i)] * h;
1801 
1802  // Q(k, i) = tmp1[i] * Q(k, i) - tmp2[i]*h;
1803  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp1[start1 + inc1 * vcl_size_t(i)] *
1804  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] - data_tmp2[start2 + inc2 * vcl_size_t(i)]*h;
1805  }
1806  }
1807  }
1808 
1809  }
1810 
1811 
// Copies a row or a column of A into the dense vector V:
//  - copy_col == true:  V <- A(row_start .. A_size1-1, col_start)
//  - copy_col == false: V <- A(row_start, col_start .. )
// NOTE(review): the rendered listing omits original lines 1822 (function head
// naming A) and 1836-1837 (A_inc1/A_inc2) -- presumably the usual
// viennacl::traits accessors; confirm against the repository source.
// NOTE(review): the row-copy branch bounds its column index by A_size1 (the
// row count), not A_size2 -- looks like this routine assumes a square A, as
// in the eigenvalue solvers; verify before reusing on rectangular matrices.
1821  template <typename NumericT, typename S1>
1823  vector_base<S1> & V,
1824  vcl_size_t row_start,
1825  vcl_size_t col_start,
1826  bool copy_col
1827  )
1828  {
1829  typedef NumericT value_type;
1830 
1831  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1832  value_type * data_V = detail::extract_raw_pointer<value_type>(V);
1833 
1834  vcl_size_t A_start1 = viennacl::traits::start1(A);
1835  vcl_size_t A_start2 = viennacl::traits::start2(A);
1838  vcl_size_t A_size1 = viennacl::traits::size1(A);
1839  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1840  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1841 
1842 
1843  if(copy_col)
1844  {
1845  if (A.row_major())
1846  {
 // Column copy: V[0..] receives A(i, col_start) for i >= row_start.
1847 #ifdef VIENNACL_WITH_OPENMP
1848  #pragma omp parallel for
1849 #endif
1850  for(long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
1851  {
1852  data_V[i - static_cast<long>(row_start)] = data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1853  }
1854  }
1855  else
1856  {
1857 #ifdef VIENNACL_WITH_OPENMP
1858  #pragma omp parallel for
1859 #endif
1860  for(long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
1861  {
1862  data_V[i - static_cast<long>(row_start)] = data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1863  }
1864  }
1865  }
1866  else
1867  {
1868  if (A.row_major())
1869  {
 // Row copy: V[0..] receives A(row_start, i) for i >= col_start.
1870 #ifdef VIENNACL_WITH_OPENMP
1871  #pragma omp parallel for
1872 #endif
1873  for(long i = static_cast<long>(col_start); i < static_cast<long>(A_size1); i++)
1874  {
1875  data_V[i - static_cast<long>(col_start)] = data_A[viennacl::row_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1876  }
1877  }
1878  else
1879  {
1880 #ifdef VIENNACL_WITH_OPENMP
1881  #pragma omp parallel for
1882 #endif
1883  for(long i = static_cast<long>(col_start); i < static_cast<long>(A_size1); i++)
1884  {
1885  data_V[i - static_cast<long>(col_start)] = data_A[viennacl::column_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1886  }
1887  }
1888  }
1889  }
1890 
1891 } // namespace host_based
1892 } //namespace linalg
1893 } //namespace viennacl
1894 
1895 
1896 #endif
void fill(MatrixType &matrix, vcl_size_t row_index, vcl_size_t col_index, NumericT value)
Generic filler routine for setting an entry of a matrix to a particular value.
Definition: fill.hpp:46
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t, vcl_size_t num_cols)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:314
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
void bidiag_pack_impl(matrix_base< NumericT > &A, vector_base< S1 > &D, vector_base< S1 > &S)
This function stores the diagonal and the superdiagonal of a matrix in two vectors.
void matrix_diag_to_vector(const matrix_base< NumericT > &mat, int k, vector_base< NumericT > &vec)
Generic size and resize functionality for different vector and matrix types.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:382
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:163
Worker class for decomposing expression templates.
Definition: op_applier.hpp:43
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:390
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:341
size_type stride2() const
Returns the stride (increment) between two consecutive columns of the matrix.
Definition: matrix_def.hpp:234
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:43
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
Determines row and column increments for matrices and matrix proxies.
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: matrix_def.hpp:69
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:201
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
This function applies a householder transformation to a matrix: A <- A * P with a householder reflect...
float NumericT
Definition: bisect.cpp:40
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:102
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
Helper array for accessing a strided submatrix embedded in a larger matrix.
Definition: common.hpp:73
Definition: blas3.hpp:36
void copy_vec(matrix_base< NumericT > &A, vector_base< S1 > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
This function copies a row or a column from a matrix to a vector.
void prod_impl(const matrix_base< NumericT > &mat, bool trans, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void matrix_column(const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec)
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
This function updates the matrix Q, which is needed for the computation of the eigenvectors.
size_type stride1() const
Returns the stride (increment) between two consecutive rows of the matrix.
Definition: matrix_def.hpp:232
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
std::size_t vcl_size_t
Definition: forwards.h:75
Common routines for single-threaded or OpenMP-enabled execution on CPU.
Proxy classes for vectors.
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
void scaled_rank_1_update(matrix_base< NumericT > &mat1, ScalarT const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:900
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void matrix_row(const matrix_base< NumericT > &mat, unsigned int i, vector_base< NumericT > &vec)
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
bool row_major() const
Definition: matrix_def.hpp:248
void givens_next(matrix_base< NumericT > &Q, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
This function updates the matrix Q. It is part of the tql2 algorithm.
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void bidiag_pack(matrix_base< NumericT > &A, VectorType &dh, VectorType &sh)
A tag class representing transposed matrices.
Definition: forwards.h:220
size_type start2() const
Returns the starting column offset of the matrix within the underlying buffer.
Definition: matrix_def.hpp:230
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
This function applies a householder transformation to a matrix. A <- P * A with a householder reflect...
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:130
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:240
void prod(MatrixAccT1 &A, MatrixAccT2 &B, MatrixAccT3 &C, vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2, NumericT alpha, NumericT beta)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:238
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:331
T min(const T &lhs, const T &rhs)
Minimum.
Definition: util.hpp:45
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat)
Defines the action of certain unary and binary operators and its arguments (for host execution)...
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:134
Implementation of the ViennaCL scalar class.
A collection of compile time type deductions.
Simple enable-if variant that uses the SFINAE pattern.
size_type start1() const
Returns the starting row offset of the matrix within the underlying buffer.
Definition: matrix_def.hpp:228