1 #ifndef VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
52 template<
typename DestNumericT,
typename SrcNumericT>
55 assert(mat1.
row_major() == mat2.
row_major() && bool(
"Addition/subtraction on mixed matrix layouts not supported yet!"));
57 DestNumericT * data_A = detail::extract_raw_pointer<DestNumericT>(mat1);
58 SrcNumericT
const * data_B = detail::extract_raw_pointer<SrcNumericT>(mat2);
81 #ifdef VIENNACL_WITH_OPENMP
82 #pragma omp parallel for
84 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
86 wrapper_A(
row, col) =
static_cast<DestNumericT
>(wrapper_B(
row, col));
93 #ifdef VIENNACL_WITH_OPENMP
94 #pragma omp parallel for
96 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
98 wrapper_A(
row, col) =
static_cast<DestNumericT
>(wrapper_B(
row, col));
105 typename SizeT,
typename DistanceT>
109 const NumericT * temp_proxy = detail::extract_raw_pointer<NumericT>(proxy.lhs());
110 NumericT * temp = detail::extract_raw_pointer<NumericT>(temp_trans);
112 vcl_size_t proxy_int_size1=proxy.lhs().internal_size1();
113 vcl_size_t proxy_int_size2=proxy.lhs().internal_size2();
117 #ifdef VIENNACL_WITH_OPENMP
118 #pragma omp parallel for
120 for (
long i2 = 0; i2 < static_cast<long>(proxy_int_size1*proxy_int_size2); ++i2)
125 if (row < proxy.lhs().size1() && col < proxy.lhs().size2())
127 if (proxy.lhs().row_major())
130 proxy.lhs().start2() + proxy.lhs().stride2() * col,
131 proxy_int_size1, proxy_int_size2);
135 temp[new_pos] = temp_proxy[pos];
140 proxy.lhs().start2() + proxy.lhs().stride2() * col, proxy_int_size1,
145 temp[new_pos] = temp_proxy[pos];
151 template<
typename NumericT,
typename ScalarT1>
155 assert(mat1.
row_major() == mat2.
row_major() && bool(
"Addition/subtraction on mixed matrix layouts not supported yet!"));
159 value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
160 value_type
const * data_B = detail::extract_raw_pointer<value_type>(mat2);
162 value_type data_alpha = alpha;
164 data_alpha = -data_alpha;
187 if (reciprocal_alpha)
189 #ifdef VIENNACL_WITH_OPENMP
190 #pragma omp parallel for
192 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
193 for (
vcl_size_t col = 0; col < A_size2; ++col)
194 wrapper_A(
row, col) = wrapper_B(
row, col) / data_alpha;
198 #ifdef VIENNACL_WITH_OPENMP
199 #pragma omp parallel for
201 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
202 for (
vcl_size_t col = 0; col < A_size2; ++col)
203 wrapper_A(
row, col) = wrapper_B(
row, col) * data_alpha;
211 if (reciprocal_alpha)
213 #ifdef VIENNACL_WITH_OPENMP
214 #pragma omp parallel for
216 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
218 wrapper_A(
row, col) = wrapper_B(
row, col) / data_alpha;
222 #ifdef VIENNACL_WITH_OPENMP
223 #pragma omp parallel for
225 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
227 wrapper_A(
row, col) = wrapper_B(
row, col) * data_alpha;
234 typename ScalarT1,
typename ScalarT2>
243 value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
244 value_type
const * data_B = detail::extract_raw_pointer<value_type>(mat2);
245 value_type
const * data_C = detail::extract_raw_pointer<value_type>(mat3);
247 value_type data_alpha = alpha;
249 data_alpha = -data_alpha;
251 value_type data_beta = beta;
253 data_beta = -data_beta;
284 if (reciprocal_alpha && reciprocal_beta)
286 #ifdef VIENNACL_WITH_OPENMP
287 #pragma omp parallel for
289 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
290 for (
vcl_size_t col = 0; col < A_size2; ++col)
291 wrapper_A(
row, col) = wrapper_B(
row, col) / data_alpha + wrapper_C(
row, col) / data_beta;
293 else if (reciprocal_alpha && !reciprocal_beta)
295 #ifdef VIENNACL_WITH_OPENMP
296 #pragma omp parallel for
298 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
299 for (
vcl_size_t col = 0; col < A_size2; ++col)
300 wrapper_A(
row, col) = wrapper_B(
row, col) / data_alpha + wrapper_C(
row, col) * data_beta;
302 else if (!reciprocal_alpha && reciprocal_beta)
304 #ifdef VIENNACL_WITH_OPENMP
305 #pragma omp parallel for
307 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
308 for (
vcl_size_t col = 0; col < A_size2; ++col)
309 wrapper_A(
row, col) = wrapper_B(
row, col) * data_alpha + wrapper_C(
row, col) / data_beta;
311 else if (!reciprocal_alpha && !reciprocal_beta)
313 #ifdef VIENNACL_WITH_OPENMP
314 #pragma omp parallel for
316 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
317 for (
vcl_size_t col = 0; col < A_size2; ++col)
318 wrapper_A(
row, col) = wrapper_B(
row, col) * data_alpha + wrapper_C(
row, col) * data_beta;
327 if (reciprocal_alpha && reciprocal_beta)
329 #ifdef VIENNACL_WITH_OPENMP
330 #pragma omp parallel for
332 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
334 wrapper_A(
row, col) = wrapper_B(
row, col) / data_alpha + wrapper_C(
row, col) / data_beta;
336 else if (reciprocal_alpha && !reciprocal_beta)
338 #ifdef VIENNACL_WITH_OPENMP
339 #pragma omp parallel for
341 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
343 wrapper_A(
row, col) = wrapper_B(
row, col) / data_alpha + wrapper_C(
row, col) * data_beta;
345 else if (!reciprocal_alpha && reciprocal_beta)
347 #ifdef VIENNACL_WITH_OPENMP
348 #pragma omp parallel for
350 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
352 wrapper_A(
row, col) = wrapper_B(
row, col) * data_alpha + wrapper_C(
row, col) / data_beta;
354 else if (!reciprocal_alpha && !reciprocal_beta)
356 #ifdef VIENNACL_WITH_OPENMP
357 #pragma omp parallel for
359 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
361 wrapper_A(
row, col) = wrapper_B(
row, col) * data_alpha + wrapper_C(
row, col) * data_beta;
369 typename ScalarT1,
typename ScalarT2>
378 value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
379 value_type
const * data_B = detail::extract_raw_pointer<value_type>(mat2);
380 value_type
const * data_C = detail::extract_raw_pointer<value_type>(mat3);
382 value_type data_alpha = alpha;
384 data_alpha = -data_alpha;
386 value_type data_beta = beta;
388 data_beta = -data_beta;
419 if (reciprocal_alpha && reciprocal_beta)
421 #ifdef VIENNACL_WITH_OPENMP
422 #pragma omp parallel for
424 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
425 for (
vcl_size_t col = 0; col < A_size2; ++col)
426 wrapper_A(
row, col) += wrapper_B(
row, col) / data_alpha + wrapper_C(
row, col) / data_beta;
428 else if (reciprocal_alpha && !reciprocal_beta)
430 #ifdef VIENNACL_WITH_OPENMP
431 #pragma omp parallel for
433 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
434 for (
vcl_size_t col = 0; col < A_size2; ++col)
435 wrapper_A(
row, col) += wrapper_B(
row, col) / data_alpha + wrapper_C(
row, col) * data_beta;
437 else if (!reciprocal_alpha && reciprocal_beta)
439 #ifdef VIENNACL_WITH_OPENMP
440 #pragma omp parallel for
442 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
443 for (
vcl_size_t col = 0; col < A_size2; ++col)
444 wrapper_A(
row, col) += wrapper_B(
row, col) * data_alpha + wrapper_C(
row, col) / data_beta;
446 else if (!reciprocal_alpha && !reciprocal_beta)
448 #ifdef VIENNACL_WITH_OPENMP
449 #pragma omp parallel for
451 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
452 for (
vcl_size_t col = 0; col < A_size2; ++col)
453 wrapper_A(
row, col) += wrapper_B(
row, col) * data_alpha + wrapper_C(
row, col) * data_beta;
462 if (reciprocal_alpha && reciprocal_beta)
464 #ifdef VIENNACL_WITH_OPENMP
465 #pragma omp parallel for
467 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
469 wrapper_A(
row, col) += wrapper_B(
row, col) / data_alpha + wrapper_C(
row, col) / data_beta;
471 else if (reciprocal_alpha && !reciprocal_beta)
473 #ifdef VIENNACL_WITH_OPENMP
474 #pragma omp parallel for
476 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
478 wrapper_A(
row, col) += wrapper_B(
row, col) / data_alpha + wrapper_C(
row, col) * data_beta;
480 else if (!reciprocal_alpha && reciprocal_beta)
482 #ifdef VIENNACL_WITH_OPENMP
483 #pragma omp parallel for
485 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
487 wrapper_A(
row, col) += wrapper_B(
row, col) * data_alpha + wrapper_C(
row, col) / data_beta;
489 else if (!reciprocal_alpha && !reciprocal_beta)
491 #ifdef VIENNACL_WITH_OPENMP
492 #pragma omp parallel for
494 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
496 wrapper_A(
row, col) += wrapper_B(
row, col) * data_alpha + wrapper_C(
row, col) * data_beta;
505 template<
typename NumericT>
510 value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
511 value_type alpha =
static_cast<value_type
>(s);
526 #ifdef VIENNACL_WITH_OPENMP
527 #pragma omp parallel for
529 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
530 for (
vcl_size_t col = 0; col < A_size2; ++col)
531 wrapper_A(static_cast<vcl_size_t>(
row), col) = alpha;
539 #ifdef VIENNACL_WITH_OPENMP
540 #pragma omp parallel for
542 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
544 wrapper_A(
row, static_cast<vcl_size_t>(col)) = alpha;
552 template<
typename NumericT>
557 value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
558 value_type alpha =
static_cast<value_type
>(s);
573 #ifdef VIENNACL_WITH_OPENMP
574 #pragma omp parallel for
576 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
577 wrapper_A(
row,
row) = alpha;
583 #ifdef VIENNACL_WITH_OPENMP
584 #pragma omp parallel for
586 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
587 wrapper_A(
row,
row) = alpha;
591 template<
typename NumericT>
596 value_type *data_A = detail::extract_raw_pointer<value_type>(mat);
597 value_type
const *data_vec = detail::extract_raw_pointer<value_type>(vec);
627 wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
634 wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
638 template<
typename NumericT>
643 value_type
const * data_A = detail::extract_raw_pointer<value_type>(mat);
644 value_type * data_vec = detail::extract_raw_pointer<value_type>(vec);
672 data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
679 data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
683 template<
typename NumericT>
688 value_type
const * data_A = detail::extract_raw_pointer<value_type>(mat);
689 value_type * data_vec = detail::extract_raw_pointer<value_type>(vec);
709 data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
716 data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
720 template<
typename NumericT>
725 value_type
const * data_A = detail::extract_raw_pointer<value_type>(mat);
726 value_type * data_vec = detail::extract_raw_pointer<value_type>(vec);
746 data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
753 data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
768 template<
typename NumericT,
typename OpT>
772 assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
777 value_type * data_A = detail::extract_raw_pointer<value_type>(A);
778 value_type
const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
779 value_type
const * data_C = detail::extract_raw_pointer<value_type>(proxy.rhs());
810 #ifdef VIENNACL_WITH_OPENMP
811 #pragma omp parallel for
813 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
814 for (
vcl_size_t col = 0; col < A_size2; ++col)
815 OpFunctor::apply(wrapper_A(
row, col), wrapper_B(
row, col), wrapper_C(
row, col));
826 #ifdef VIENNACL_WITH_OPENMP
827 #pragma omp parallel for
829 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
831 OpFunctor::apply(wrapper_A(
row, col), wrapper_B(
row, col), wrapper_C(
row, col));
842 template<
typename NumericT,
typename OpT>
846 assert(A.
row_major() == proxy.lhs().row_major() && A.
row_major() == proxy.rhs().row_major() && bool(
"Element-wise operations on mixed matrix layouts not supported yet!"));
851 value_type * data_A = detail::extract_raw_pointer<value_type>(A);
852 value_type
const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
875 #ifdef VIENNACL_WITH_OPENMP
876 #pragma omp parallel for
878 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
879 for (
vcl_size_t col = 0; col < A_size2; ++col)
880 OpFunctor::apply(wrapper_A(
row, col), wrapper_B(
row, col));
887 #ifdef VIENNACL_WITH_OPENMP
888 #pragma omp parallel for
890 for (
long col = 0; col < static_cast<long>(A_size2); ++col)
892 OpFunctor::apply(wrapper_A(
row, col), wrapper_B(
row, col));
913 template<
typename NumericT>
920 value_type
const * data_A = detail::extract_raw_pointer<value_type>(mat);
921 value_type
const * data_x = detail::extract_raw_pointer<value_type>(vec);
922 value_type * data_result = detail::extract_raw_pointer<value_type>(result);
944 value_type temp = data_x[
start1];
949 for (
vcl_size_t col = 1; col < A_size1; ++col)
951 value_type temp = data_x[col * inc1 +
start1];
960 #ifdef VIENNACL_WITH_OPENMP
961 #pragma omp parallel for
963 for (
long row = 0; row < static_cast<long>(A_size1); ++
row)
966 for (
vcl_size_t col = 0; col < A_size2; ++col)
967 temp += data_A[
viennacl::row_major::mem_index(static_cast<vcl_size_t>(
row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
969 data_result[
static_cast<vcl_size_t>(
row) * inc2 + start2] = temp;
978 value_type temp = data_x[
start1];
982 for (
vcl_size_t col = 1; col < A_size2; ++col)
984 value_type temp = data_x[col * inc1 +
start1];
991 #ifdef VIENNACL_WITH_OPENMP
992 #pragma omp parallel for
994 for (
long row = 0; row < static_cast<long>(A_size2); ++
row)
997 for (
vcl_size_t col = 0; col < A_size1; ++col)
998 temp += data_A[
viennacl::column_major::mem_index(col * A_inc1 + A_start1, static_cast<vcl_size_t>(
row) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
1000 data_result[
static_cast<vcl_size_t>(
row) * inc2 + start2] = temp;
1014 template<
typename MatrixAccT1,
typename MatrixAccT2,
typename MatrixAccT3,
typename NumericT>
1015 void prod(MatrixAccT1 & A, MatrixAccT2 & B, MatrixAccT3 & C,
1019 if (C_size1 == 0 || C_size2 == 0 || A_size2 == 0)
1024 vcl_size_t num_blocks_C1 = (C_size1 - 1) / blocksize + 1;
1025 vcl_size_t num_blocks_C2 = (C_size2 - 1) / blocksize + 1;
1026 vcl_size_t num_blocks_A2 = (A_size2 - 1) / blocksize + 1;
1031 #ifdef VIENNACL_WITH_OPENMP
1032 #pragma omp parallel for
1034 for (
long block_idx_i2=0; block_idx_i2<static_cast<long>(num_blocks_C1); ++block_idx_i2)
1037 std::vector<NumericT> buffer_A(blocksize * blocksize);
1038 std::vector<NumericT> buffer_B(blocksize * blocksize);
1039 std::vector<NumericT> buffer_C(blocksize * blocksize);
1042 for (
vcl_size_t block_idx_j=0; block_idx_j<num_blocks_C2; ++block_idx_j)
1051 for (
vcl_size_t block_idx_k=0; block_idx_k<num_blocks_A2; ++block_idx_k)
1062 buffer_A[(i - offset_i) * blocksize + (k - offset_k)] = A(i, k);
1066 buffer_B[(k - offset_k) + (j - offset_j) * blocksize] = B(k, j);
1071 NumericT const * ptrA = &(buffer_A[i*blocksize]);
1074 NumericT const * ptrB = &(buffer_B[j*blocksize]);
1078 temp += ptrA[k] * ptrB[k];
1080 buffer_C[i*blocksize + j] += temp;
1086 if (beta > 0 || beta < 0)
1090 C(i,j) = beta * C(i,j) + alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
1096 C(i,j) = alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
1111 template<
typename NumericT,
typename ScalarT1,
typename ScalarT2 >
1120 value_type
const * data_A = detail::extract_raw_pointer<value_type>(A);
1121 value_type
const * data_B = detail::extract_raw_pointer<value_type>(B);
1122 value_type * data_C = detail::extract_raw_pointer<value_type>(C);
1149 if (!trans_A && !trans_B)
1157 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1165 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1173 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1181 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1189 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1197 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1205 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1213 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1216 else if (!trans_A && trans_B)
1224 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1232 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1240 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1248 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1256 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1264 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1272 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1280 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1283 else if (trans_A && !trans_B)
1291 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1299 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1307 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1315 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1323 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1331 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1339 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1347 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1350 else if (trans_A && trans_B)
1358 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1366 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1374 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1382 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1390 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1398 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1406 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1414 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1438 template<
typename NumericT,
typename ScalarT>
1440 ScalarT
const & alpha,
vcl_size_t ,
bool reciprocal_alpha,
bool flip_sign_alpha,
1446 value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
1447 value_type
const * data_v1 = detail::extract_raw_pointer<value_type>(vec1);
1448 value_type
const * data_v2 = detail::extract_raw_pointer<value_type>(vec2);
1465 value_type data_alpha = alpha;
1466 if (flip_sign_alpha)
1467 data_alpha = -data_alpha;
1468 if (reciprocal_alpha)
1469 data_alpha =
static_cast<value_type
>(1) / data_alpha;
1475 value_type value_v1 = data_alpha * data_v1[
row * inc1 +
start1];
1476 for (
vcl_size_t col = 0; col < A_size2; ++col)
1482 for (
vcl_size_t col = 0; col < A_size2; ++col)
1484 value_type value_v2 = data_alpha * data_v2[col * inc2 +
start2];
1499 template <
typename NumericT,
typename S1>
1508 value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1509 value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1510 value_type * data_S = detail::extract_raw_pointer<value_type>(S);
1530 #ifdef VIENNACL_WITH_OPENMP
1531 #pragma omp parallel for
1533 for(
long i2 = 0; i2 < long(size) - 1; i2++)
1536 data_D[start1 + inc1 * i] = data_A[
viennacl::row_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1537 data_S[start2 + inc2 * (i + 1)] = data_A[
viennacl::row_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1539 data_D[start1 + inc1 * (size-1)] = data_A[
viennacl::row_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1544 #ifdef VIENNACL_WITH_OPENMP
1545 #pragma omp parallel for
1547 for(
long i2 = 0; i2 < long(size) - 1; i2++)
1551 data_S[start2 + inc2 * (i + 1)] = data_A[
viennacl::column_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1553 data_D[start1 + inc1 * (size-1)] = data_A[
viennacl::column_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1560 template <
typename NumericT,
typename VectorType>
1577 template <
typename NumericT>
1586 value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1587 value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1606 for(
vcl_size_t j = row_start; j < A_size1; j++)
1607 ss = ss + data_D[start1 + inc1 * j] * data_A[
viennacl::row_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1608 #ifdef VIENNACL_WITH_OPENMP
1609 #pragma omp parallel for
1611 for(
long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
1612 data_A[
viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
1613 data_A[
viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
1614 (2 * data_D[start1 + inc1 *
static_cast<vcl_size_t>(j)]* ss);
1622 for(
vcl_size_t j = row_start; j < A_size1; j++)
1623 ss = ss + data_D[start1 + inc1 * j] * data_A[
viennacl::column_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1624 #ifdef VIENNACL_WITH_OPENMP
1625 #pragma omp parallel for
1627 for(
long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
1630 (2 * data_D[start1 + inc1 *
static_cast<vcl_size_t>(j)]* ss);
1642 template <
typename NumericT>
1649 value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1650 value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1670 ss = ss + (data_D[start1 + inc1 * j] * data_A[
viennacl::row_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
1673 #ifdef VIENNACL_WITH_OPENMP
1674 #pragma omp parallel for
1676 for(
long j = 0; j < static_cast<long>(A_size2); j++)
1678 data_A[
viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 *
static_cast<vcl_size_t>(j)] * sum_Av);
1687 ss = ss + (data_D[start1 + inc1 * j] * data_A[
viennacl::column_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
1690 #ifdef VIENNACL_WITH_OPENMP
1691 #pragma omp parallel for
1693 for(
long j = 0; j < static_cast<long>(A_size2); j++)
1695 data_A[
viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 *
static_cast<vcl_size_t>(j)] * sum_Av);
1708 template <
typename NumericT>
1734 template<
typename NumericT>
1744 value_type * data_Q = detail::extract_raw_pointer<value_type>(Q);
1745 value_type * data_tmp1 = detail::extract_raw_pointer<value_type>(tmp1);
1746 value_type * data_tmp2 = detail::extract_raw_pointer<value_type>(tmp2);
1764 for(
int i = m - 1; i >= l; i--)
1766 #ifdef VIENNACL_WITH_OPENMP
1767 #pragma omp parallel for
1769 for(
long k = 0; k < static_cast<long>(Q_size1); k++)
1787 for(
int i = m - 1; i >= l; i--)
1789 #ifdef VIENNACL_WITH_OPENMP
1790 #pragma omp parallel for
1792 for(
long k = 0; k < static_cast<long>(Q_size1); k++)
1821 template <
typename NumericT,
typename S1>
1831 value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1832 value_type * data_V = detail::extract_raw_pointer<value_type>(V);
1847 #ifdef VIENNACL_WITH_OPENMP
1848 #pragma omp parallel for
1850 for(
long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
1852 data_V[i -
static_cast<long>(row_start)] = data_A[
viennacl::row_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1857 #ifdef VIENNACL_WITH_OPENMP
1858 #pragma omp parallel for
1860 for(
long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
1862 data_V[i -
static_cast<long>(row_start)] = data_A[
viennacl::column_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1870 #ifdef VIENNACL_WITH_OPENMP
1871 #pragma omp parallel for
1873 for(
long i = static_cast<long>(col_start); i < static_cast<long>(A_size1); i++)
1875 data_V[i -
static_cast<long>(col_start)] = data_A[
viennacl::row_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1880 #ifdef VIENNACL_WITH_OPENMP
1881 #pragma omp parallel for
1883 for(
long i = static_cast<long>(col_start); i < static_cast<long>(A_size1); i++)
1885 data_V[i -
static_cast<long>(col_start)] = data_A[
viennacl::column_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
void fill(MatrixType &matrix, vcl_size_t row_index, vcl_size_t col_index, NumericT value)
Generic filler routine for setting an entry of a matrix to a particular value.
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t, vcl_size_t num_cols)
Returns the memory offset for entry (i,j) of a dense matrix.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
void bidiag_pack_impl(matrix_base< NumericT > &A, vector_base< S1 > &D, vector_base< S1 > &S)
This function stores the diagonal and the superdiagonal of a matrix in two vectors.
void matrix_diag_to_vector(const matrix_base< NumericT > &mat, int k, vector_base< NumericT > &vec)
Generic size and resize functionality for different vector and matrix types.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Worker class for decomposing expression templates.
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Expression template class for representing a tree of expressions which ultimately result in a matrix...
size_type stride2() const
Returns the stride (increment between consecutive entries) along the column dimension of the underlying buffer.
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Determines row and column increments for matrices and matrix proxies.
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
This function applies a householder transformation to a matrix: A <- A * P with a householder reflect...
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
void convert(matrix_base< DestNumericT > &mat1, matrix_base< SrcNumericT > const &mat2)
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
result_of::size_type< T >::type start2(T const &obj)
Helper array for accessing a strided submatrix embedded in a larger matrix.
void copy_vec(matrix_base< NumericT > &A, vector_base< S1 > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
This function copies a row or a column from a matrix to a vector.
void prod_impl(const matrix_base< NumericT > &mat, bool trans, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void matrix_column(const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec)
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
result_of::size_type< T >::type start(T const &obj)
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
This function updates the matrix Q, which is needed for the computation of the eigenvectors.
size_type stride1() const
Returns the stride (increment between consecutive entries) along the row dimension of the underlying buffer.
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Common routines for single-threaded or OpenMP-enabled execution on CPU.
Proxy classes for vectors.
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
void scaled_rank_1_update(matrix_base< NumericT > &mat1, ScalarT const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void matrix_row(const matrix_base< NumericT > &mat, unsigned int i, vector_base< NumericT > &vec)
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void givens_next(matrix_base< NumericT > &Q, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
This function updates the matrix Q. It is part of the tql2 algorithm.
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void bidiag_pack(matrix_base< NumericT > &A, VectorType &dh, VectorType &sh)
A tag class representing transposed matrices.
size_type start2() const
Returns the starting column offset of the matrix within the underlying buffer.
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
This function applies a householder transformation to a matrix. A <- P * A with a householder reflect...
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
void prod(MatrixAccT1 &A, MatrixAccT2 &B, MatrixAccT3 &C, vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2, NumericT alpha, NumericT beta)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t)
Returns the memory offset for entry (i,j) of a dense matrix.
T min(const T &lhs, const T &rhs)
Minimum.
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat)
Defines the action of certain unary and binary operators and its arguments (for host execution)...
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Implementation of the ViennaCL scalar class.
A collection of compile time type deductions.
Simple enable-if variant that uses the SFINAE pattern.
size_type start1() const
Returns the starting row offset of the matrix within the underlying buffer.