#ifndef VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_SPARSE_MATRIX_OPERATIONS_HPP_
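/* Host-based (single-threaded or OpenMP) kernels for ViennaCL's sparse matrix types.
   The row_info(), prod_impl(), inplace_solve() and block_inplace_solve() overloads below are
   normally reached through the user-facing API rather than called directly. A minimal usage
   sketch (assuming the usual ViennaCL front-end types and viennacl::linalg::prod):

     viennacl::compressed_matrix<double> A(n, n);
     viennacl::vector<double> x(n), y(n);
     // ... fill A and x ...
     y = viennacl::linalg::prod(A, x);   // dispatches to the CSR prod_impl() below on the CPU backend
*/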
#ifdef VIENNACL_WITH_OPENMP
#include <omp.h>
#endif
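// row_info() for compressed_matrix (CSR): computes one scalar per row -- the row's inf-norm,
// 1-norm, 2-norm, or its diagonal entry, as selected by info_selector -- and writes it to vec.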
template<typename NumericT, unsigned int AlignmentV>
  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

    unsigned int row_end = row_buffer[row+1];

    switch (info_selector)

        // inf-norm: largest absolute value in the row
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          value = std::max<NumericT>(value, std::fabs(elements[i]));

        // 1-norm: sum of absolute values in the row
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          value += std::fabs(elements[i]);

        // 2-norm: Euclidean norm of the row
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          value += elements[i] * elements[i];
        value = std::sqrt(value);

        // diagonal entry of the row
        for (unsigned int i = row_buffer[row]; i < row_end; ++i)
          if (col_buffer[i] == row)

    result_buf[row] = value;
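// prod_impl() for compressed_matrix * vector: standard CSR matrix-vector product, one sparse
// dot product per result entry, parallelized over the rows.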
template<typename NumericT, unsigned int AlignmentV>
  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long row = 0; row < static_cast<long>(mat.size1()); ++row)

      dot_prod += elements[i] * vec_buf[col_buffer[i] * vec.stride() + vec.start()];
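// prod_impl() for compressed_matrix * dense matrix: C = A * B with A in CSR format; separate
// loop orders are used depending on whether the dense operands are row- or column-major.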
template<typename NumericT, unsigned int AlignmentV>
  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
  unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

    d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

    result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {

        for (vcl_size_t k = row_start; k < row_end; ++k) {
          temp += sp_mat_elements[k] * d_mat_wrapper_row(static_cast<vcl_size_t>(sp_mat_col_buffer[k]), col);

          result_wrapper_row(row, col) = temp;

          result_wrapper_col(row, col) = temp;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {
      for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {

          for (vcl_size_t k = row_start; k < row_end; ++k) {
            temp += sp_mat_elements[k] * d_mat_wrapper_col(static_cast<vcl_size_t>(sp_mat_col_buffer[k]), static_cast<vcl_size_t>(col));

          result_wrapper_row(row, col) = temp;

          result_wrapper_col(row, col) = temp;
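// prod_impl() for compressed_matrix * transposed dense matrix: C = A * trans(B), same structure
// as above but with the dense factor accessed through its transpose proxy (d_mat.lhs()).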
template<typename NumericT, unsigned int AlignmentV>
  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_row_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle1());
  unsigned int const * sp_mat_col_buffer = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

    d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

    result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  if ( d_mat.lhs().row_major() ) {
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row) {

      for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {

        for (vcl_size_t k = row_start; k < row_end; ++k) {
          temp += sp_mat_elements[k] * d_mat_wrapper_row(col, static_cast<vcl_size_t>(sp_mat_col_buffer[k]));

          result_wrapper_row(row, col) = temp;

          result_wrapper_col(row, col) = temp;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {

      for (vcl_size_t k = row_start; k < row_end; ++k) {
        temp += sp_mat_elements[k] * d_mat_wrapper_col(col, static_cast<vcl_size_t>(sp_mat_col_buffer[k]));

        result_wrapper_row(row, col) = temp;

        result_wrapper_col(row, col) = temp;
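// prod_impl() for compressed_matrix * compressed_matrix (sparse-sparse product C = A * B).
// The kernel first bounds the row lengths of C, then runs a symbolic pass to fix the sparsity
// pattern, converts the per-row counts into row offsets, and finally fills the values in a
// numeric pass; each thread works with its own scratch buffers.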
template<typename NumericT, unsigned int AlignmentV>
  NumericT const * A_elements = detail::extract_raw_pointer<NumericT>(A.handle());
  unsigned int const * A_row_buffer = detail::extract_raw_pointer<unsigned int>(A.handle1());
  unsigned int const * A_col_buffer = detail::extract_raw_pointer<unsigned int>(A.handle2());

  NumericT const * B_elements = detail::extract_raw_pointer<NumericT>(B.handle());
  unsigned int const * B_row_buffer = detail::extract_raw_pointer<unsigned int>(B.handle1());
  unsigned int const * B_col_buffer = detail::extract_raw_pointer<unsigned int>(B.handle2());

  unsigned int * C_row_buffer = detail::extract_raw_pointer<unsigned int>(C.handle1());

#if defined(VIENNACL_WITH_OPENMP)
  unsigned int block_factor = 10;
  unsigned int max_threads = omp_get_max_threads();
#else
  unsigned int max_threads = 1;
#endif

  std::vector<unsigned int> max_length_row_C(max_threads);
  std::vector<unsigned int *> row_C_temp_index_buffers(max_threads);
  std::vector<NumericT *> row_C_temp_value_buffers(max_threads);
#if defined(VIENNACL_WITH_OPENMP)
  #pragma omp parallel for schedule(dynamic, A.size1() / (block_factor * max_threads) + 1)
#endif
  for (long i=0; i<long(A.size1()); ++i)

    unsigned int row_start_A = A_row_buffer[i];
    unsigned int row_end_A = A_row_buffer[i+1];

    unsigned int row_C_upper_bound_row = 0;
    for (unsigned int j = row_start_A; j<row_end_A; ++j)

      unsigned int row_B = A_col_buffer[j];

      unsigned int entries_in_row = B_row_buffer[row_B+1] - B_row_buffer[row_B];
      row_C_upper_bound_row += entries_in_row;

#ifdef VIENNACL_WITH_OPENMP
    unsigned int thread_id = omp_get_thread_num();
#else
    unsigned int thread_id = 0;
#endif

    max_length_row_C[thread_id] = std::max(max_length_row_C[thread_id], std::min(row_C_upper_bound_row, static_cast<unsigned int>(B.size2())));

  for (std::size_t i=1; i<max_length_row_C.size(); ++i)
    max_length_row_C[0] = std::max(max_length_row_C[0], max_length_row_C[i]);

  for (unsigned int i=0; i<max_threads; ++i)
    row_C_temp_index_buffers[i] = (unsigned int *)malloc(sizeof(unsigned int)*3*max_length_row_C[0]);
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for schedule(dynamic, A.size1() / (block_factor * max_threads) + 1)
#endif
  for (long i=0; i<long(A.size1()); ++i)

    unsigned int thread_id = 0;
#ifdef VIENNACL_WITH_OPENMP
    thread_id = omp_get_thread_num();
#endif

    unsigned int buffer_len = max_length_row_C[0];

    unsigned int *row_C_vector_1 = row_C_temp_index_buffers[thread_id];
    unsigned int *row_C_vector_2 = row_C_vector_1 + buffer_len;
    unsigned int *row_C_vector_3 = row_C_vector_2 + buffer_len;

    unsigned int row_start_A = A_row_buffer[i];
    unsigned int row_end_A = A_row_buffer[i+1];

    C_row_buffer[i] = row_C_scan_symbolic_vector(row_start_A, row_end_A, A_col_buffer,
                                                 B_row_buffer, B_col_buffer, static_cast<unsigned int>(B.size2()),
                                                 row_C_vector_1, row_C_vector_2, row_C_vector_3);

  // exclusive scan: turn the per-row nonzero counts in C_row_buffer into row offsets
  unsigned int current_offset = 0;
  for (std::size_t i=0; i<C.size1(); ++i)

    unsigned int tmp = C_row_buffer[i];
    C_row_buffer[i] = current_offset;
    current_offset += tmp;

  C_row_buffer[C.size1()] = current_offset;
  C.reserve(current_offset, false);
  for (unsigned int i=0; i<max_threads; ++i)
    row_C_temp_value_buffers[i] = (NumericT *)malloc(sizeof(NumericT)*3*max_length_row_C[0]);

  NumericT * C_elements = detail::extract_raw_pointer<NumericT>(C.handle());
  unsigned int * C_col_buffer = detail::extract_raw_pointer<unsigned int>(C.handle2());
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for schedule(dynamic, A.size1() / (block_factor * max_threads) + 1)
#endif
  for (long i = 0; i < long(A.size1()); ++i)

    unsigned int row_start_A = A_row_buffer[i];
    unsigned int row_end_A = A_row_buffer[i+1];

    unsigned int row_C_buffer_start = C_row_buffer[i];
    unsigned int row_C_buffer_end = C_row_buffer[i+1];

#ifdef VIENNACL_WITH_OPENMP
    unsigned int thread_id = omp_get_thread_num();
#else
    unsigned int thread_id = 0;
#endif

    unsigned int *row_C_vector_1 = row_C_temp_index_buffers[thread_id];
    unsigned int *row_C_vector_2 = row_C_vector_1 + max_length_row_C[0];
    unsigned int *row_C_vector_3 = row_C_vector_2 + max_length_row_C[0];

    NumericT *row_C_vector_1_values = row_C_temp_value_buffers[thread_id];
    NumericT *row_C_vector_2_values = row_C_vector_1_values + max_length_row_C[0];
    NumericT *row_C_vector_3_values = row_C_vector_2_values + max_length_row_C[0];

    row_C_scan_numeric_vector(row_start_A, row_end_A, A_col_buffer, A_elements,
                              B_row_buffer, B_col_buffer, B_elements, static_cast<unsigned int>(B.size2()),
                              row_C_buffer_start, row_C_buffer_end, C_col_buffer, C_elements,
                              row_C_vector_1, row_C_vector_1_values,
                              row_C_vector_2, row_C_vector_2_values,
                              row_C_vector_3, row_C_vector_3_values);
  for (unsigned int i=0; i<max_threads; ++i)
  {
    free(row_C_temp_index_buffers[i]);
    free(row_C_temp_value_buffers[i]);
  }
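// detail::csr_inplace_solve(): forward/backward substitution directly on raw CSR buffers.
// The overloads are distinguished by the triangular solver tag; the variants that track a
// diagonal_entry divide by it, the unit-diagonal variants skip that division.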
template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_inplace_solve(IndexArrayT const & row_buffer,
                       IndexArrayT const & col_buffer,
                       ConstScalarArrayT const & element_buffer,
                       ScalarArrayT & vec_buffer,

    for (vcl_size_t i = row_begin; i < row_end; ++i)

      vec_entry -= vec_buffer[col_index] * element_buffer[i];

    vec_buffer[row] = vec_entry;
template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_inplace_solve(IndexArrayT const & row_buffer,
                       IndexArrayT const & col_buffer,
                       ConstScalarArrayT const & element_buffer,
                       ScalarArrayT & vec_buffer,

    for (vcl_size_t i = row_begin; i < row_end; ++i)

        vec_entry -= vec_buffer[col_index] * element_buffer[i];
      else if (col_index == row)
        diagonal_entry = element_buffer[i];

    vec_buffer[row] = vec_entry / diagonal_entry;
template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_inplace_solve(IndexArrayT const & row_buffer,
                       IndexArrayT const & col_buffer,
                       ConstScalarArrayT const & element_buffer,
                       ScalarArrayT & vec_buffer,

  for (vcl_size_t row2 = 1; row2 < num_cols; ++row2)

    for (vcl_size_t i = row_begin; i < row_end; ++i)

      vec_entry -= vec_buffer[col_index] * element_buffer[i];

    vec_buffer[row] = vec_entry;
template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_inplace_solve(IndexArrayT const & row_buffer,
                       IndexArrayT const & col_buffer,
                       ConstScalarArrayT const & element_buffer,
                       ScalarArrayT & vec_buffer,

  for (vcl_size_t row2 = 0; row2 < num_cols; ++row2)

    for (vcl_size_t i = row_begin; i < row_end; ++i)

        vec_entry -= vec_buffer[col_index] * element_buffer[i];
      else if (col_index == row)
        diagonal_entry = element_buffer[i];

    vec_buffer[row] = vec_entry / diagonal_entry;
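// inplace_solve() overloads for compressed_matrix and the four triangular tags: extract the raw
// CSR buffers and forward to detail::csr_inplace_solve() together with the tag.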
template<typename NumericT, unsigned int AlignmentV>

  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(L.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
template<typename NumericT, unsigned int AlignmentV>

  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(L.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(L.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(L.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, L.size2(), tag);
template<typename NumericT, unsigned int AlignmentV>

  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(U.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
template<typename NumericT, unsigned int AlignmentV>

  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(U.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(U.handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(U.handle2());

  detail::csr_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, U.size2(), tag);
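// detail::csr_trans_inplace_solve(): substitution kernels for the transposed system. The matrix
// is stored row-wise but solved as its transpose, so the CSR buffers are traversed column by
// column and updates are scattered into vec_buffer.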
template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                             IndexArrayT const & col_buffer,
                             ConstScalarArrayT const & element_buffer,
                             ScalarArrayT & vec_buffer,

  for (vcl_size_t col = 0; col < num_cols; ++col)

    NumericT vec_entry = vec_buffer[col];

    for (vcl_size_t i = col_begin; i < col_end; ++i)

      unsigned int row_index = col_buffer[i];

      vec_buffer[row_index] -= vec_entry * element_buffer[i];
template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                             IndexArrayT const & col_buffer,
                             ConstScalarArrayT const & element_buffer,
                             ScalarArrayT & vec_buffer,

  for (vcl_size_t col = 0; col < num_cols; ++col)

    for (vcl_size_t i = col_begin; i < col_end; ++i)

      if (row_index == col)

        diagonal_entry = element_buffer[i];

    NumericT vec_entry = vec_buffer[col] / diagonal_entry;
    vec_buffer[col] = vec_entry;
    for (vcl_size_t i = col_begin; i < col_end; ++i)

      vec_buffer[row_index] -= vec_entry * element_buffer[i];
template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                             IndexArrayT const & col_buffer,
                             ConstScalarArrayT const & element_buffer,
                             ScalarArrayT & vec_buffer,

  for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)

    NumericT vec_entry = vec_buffer[col];

    for (vcl_size_t i = col_begin; i < col_end; ++i)

      vec_buffer[row_index] -= vec_entry * element_buffer[i];
template<typename NumericT, typename ConstScalarArrayT, typename ScalarArrayT, typename IndexArrayT>
void csr_trans_inplace_solve(IndexArrayT const & row_buffer,
                             IndexArrayT const & col_buffer,
                             ConstScalarArrayT const & element_buffer,
                             ScalarArrayT & vec_buffer,

  for (vcl_size_t col2 = 0; col2 < num_cols; ++col2)

    for (vcl_size_t i = col_begin; i < col_end; ++i)

      if (row_index == col)

        diagonal_entry = element_buffer[i];

    NumericT vec_entry = vec_buffer[col] / diagonal_entry;
    vec_buffer[col] = vec_entry;
    for (vcl_size_t i = col_begin; i < col_end; ++i)

      vec_buffer[row_index] -= vec_entry * element_buffer[i];
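// block_inplace_solve() overloads for transposed compressed_matrix proxies. The variants that
// take an explicit diagonal vector (L_diagonal / U_diagonal) divide by the precomputed diagonal
// instead of locating the diagonal entry inside the matrix.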
template<typename NumericT, unsigned int AlignmentV>

  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
  NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.lhs().handle());
  NumericT * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

  for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)

    NumericT vec_entry = vec_buffer[col];

    for (vcl_size_t i = col_begin; i < col_end; ++i)

      unsigned int row_index = col_buffer[i];

      vec_buffer[row_index] -= vec_entry * elements[i];
template<typename NumericT, unsigned int AlignmentV>

  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle1());
  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.lhs().handle2());
  NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.lhs().handle());
  NumericT const * diagonal_buffer = detail::extract_raw_pointer<NumericT>(L_diagonal.handle());
  NumericT * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

  for (vcl_size_t col = 0; col < L.lhs().size1(); ++col)

    NumericT vec_entry = vec_buffer[col] / diagonal_buffer[col];
    vec_buffer[col] = vec_entry;
    for (vcl_size_t i = col_begin; i < col_end; ++i)

      vec_buffer[row_index] -= vec_entry * elements[i];
template<typename NumericT, unsigned int AlignmentV>

  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
  NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.lhs().handle());
  NumericT * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

  for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)

    vcl_size_t col = (U.lhs().size1() - col2) - 1;

    NumericT vec_entry = vec_buffer[col];

    for (vcl_size_t i = col_begin; i < col_end; ++i)

      vec_buffer[row_index] -= vec_entry * elements[i];
template<typename NumericT, unsigned int AlignmentV>

  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle1());
  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.lhs().handle2());
  NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.lhs().handle());
  NumericT const * diagonal_buffer = detail::extract_raw_pointer<NumericT>(U_diagonal.handle());
  NumericT * vec_buffer = detail::extract_raw_pointer<NumericT>(vec.handle());

  for (vcl_size_t col2 = 0; col2 < U.lhs().size1(); ++col2)

    vcl_size_t col = (U.lhs().size1() - col2) - 1;

    NumericT vec_entry = vec_buffer[col] / diagonal_buffer[col];
    vec_buffer[col] = vec_entry;
    for (vcl_size_t i = col_begin; i < col_end; ++i)

      vec_buffer[row_index] -= vec_entry * elements[i];
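// inplace_solve() overloads for transposed compressed_matrix proxies: extract the raw CSR
// buffers of proxy.lhs() and forward to detail::csr_trans_inplace_solve() with the tag.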
template<typename NumericT, unsigned int AlignmentV>

  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
template<typename NumericT, unsigned int AlignmentV>

  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
template<typename NumericT, unsigned int AlignmentV>

  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
template<typename NumericT, unsigned int AlignmentV>

  NumericT * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(proxy.lhs().handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle1());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(proxy.lhs().handle2());

  detail::csr_trans_inplace_solve<NumericT>(row_buffer, col_buffer, elements, vec_buf, proxy.lhs().size1(), tag);
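// prod_impl() for compressed_compressed_matrix * vector: CSR storage that keeps only the nonzero
// rows, so the outer loop runs over the stored rows (nnz1()) and row_indices records which matrix
// row each stored row corresponds to.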
template<typename NumericT>

  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle1());
  unsigned int const * row_indices = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle2());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i = 0; i < static_cast<long>(mat.nnz1()); ++i)

      for (vcl_size_t j = row_buffer[i]; j < row_end; ++j)
        dot_prod += elements[j] * vec_buf[col_buffer[j] * vec.stride() + vec.start()];
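// row_info() for coordinate_matrix (COO): sweeps over the (row, column) pairs, accumulating the
// selected per-row quantity and flushing it to the result whenever the row index changes.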
template<typename NumericT, unsigned int AlignmentV>

  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

  unsigned int last_row = 0;

    unsigned int current_row = coord_buffer[2*i];

    if (current_row != last_row)

        value = std::sqrt(value);

      result_buf[last_row] = value;

      last_row = current_row;

    switch (info_selector)

        value = std::max<NumericT>(value, std::fabs(elements[i]));

        value += std::fabs(elements[i]);

        value += elements[i] * elements[i];

        if (coord_buffer[2*i+1] == current_row)
          value = elements[i];

  value = std::sqrt(value);

  result_buf[last_row] = value;
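// prod_impl() for coordinate_matrix * vector: the result is cleared first, then each stored
// triplet (i, j, val) scatters val * x[j] into result[i].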
template<typename NumericT, unsigned int AlignmentV>

  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coord_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle12());

    result_buf[i * result.stride() + result.start()] = 0;

    result_buf[coord_buffer[2*i] * result.stride() + result.start()]
      += elements[i] * vec_buf[coord_buffer[2*i+1] * vec.stride() + vec.start()];
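// prod_impl() for coordinate_matrix * dense matrix: C = A * B. The result is zeroed, then each
// stored entry (r, c, x) of A adds x * B(c, col) to C(r, col) for every column of B.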
template<typename NumericT, unsigned int AlignmentV>

  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

    d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

    result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {

        NumericT y = d_mat_wrapper_row( c, col);

          result_wrapper_row(r, col) += x * y;

          result_wrapper_col(r, col) += x * y;
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {

        NumericT y = d_mat_wrapper_col( c, col);

          result_wrapper_row( r, col) += x*y;

          result_wrapper_col( r, col) += x*y;
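// prod_impl() for coordinate_matrix * transposed dense matrix: C = A * trans(B), dispatching on
// the storage order of the wrapped dense operand (d_mat.lhs()).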
template<typename NumericT, unsigned int AlignmentV>

  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle12());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

    d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

    result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  if ( d_mat.lhs().row_major() )

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

      for (vcl_size_t col = 0; col < d_mat.size2(); ++col)

      for (vcl_size_t col = 0; col < d_mat.size2(); ++col)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_row( col, c);
          result_wrapper_row(r, col) += x * y;

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_row( col, c);
          result_wrapper_col(r, col) += x * y;
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long i = 0; i < static_cast<long>(sp_mat.nnz()); ++i) {

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_col( col, c);
          result_wrapper_row(r, col) += x * y;

        for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {
          NumericT y = d_mat_wrapper_col( col, c);
          result_wrapper_col(r, col) += x * y;
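// prod_impl() for ell_matrix * vector: ELLPACK stores a fixed number of entries per row
// (internal_maxnnz()); padding slots hold the value 0 and are skipped via the (val > 0 || val < 0) test.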
template<typename NumericT, unsigned int AlignmentV>

  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());

    for (unsigned int item_id = 0; item_id < mat.internal_maxnnz(); ++item_id)

      if (val > 0 || val < 0)

        unsigned int col = coords[offset];
        sum += (vec_buf[col * vec.stride() + vec.start()] * val);
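// prod_impl() for ell_matrix * dense matrix: C = A * B, again split into row-major and
// column-major code paths for the dense operands.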
template<typename NumericT, unsigned int AlignmentV>

  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

    d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

    result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

      for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id)

        if (sp_mat_val < 0 || sp_mat_val > 0)

            result_wrapper_row(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper_row( sp_mat_col, col);

            result_wrapper_col(static_cast<vcl_size_t>(row), col) += sp_mat_val * d_mat_wrapper_row( sp_mat_col, col);
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)

      for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

      for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col) {

      for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {

        if (sp_mat_val < 0 || sp_mat_val > 0)

            result_wrapper_row(row, col) += sp_mat_val * d_mat_wrapper_col( sp_mat_col, col);

            result_wrapper_col(row, col) += sp_mat_val * d_mat_wrapper_col( sp_mat_col, col);
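// prod_impl() for ell_matrix * transposed dense matrix: C = A * trans(B).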
template<typename NumericT, unsigned int AlignmentV>

  NumericT const * sp_mat_elements = detail::extract_raw_pointer<NumericT>(sp_mat.handle());
  unsigned int const * sp_mat_coords = detail::extract_raw_pointer<unsigned int>(sp_mat.handle2());

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

    d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

    result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);
  if ( d_mat.lhs().row_major() )

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(sp_mat.size1()); ++row)

      for (vcl_size_t col = 0; col < d_mat.size2(); ++col)

      for (vcl_size_t col = 0; col < d_mat.size2(); ++col)

      for (vcl_size_t col = 0; col < d_mat.size2(); ++col) {

        for (unsigned int item_id = 0; item_id < sp_mat.maxnnz(); ++item_id) {

          if (sp_mat_val < 0 || sp_mat_val > 0)

              result_wrapper_row(row, col) += sp_mat_val * d_mat_wrapper_row( col, sp_mat_col);

              result_wrapper_col(row, col) += sp_mat_val * d_mat_wrapper_row( col, sp_mat_col);
#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(d_mat.size2()); ++col)

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
    for (long item_id = 0; item_id < static_cast<long>(sp_mat.maxnnz()); ++item_id) {

        if (sp_mat_val < 0 || sp_mat_val > 0)

            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper_row(row, col) += sp_mat_val * d_mat_wrapper_col( col, sp_mat_col);

            for (vcl_size_t col = 0; col < d_mat.size2(); ++col)
              result_wrapper_col(row, col) += sp_mat_val * d_mat_wrapper_col( col, sp_mat_col);
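// prod_impl() for sliced_ell_matrix * vector: rows are processed in blocks of rows_per_block();
// each block stores its own number of columns, and per-block partial results are written back
// only for rows that actually exist in the matrix.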
template<typename NumericT, typename IndexT>

  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  IndexT const * columns_per_block = detail::extract_raw_pointer<IndexT>(mat.handle1());
  IndexT const * column_indices = detail::extract_raw_pointer<IndexT>(mat.handle2());
  IndexT const * block_start = detail::extract_raw_pointer<IndexT>(mat.handle3());

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long block_idx2 = 0; block_idx2 < static_cast<long>(num_blocks); ++block_idx2)

    vcl_size_t current_columns_per_block = columns_per_block[block_idx];

    for (IndexT column_entry_index = 0;
                column_entry_index < current_columns_per_block;
              ++column_entry_index)

      for (IndexT row_in_block = 0; row_in_block < mat.rows_per_block(); ++row_in_block)

        NumericT val = elements[stride_start + row_in_block];

        result_values[row_in_block] += (val > 0 || val < 0) ? vec_buf[column_indices[stride_start + row_in_block] * vec.stride() + vec.start()] * val : 0;

    for (IndexT row_in_block = 0; row_in_block < mat.rows_per_block(); ++row_in_block)

      if (first_row_in_matrix + row_in_block < result.size())
        result_buf[(first_row_in_matrix + row_in_block) * result.stride() + result.start()] = result_values[row_in_block];
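// prod_impl() for hyb_matrix * vector: the HYB format combines an ELL part (regular entries) with
// a CSR part holding the overflow entries of longer rows; both contributions are summed per row.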
template<typename NumericT, unsigned int AlignmentV>

  NumericT * result_buf = detail::extract_raw_pointer<NumericT>(result.handle());
  NumericT const * vec_buf = detail::extract_raw_pointer<NumericT>(vec.handle());
  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());
  NumericT const * csr_elements = detail::extract_raw_pointer<NumericT>(mat.handle5());
  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());

    for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)

      if (val > 0 || val < 0)

        unsigned int col = coords[offset];
        sum += (vec_buf[col * vec.stride() + vec.start()] * val);

    for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)

      sum += (vec_buf[csr_col_buffer[item_id] * vec.stride() + vec.start()] * csr_elements[item_id]);
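// prod_impl() for hyb_matrix * dense matrix: C = A * B, accumulating the ELL and CSR
// contributions for each (row, result_col) pair.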
template<typename NumericT, unsigned int AlignmentV>

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat);
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

    d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

    result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());
  NumericT const * csr_elements = detail::extract_raw_pointer<NumericT>(mat.handle5());
  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());

  for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)

      for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)

        if (val < 0 || val > 0)

            sum += d_mat_wrapper_row(col, result_col) * val;

            sum += d_mat_wrapper_col(col, result_col) * val;

        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_row(static_cast<vcl_size_t>(csr_col_buffer[item_id]), result_col) * csr_elements[item_id];

        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_col(static_cast<vcl_size_t>(csr_col_buffer[item_id]), result_col) * csr_elements[item_id];

        result_wrapper_row(row, result_col) = sum;

        result_wrapper_col(row, result_col) = sum;
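// prod_impl() for hyb_matrix * transposed dense matrix: C = A * trans(B), with the dense operand
// accessed through d_mat.lhs() and dispatched on its storage order.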
template<typename NumericT, unsigned int AlignmentV>

  NumericT const * d_mat_data = detail::extract_raw_pointer<NumericT>(d_mat.lhs());
  NumericT * result_data = detail::extract_raw_pointer<NumericT>(result);

    d_mat_wrapper_row(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    d_mat_wrapper_col(d_mat_data, d_mat_start1, d_mat_start2, d_mat_inc1, d_mat_inc2, d_mat_internal_size1, d_mat_internal_size2);

    result_wrapper_row(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

    result_wrapper_col(result_data, result_start1, result_start2, result_inc1, result_inc2, result_internal_size1, result_internal_size2);

  NumericT const * elements = detail::extract_raw_pointer<NumericT>(mat.handle());
  unsigned int const * coords = detail::extract_raw_pointer<unsigned int>(mat.handle2());
  NumericT const * csr_elements = detail::extract_raw_pointer<NumericT>(mat.handle5());
  unsigned int const * csr_row_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle3());
  unsigned int const * csr_col_buffer = detail::extract_raw_pointer<unsigned int>(mat.handle4());

  for (vcl_size_t result_col = 0; result_col < result.size2(); ++result_col)

      for (unsigned int item_id = 0; item_id < mat.internal_ellnnz(); ++item_id)

        if (val < 0 || val > 0)

          if (d_mat.lhs().row_major())
            sum += d_mat_wrapper_row(result_col, col) * val;

            sum += d_mat_wrapper_col(result_col, col) * val;

      if (d_mat.lhs().row_major())
        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_row(result_col, static_cast<vcl_size_t>(csr_col_buffer[item_id])) * csr_elements[item_id];

        for (vcl_size_t item_id = col_begin; item_id < col_end; item_id++)
          sum += d_mat_wrapper_col(result_col, static_cast<vcl_size_t>(csr_col_buffer[item_id])) * csr_elements[item_id];

        result_wrapper_row(row, result_col) = sum;

        result_wrapper_col(row, result_col) = sum;