58 template<
typename NumericT>
61 if (std::fabs(s1 - s2) > 0)
62 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
66 template<
typename NumericT>
69 std::vector<NumericT> v2_cpu(v2.
size());
73 for (std::size_t i=0;i<v1.size(); ++i)
75 if (
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
76 v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) /
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
80 if (v2_cpu[i] > 0.0001)
83 std::cout <<
"Error at entry " << i <<
": " << v1[i] <<
" vs. " << v2[i] << std::endl;
90 for (std::size_t i=0;i<v2_cpu.size(); ++i)
91 inf_norm = std::max<NumericT>(inf_norm, std::fabs(v2_cpu[i]));
96 template<
typename NumericT>
100 for (std::size_t i=0; i<A2.
size1(); ++i)
101 for (std::size_t j=0; j<A2.
size2(); ++j)
109 return diff(host_values, vcl_device_values);
/// Compares a host-side container with its device-side counterpart and reports the outcome on std::cout.
///
/// @param host_container    Host data; forwarded unchanged to diff().
/// @param device_container  Device data; forwarded unchanged to diff().
/// @param current_stage     Label of the operation under test. Taken by value because it is resized locally.
/// @param epsilon           Threshold compared against the absolute value of the number returned by diff().
///
/// NOTE(review): this listing omits some lines of the body (braces and the statements between the
/// "diff:" output and the "PASS" output), so what happens after an error is reported cannot be
/// confirmed from here.
113 template<
typename HostContainerT,
typename DeviceContainerT,
typename NumericT>
114 void check(HostContainerT
const & host_container, DeviceContainerT
const & device_container,
115 std::string current_stage,
NumericT epsilon)
// Pad with spaces (or truncate) so the printed label is always exactly 25 characters long.
117 current_stage.resize(25,
' ');
118 std::cout <<
"Testing operation: " << current_stage;
// fabs() makes the comparison below independent of the sign of the value diff() returns.
119 NumericT rel_error = std::fabs(
diff(host_container, device_container));
121 if (rel_error > epsilon)
123 std::cout << std::endl;
124 std::cout <<
"# Error at operation: " << current_stage << std::endl;
125 std::cout <<
" diff: " << rel_error << std::endl;
128 std::cout <<
"PASS" << std::endl;
/// Plain assignment policy: overwrites lhs with rhs via operator=.
/// test_gemm calls this as OpT::apply(host_C[i][j], tmp) to update the host reference result.
/// (Presumably a member of the op_assign functor; the enclosing struct header is not visible here.)
134 template<
typename LHS,
typename RHS>
135 static void apply(LHS & lhs, RHS
const & rhs) { lhs = rhs; }
/// Returns the symbol "=" for this policy; test_gemm inserts it into the stage label handed to check().
137 static std::string
str() {
return "="; }
/// Accumulating policy: adds rhs onto lhs via operator+=.
/// test_gemm calls this as OpT::apply(host_C[i][j], tmp) to update the host reference result.
/// (Presumably a member of the op_plus_assign functor; the enclosing struct header is not visible here.)
142 template<
typename LHS,
typename RHS>
143 static void apply(LHS & lhs, RHS
const & rhs) { lhs += rhs; }
/// Returns the symbol "+=" for this policy; test_gemm inserts it into the stage label handed to check().
145 static std::string
str() {
return "+="; }
/// Subtracting policy: subtracts rhs from lhs via operator-=.
/// test_gemm calls this as OpT::apply(host_C[i][j], tmp) to update the host reference result.
/// (Presumably a member of the op_minus_assign functor; the enclosing struct header is not visible here.)
150 template<
typename LHS,
typename RHS>
151 static void apply(LHS & lhs, RHS
const & rhs) { lhs -= rhs; }
/// Returns the symbol "-=" for this policy; test_gemm inserts it into the stage label handed to check().
153 static std::string
str() {
return "-="; }
160 template<
typename OpT,
typename NumericT,
typename HostMatrixT,
typename DeviceMatrixT>
162 HostMatrixT & host_A, HostMatrixT & host_B, HostMatrixT & host_C,
163 DeviceMatrixT & device_A, std::string name_A,
164 DeviceMatrixT & device_B, std::string name_B,
165 DeviceMatrixT & device_C,
bool copy_from_A,
166 bool trans_first,
bool trans_second)
170 for (std::size_t i = 0; i<host_A.size(); ++i)
171 for (std::size_t j = 0; j<host_A[i].size(); ++j)
173 host_A[i][j] = randomNumber();
174 host_B[i][j] = randomNumber();
184 for (std::size_t i = 0; i<host_A.size(); ++i)
185 for (std::size_t j = 0; j<host_A[i].size(); ++j)
188 for (std::size_t k = 0; k<host_A[i].size(); ++k)
189 tmp += (trans_first ? host_A[k][i] : host_A[i][k])
190 * (trans_second ? host_B[j][k] : host_B[k][j]);
191 OpT::apply(host_C[i][j], tmp);
194 if (trans_first && trans_second)
197 check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"^T*") + name_B + std::string(
"^T"), epsilon);
199 else if (trans_first && !trans_second)
202 check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"^T*") + name_B + std::string(
""), epsilon);
204 else if (!trans_first && trans_second)
207 check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"*") + name_B + std::string(
"^T"), epsilon);
212 check(host_C, device_C, std::string(
"A ") + OpT::str() + std::string(
" ") + name_A + std::string(
"*") + name_B + std::string(
""), epsilon);
218 template<
typename OpT,
typename NumericT,
typename HostMatrixT,
typename DeviceMatrixT>
220 HostMatrixT & host_A, HostMatrixT & host_B, HostMatrixT & host_C,
221 DeviceMatrixT & device_A, std::string name_A,
222 DeviceMatrixT & device_B, std::string name_B,
223 DeviceMatrixT & device_C,
bool copy_from_A)
225 test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A,
false,
false);
226 test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A,
false,
true);
227 test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A,
true,
false);
228 test_gemm<OpT>(epsilon, host_A, host_B, host_C, device_A, name_A, device_B, name_B, device_C, copy_from_A,
true,
true);
234 template<
typename NumericT>
245 std::vector<NumericT> std_x(N);
246 std::vector<NumericT> std_y(N);
247 std::vector<NumericT> std_z(N);
249 for (std::size_t i=0; i<std_x.size(); ++i)
251 for (std::size_t i=0; i<std_y.size(); ++i)
253 for (std::size_t i=0; i<std_z.size(); ++i)
266 check(std_x, vcl_x,
"x = x", epsilon);
269 std_x[0] = std_x[2]; std_x[1] = std_x[3];
271 check(std_x, vcl_x,
"x = x (range)", epsilon);
277 std::vector<std::vector<NumericT> > std_A(N, std::vector<NumericT>(N,
NumericT(1)));
278 std::vector<std::vector<NumericT> > std_B(N, std::vector<NumericT>(N,
NumericT(2)));
279 std::vector<std::vector<NumericT> > std_C(N, std::vector<NumericT>(N,
NumericT(3)));
291 check(std_A, vcl_A,
"A = A", epsilon);
294 std_A[0][0] = std_A[0][2]; std_A[0][1] = std_A[0][3];
296 check(std_A, vcl_A,
"A = A (range)", epsilon);
299 for (std::size_t i = 0; i<std_y.size(); ++i)
302 for (std::size_t j = 0; j<std_x.size(); ++j)
303 val += std_A[i][j] * std_x[j];
307 check(std_y, vcl_x,
"x = A*x", epsilon);
309 typedef unsigned int KeyType;
310 std::vector< std::map<KeyType, NumericT> > std_Asparse(N);
312 for (std::size_t i=0; i<std_Asparse.size(); ++i)
315 std_Asparse[i][KeyType(i-1)] = randomNumber();
316 std_Asparse[i][KeyType(i)] =
NumericT(1) + randomNumber();
317 if (i < std_Asparse.size() - 1)
318 std_Asparse[i][KeyType(i+1)] = randomNumber();
334 for (std::size_t i=0; i<std_Asparse.size(); ++i)
337 for (
typename std::map<unsigned int, NumericT>::const_iterator it = std_Asparse[i].begin(); it != std_Asparse[i].end(); ++it)
338 val += it->second * std_x[it->first];
344 check(std_y, vcl_x,
"x = A*x (sparse, csr)", epsilon);
348 check(std_y, vcl_x,
"x = A*x (sparse, coo)", epsilon);
352 check(std_y, vcl_x,
"x = A*x (sparse, ell)", epsilon);
356 check(std_y, vcl_x,
"x = A*x (sparse, sell)", epsilon);
360 check(std_y, vcl_x,
"x = A*x (sparse, hyb)", epsilon);
361 std::cout << std::endl;
367 test_gemm<op_assign>(epsilon, std_A, std_B, std_C, vcl_A,
"A", vcl_B,
"B", vcl_A,
true);
368 test_gemm<op_assign>(epsilon, std_B, std_A, std_C, vcl_B,
"B", vcl_A,
"A", vcl_A,
false);
369 test_gemm<op_assign>(epsilon, std_A, std_A, std_C, vcl_A,
"A", vcl_A,
"A", vcl_A,
true);
370 std::cout << std::endl;
372 test_gemm<op_plus_assign>(epsilon, std_A, std_B, std_C, vcl_A,
"A", vcl_B,
"B", vcl_A,
true);
373 test_gemm<op_plus_assign>(epsilon, std_B, std_A, std_C, vcl_B,
"B", vcl_A,
"A", vcl_A,
false);
374 test_gemm<op_plus_assign>(epsilon, std_A, std_A, std_C, vcl_A,
"A", vcl_A,
"A", vcl_A,
true);
375 std::cout << std::endl;
377 test_gemm<op_minus_assign>(epsilon, std_A, std_B, std_C, vcl_A,
"A", vcl_B,
"B", vcl_A,
true);
378 test_gemm<op_minus_assign>(epsilon, std_B, std_A, std_C, vcl_B,
"B", vcl_A,
"A", vcl_A,
false);
379 test_gemm<op_minus_assign>(epsilon, std_A, std_A, std_C, vcl_A,
"A", vcl_A,
"A", vcl_A,
true);
380 std::cout << std::endl;
389 for (std::size_t i = 0; i<std_A.size(); ++i)
390 for (std::size_t j = 0; j<std_A[i].size(); ++j)
393 for (std::size_t k = 0; k<std_A[i].size(); ++k)
394 tmp += std_Asparse[i][KeyType(k)] * std_A[k][j];
400 check(std_C, vcl_A,
"A = csr*A", epsilon);
404 check(std_C, vcl_A,
"A = coo*A", epsilon);
408 check(std_C, vcl_A,
"A = ell*A", epsilon);
416 check(std_C, vcl_A,
"A = hyb*A", epsilon);
420 for (std::size_t i = 0; i<std_A.size(); ++i)
421 for (std::size_t j = 0; j<std_A[i].size(); ++j)
424 for (std::size_t k = 0; k<std_A[i].size(); ++k)
425 tmp += std_Asparse[i][KeyType(k)] * std_A[j][k];
431 check(std_C, vcl_A,
"A = csr*A^T", epsilon);
435 check(std_C, vcl_A,
"A = coo*A^T", epsilon);
439 check(std_C, vcl_A,
"A = ell*A^T", epsilon);
447 check(std_C, vcl_A,
"A = hyb*A^T", epsilon);
458 std::cout << std::endl;
459 std::cout <<
"----------------------------------------------" << std::endl;
460 std::cout <<
"----------------------------------------------" << std::endl;
461 std::cout <<
"## Test :: Self-Assignment" << std::endl;
462 std::cout <<
"----------------------------------------------" << std::endl;
463 std::cout <<
"----------------------------------------------" << std::endl;
464 std::cout << std::endl;
466 int retval = EXIT_SUCCESS;
468 std::cout << std::endl;
469 std::cout <<
"----------------------------------------------" << std::endl;
470 std::cout << std::endl;
473 NumericT epsilon =
static_cast<NumericT
>(1E-4);
474 std::cout <<
"# Testing setup:" << std::endl;
475 std::cout <<
" eps: " << epsilon << std::endl;
476 std::cout <<
" numeric: float" << std::endl;
477 retval = test<NumericT>(epsilon);
478 if ( retval == EXIT_SUCCESS )
479 std::cout <<
"# Test passed" << std::endl;
483 std::cout << std::endl;
484 std::cout <<
"----------------------------------------------" << std::endl;
485 std::cout << std::endl;
489 std::cout << std::endl;
490 std::cout <<
"------- Test completed --------" << std::endl;
491 std::cout << std::endl;
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
void test_gemm(NumericT epsilon, HostMatrixT &host_A, HostMatrixT &host_B, HostMatrixT &host_C, DeviceMatrixT &device_A, std::string name_A, DeviceMatrixT &device_B, std::string name_B, DeviceMatrixT &device_C, bool copy_from_A, bool trans_first, bool trans_second)
A reader and writer for the matrix market format is implemented here.
NumericT diff(NumericT const &s1, viennacl::scalar< NumericT > const &s2)
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
size_type internal_size() const
Returns the total amount of allocated memory in multiples of sizeof(NumericT)
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Implementation of the dense matrix class.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Implementation of the coordinate_matrix class.
static void apply(LHS &lhs, RHS const &rhs)
viennacl::vector< float > v1
Implementation of the hyb_matrix class.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Sparse matrix class using the ELLPACK format for storing the nonzeros.
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Implementations of incomplete factorization preconditioners. Convenience header file.
Sparse matrix class using the sliced ELLPACK format with parameters C and $\sigma$.
Implementation of the compressed_matrix class.
Implementation of the sliced_ell_matrix class.
int test(NumericT epsilon)
matrix_range< MatrixType > project(MatrixType const &A, viennacl::range const &r1, viennacl::range const &r2)
size_type size2() const
Returns the number of columns.
Implementation of the ell_matrix class.
size_type size1() const
Returns the number of rows.
Proxy classes for vectors.
Implementation of the compressed_compressed_matrix class (CSR format with a relatively small number o...
Proxy classes for matrices.
viennacl::vector< int > v2
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
static void apply(LHS &lhs, RHS const &rhs)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A small collection of sequential random number generators.
size_type size() const
Returns the length of the vector (cf. std::vector)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
static void apply(LHS &lhs, RHS const &rhs)
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
void check(HostContainerT const &host_container, DeviceContainerT const &device_container, std::string current_stage, NumericT epsilon)
Common routines used within ILU-type preconditioners.
Implementation of the ViennaCL scalar class.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)