00001 #ifndef VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
00002 #define VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00026 #include <utility>
00027 #include <iostream>
00028 #include <fstream>
00029 #include <string>
00030 #include <algorithm>
00031 #include <vector>
00032 #include <math.h>
00033 #include <cmath>
00034 #include <sstream>
00035 #include "viennacl/ocl/backend.hpp"
00036 #include "boost/numeric/ublas/vector.hpp"
00037 #include "boost/numeric/ublas/matrix.hpp"
00038 #include "boost/numeric/ublas/matrix_proxy.hpp"
00039 #include "boost/numeric/ublas/storage.hpp"
00040 #include "boost/numeric/ublas/io.hpp"
00041 #include "boost/numeric/ublas/matrix_expression.hpp"
00042 #include "boost/numeric/ublas/detail/matrix_assign.hpp"
00043
00044
00045 #include "viennacl/vector.hpp"
00046 #include "viennacl/matrix.hpp"
00047
00048 #include "viennacl/linalg/detail/spai/block_matrix.hpp"
00049 #include "viennacl/linalg/detail/spai/block_vector.hpp"
00050 #include "viennacl/linalg/kernels/spai_source.h"
00051 #include "viennacl/linalg/kernels/spai_kernels.h"
00052
00053 namespace viennacl
00054 {
00055 namespace linalg
00056 {
00057 namespace detail
00058 {
00059 namespace spai
00060 {
00061
00062
00063
00064
00065 template< typename T, typename InputIterator>
00066 void Print(std::ostream& ostr, InputIterator it_begin, InputIterator it_end){
00067
00068 std::string delimiters = " ";
00069 std::copy(it_begin, it_end, std::ostream_iterator<T>(ostr, delimiters.c_str()));
00070 ostr<<std::endl;
00071 }
00072
00073 template<typename VectorType, typename MatrixType>
00074 void write_to_block(VectorType& con_A_I_J, unsigned int start_ind, const std::vector<unsigned int>& I, const std::vector<unsigned int>& J, MatrixType& m){
00075 m.resize(I.size(), J.size(), false);
00076 for(size_t i = 0; i < J.size(); ++i){
00077 for(size_t j = 0; j < I.size(); ++j){
00078 m(j,i) = con_A_I_J[start_ind + i*I.size() + j];
00079 }
00080 }
00081 }
00082
00083 template<typename VectorType>
00084 void print_continious_matrix(VectorType& con_A_I_J, std::vector<cl_uint>& blocks_ind,
00085 const std::vector<std::vector<unsigned int> >& g_I, const std::vector<std::vector<unsigned int> >& g_J){
00086 typedef typename VectorType::value_type ScalarType;
00087 std::vector<boost::numeric::ublas::matrix<ScalarType> > com_A_I_J(g_I.size());
00088 for(size_t i = 0; i < g_I.size(); ++i){
00089 write_to_block( con_A_I_J, blocks_ind[i], g_I[i], g_J[i], com_A_I_J[i]);
00090 std::cout<<com_A_I_J[i]<<std::endl;
00091 }
00092 }
00093 template<typename VectorType>
00094 void print_continious_vector(VectorType& con_v, std::vector<cl_uint>& block_ind, const std::vector<std::vector<unsigned int> >& g_J){
00095 typedef typename VectorType::value_type ScalarType;
00096 std::vector<boost::numeric::ublas::vector<ScalarType> > com_v(g_J.size());
00097
00098 for(size_t i = 0; i < g_J.size(); ++i){
00099 com_v[i].resize(g_J[i].size());
00100 for(size_t j = 0; j < g_J[i].size(); ++j){
00101 com_v[i](j) = con_v[block_ind[i] + j];
00102 }
00103 std::cout<<com_v[i]<<std::endl;
00104 }
00105 }
00106
00108
00115 void compute_blocks_size(const std::vector<std::vector<unsigned int> >& g_I, const std::vector<std::vector<unsigned int> >& g_J,
00116 unsigned int& sz, std::vector<cl_uint>& blocks_ind, std::vector<cl_uint>& matrix_dims){
00117 sz = 0;
00118 for(size_t i = 0; i < g_I.size(); ++i){
00119 sz += static_cast<unsigned int>(g_I[i].size()*g_J[i].size());
00120 matrix_dims[2*i] = static_cast<cl_uint>(g_I[i].size());
00121 matrix_dims[2*i + 1] = static_cast<cl_uint>(g_J[i].size());
00122 blocks_ind[i+1] = blocks_ind[i] + static_cast<cl_uint>(g_I[i].size()*g_J[i].size());
00123
00124 }
00125 }
00130 void get_size(const std::vector<std::vector<unsigned int> >& inds, unsigned int& size){
00131 size = 0;
00132 for (size_t i = 0; i < inds.size(); ++i) {
00133 size += static_cast<unsigned int>(inds[i].size());
00134 }
00135 }
00136
00141 void init_start_inds(const std::vector<std::vector<unsigned int> >& inds, std::vector<cl_uint>& start_inds){
00142 for(size_t i = 0; i < inds.size(); ++i){
00143 start_inds[i+1] = start_inds[i] + static_cast<cl_uint>(inds[i].size());
00144 }
00145 }
00146
00147
00153 template<typename MatrixType, typename ScalarType>
00154 void dot_prod(const MatrixType& A, unsigned int beg_ind, ScalarType& res){
00155 res = static_cast<ScalarType>(0);
00156 for(size_t i = beg_ind; i < A.size1(); ++i){
00157 res += A(i, beg_ind-1)*A(i, beg_ind-1);
00158 }
00159 }
00167 template<typename MatrixType, typename VectorType, typename ScalarType>
00168 void custom_inner_prod(const MatrixType& A, const VectorType& v, unsigned int col_ind, unsigned int start_ind, ScalarType& res){
00169 res = static_cast<ScalarType>(0);
00170 for(unsigned int i = start_ind; i < static_cast<unsigned int>(A.size1()); ++i){
00171 res += A(i, col_ind)*v(i);
00172 }
00173 }
00174
00180 template<typename MatrixType, typename VectorType>
00181 void copy_vector(const MatrixType & A, VectorType & v, const unsigned int beg_ind){
00182 for(unsigned int i = beg_ind; i < static_cast<unsigned int>(A.size1()); ++i){
00183 v(i) = A( i, beg_ind-1);
00184 }
00185 }
00186
00187
00194 template<typename MatrixType, typename VectorType, typename ScalarType>
00195 void householder_vector(const MatrixType& A, unsigned int j, VectorType& v, ScalarType& b){
00196 ScalarType sg;
00197
00198 dot_prod(A, j+1, sg);
00199 copy_vector(A, v, j+1);
00200 ScalarType mu;
00201 v(j) = static_cast<ScalarType>(1.0);
00202 if(sg == 0){
00203 b = 0;
00204 }
00205 else{
00206 mu = std::sqrt(A(j,j)*A(j, j) + sg);
00207 if(A(j, j) <= 0){
00208 v(j) = A(j, j) - mu;
00209 }else{
00210 v(j) = -sg/(A(j, j) + mu);
00211 }
00212 b = 2*(v(j)*v(j))/(sg + v(j)*v(j));
00213 v = v/v(j);
00214 }
00215 }
00222 template<typename MatrixType, typename VectorType, typename ScalarType>
00223 void apply_householder_reflection(MatrixType& A, unsigned int iter_cnt, VectorType& v, ScalarType b){
00224
00225 ScalarType in_prod_res;
00226 for(unsigned int i = iter_cnt; i < static_cast<unsigned int>(A.size2()); ++i){
00227
00228 custom_inner_prod(A, v, i, iter_cnt, in_prod_res);
00229 for(unsigned int j = iter_cnt; j < static_cast<unsigned int>(A.size1()); ++j){
00230 A(j, i) -= b*in_prod_res*v(j);
00231 }
00232 }
00233 }
00234
00240 template<typename MatrixType, typename VectorType>
00241 void store_householder_vector(MatrixType& A, unsigned int ind, VectorType& v){
00242 for(unsigned int i = ind; i < static_cast<unsigned int>(A.size1()); ++i){
00243 A(i, ind-1) = v(i);
00244 }
00245 }
00246
00247
00248
00253 template<typename MatrixType, typename VectorType>
00254 void single_qr(MatrixType& R, VectorType& b_v){
00255 typedef typename MatrixType::value_type ScalarType;
00256 if((R.size1() > 0) && (R.size2() > 0)){
00257 VectorType v = (VectorType)boost::numeric::ublas::zero_vector<ScalarType>(R.size1());
00258 b_v = (VectorType)boost::numeric::ublas::zero_vector<ScalarType>(R.size2());
00259 for(unsigned int i = 0; i < static_cast<unsigned int>(R.size2()); ++i){
00260 householder_vector(R, i, v, b_v[i]);
00261 apply_householder_reflection(R, i, v, b_v[i]);
00262 if(i < R.size1()) store_householder_vector(R, i+1, v);
00263 }
00264 }
00265 }
00266
00267
00272 void read_kernel_from_file(std::string& file_name, std::string& kernel_source){
00273 std::ifstream ifs(file_name.c_str(), std::ifstream::in);
00274
00275 if (!ifs)
00276 std::cerr << "WARNING: Cannot open file " << file_name << std::endl;
00277
00278 std::string line;
00279 std::ostringstream ost;
00280 while (std::getline(ifs, line)) {
00281 ost<<line<<std::endl;
00282 }
00283 kernel_source = ost.str();
00284 }
00285
00290 void get_max_block_size(const std::vector<std::vector<unsigned int> >& inds, unsigned int& max_size){
00291 max_size = 0;
00292 for(unsigned int i = 0; i < inds.size(); ++i){
00293 if(inds[i].size() > max_size){
00294 max_size = static_cast<unsigned int>(inds[i].size());
00295 }
00296 }
00297 }
00298
00305 template<typename MatrixType, typename VectorType, typename ScalarType>
00306 void custom_dot_prod(const MatrixType& A, const VectorType& v, unsigned int ind, ScalarType& res){
00307 res = static_cast<ScalarType>(0);
00308 for(unsigned int j = ind; j < A.size1(); ++j){
00309 if(j == ind){
00310 res += v(j);
00311 }else{
00312 res += A(j, ind)*v(j);
00313 }
00314 }
00315 }
00316
00322 template<typename MatrixType, typename VectorType>
00323 void apply_q_trans_vec(const MatrixType& R, const VectorType& b_v, VectorType& y){
00324 typedef typename MatrixType::value_type ScalarType;
00325 ScalarType inn_prod = static_cast<ScalarType>(0);
00326 for(size_t i = 0; i < R.size2(); ++i){
00327 custom_dot_prod(R, y, static_cast<unsigned int>(i), inn_prod);
00328 for(size_t j = i; j < R.size1(); ++j){
00329 if(i == j){
00330 y(j) -= b_v(i)*inn_prod;
00331 }
00332 else{
00333 y(j) -= b_v(i)*inn_prod*R(j,i);
00334 }
00335 }
00336 }
00337 }
00338
00344 template<typename MatrixType, typename VectorType>
00345 void apply_q_trans_mat(const MatrixType& R, const VectorType& b_v, MatrixType& A){
00346 VectorType tmp_v;
00347 for(size_t i = 0; i < A.size2(); ++i){
00348 tmp_v = (VectorType)column(A,i);
00349 apply_q_trans_vec(R, b_v, tmp_v);
00350 column(A,i) = tmp_v;
00351 }
00352 }
00353
00354
00364 template<typename ScalarType>
00365 void block_qr(std::vector<std::vector<unsigned int> >& g_I,
00366 std::vector<std::vector<unsigned int> >& g_J,
00367 block_matrix& g_A_I_J_vcl,
00368 block_vector& g_bv_vcl,
00369 std::vector<cl_uint>& g_is_update,
00370 const unsigned int cur_iter){
00371
00372 unsigned int bv_size;
00373 unsigned int v_size;
00374
00375
00376 unsigned int local_r_n, local_c_n;
00377
00378 get_max_block_size(g_I, local_r_n);
00379 get_max_block_size(g_J, local_c_n);
00380
00381 get_size(g_J, bv_size);
00382 get_size(g_I, v_size);
00383
00384 std::vector<cl_uint> start_bv_inds(g_I.size() + 1, 0);
00385 std::vector<cl_uint> start_v_inds(g_I.size() + 1, 0);
00386 init_start_inds(g_J, start_bv_inds);
00387 init_start_inds(g_I, start_v_inds);
00388
00389 std::vector<ScalarType> b_v(bv_size, static_cast<ScalarType>(0));
00390 std::vector<ScalarType> v(v_size, static_cast<ScalarType>(0));
00391
00392 block_vector v_vcl;
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404 g_bv_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00405 static_cast<unsigned int>(sizeof(ScalarType)*bv_size),
00406 &(b_v[0]));
00407
00408 v_vcl.handle() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00409 static_cast<unsigned int>(sizeof(ScalarType)*v_size),
00410 &(v[0]));
00411
00412 g_bv_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00413 static_cast<unsigned int>(sizeof(cl_uint)*g_I.size()),
00414 &(start_bv_inds[0]));
00415
00416 v_vcl.handle1() = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00417 static_cast<unsigned int>(sizeof(cl_uint)*g_I.size()),
00418 &(start_v_inds[0]));
00419 viennacl::ocl::handle<cl_mem> g_is_update_vcl = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00420 static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()),
00421 &(g_is_update[0]));
00422
00423
00424 viennacl::ocl::kernel& qr_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::spai<ScalarType, 1>::program_name(), "block_qr");
00425 qr_kernel.local_work_size(0, local_c_n);
00426 qr_kernel.global_work_size(0, 256);
00427 viennacl::ocl::enqueue(qr_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle1(), g_bv_vcl.handle(),
00428 v_vcl.handle(), g_A_I_J_vcl.handle2(),
00429 g_bv_vcl.handle1(), v_vcl.handle1(), g_is_update_vcl,
00430 viennacl::ocl::local_mem(static_cast<unsigned int>(sizeof(ScalarType)*(local_r_n*local_c_n))),
00431 static_cast<cl_uint>(g_I.size())));
00432
00433 }
00434 }
00435 }
00436 }
00437 }
00438 #endif