ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
matrix_axpy_template.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_DEVICE_SPECIFIC_TEMPLATES_MATRIX_AXPY_HPP
2 #define VIENNACL_DEVICE_SPECIFIC_TEMPLATES_MATRIX_AXPY_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 
27 #include <vector>
28 
30 
34 
36 
37 #include "viennacl/tools/tools.hpp"
38 
39 namespace viennacl
40 {
41 namespace device_specific
42 {
43 
// Parameter bundle for the matrix AXPY kernel template.
// NOTE(review): the class-head line (listing line 44) and the declaration of the
// `fetching_policy` member (listing line 54) are not present in this listing; the
// constructor below initializes both a base `template_base::parameters_type` and a
// `fetching_policy` member, so they presumably exist in the real header - confirm.
45 {
46 public:
 // Forwards the SIMD width and the two local sizes to the base parameters type,
 // passing the literal 1 as its fourth argument, and stores the number of work
 // groups per dimension together with the fetching policy.
47  matrix_axpy_parameters_type(unsigned int _simd_width,
48  unsigned int _local_size_0, unsigned int _local_size_1,
49  unsigned int _num_groups_0, unsigned int _num_groups_1,
50  fetching_policy_type _fetching_policy) : template_base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1), num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetching_policy(_fetching_policy){ }
51 
 // Number of work groups in dimension 0 and 1; enqueue() multiplies these by the
 // local sizes to obtain the global work sizes.
52  unsigned int num_groups_0;
53  unsigned int num_groups_1;
55 };
56 
// Kernel template for element-wise matrix statements: it generates the source of a
// kernel that visits every (i, j) entry with two nested loops, and enqueues that
// kernel with work sizes taken from matrix_axpy_parameters_type.
57 class matrix_axpy_template : public template_base_impl<matrix_axpy_template, matrix_axpy_parameters_type>
58 {
59 private:
 // Validity check for the stored parameters. Any SIMD width above 1 is rejected
 // with TEMPLATE_INVALID_SIMD_WIDTH; everything else is reported as TEMPLATE_VALID.
 // The device argument is not used.
60  int check_invalid_impl(viennacl::ocl::device const & /*dev*/) const
61  {
62  if (p_.simd_width>1)
63  return TEMPLATE_INVALID_SIMD_WIDTH;
64  return TEMPLATE_VALID;
65  }
66 
 // Builds and returns the source text of one kernel.
 //   kernel_prefix - forwarded to generate_prototype() to name the kernel
 //   statements    - statements the kernel has to evaluate
 //   mappings      - per-statement mappings used to expand the '#...' placeholders
 //   simd_width    - width passed to append_width()/vload()/vstore()
 // NOTE(review): `stream` is used below but its declaration is not visible in this
 // listing (listing line 70 is missing); presumably a utils::kernel_generation_stream
 // - confirm against the real header.
67  std::string generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
68  {
69  std::string process_str;
71 
 // Loop descriptors for dimension 0 (rows, index i) and dimension 1 (columns, index j);
 // filled in by fetching_loop_info() below.
72  std::string init0, upper_bound0, inc0, init1, upper_bound1, inc1;
73 
 // Kernel attribute pinning the work-group size to (local_size_0, local_size_1, 1),
 // followed by the prototype whose leading arguments are the sizes M and N.
74  stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
75  generate_prototype(stream, kernel_prefix, "unsigned int M, unsigned int N,", mappings, statements);
76  stream << "{" << std::endl;
77  stream.inc_tab();
78 
 // Preamble emitted before the loops: scalar nodes are read through their pointer
 // into a local variable, matrix and vector pointers are advanced by their start offsets.
79  tree_parsing::process(stream, PARENT_NODE_TYPE, "scalar", "#scalartype #namereg = *#pointer;", statements, mappings);
80  tree_parsing::process(stream, PARENT_NODE_TYPE, "matrix", "#pointer += $OFFSET{#start1, #start2};", statements, mappings);
81  tree_parsing::process(stream, PARENT_NODE_TYPE, "vector", "#pointer += #start;", statements, mappings);
82 
 // Outer loop over i, bounded by "M" and driven by the global id/size of dimension 0.
83  fetching_loop_info(p_.fetching_policy, "M", stream, init0, upper_bound0, inc0, "get_global_id(0)", "get_global_size(0)");
84  stream << "for(unsigned int i = " << init0 << "; i < " << upper_bound0 << "; i += " << inc0 << ")" << std::endl;
85  stream << "{" << std::endl;
86  stream.inc_tab();
 // Inner loop over j, bounded by "N" and driven by the global id/size of dimension 1.
87  fetching_loop_info(p_.fetching_policy, "N", stream, init1, upper_bound1, inc1, "get_global_id(1)", "get_global_size(1)");
88  stream << "for(unsigned int j = " << init1 << "; j < " << upper_bound1 << "; j += " << inc1 << ")" << std::endl;
89  stream << "{" << std::endl;
90  stream.inc_tab();
91 
 // Load step: each matrix node gets a local variable initialized from element (i, j),
 // addressed via the node's strides.
92  process_str = utils::append_width("#scalartype",simd_width) + " #namereg = " + vload(simd_width, "$OFFSET{i*#stride1,j*#stride2}", "#pointer")+ ";";
93  tree_parsing::process(stream, PARENT_NODE_TYPE, "matrix", process_str, statements, mappings);
 // vector_diag nodes: the generated expression yields 0 when the offset-adjusted
 // row and column indices differ, and otherwise reads the vector entry at
 // min(i*#stride, j*#stride).
94  tree_parsing::process(stream, PARENT_NODE_TYPE, "vector_diag", "#scalartype #namereg = ((i + ((#diag_offset<0)?#diag_offset:0))!=(j-((#diag_offset>0)?#diag_offset:0)))?0:#pointer[min(i*#stride, j*#stride)];", statements, mappings);
95 
96 
 // Evaluate the statements, referring to matrix, vector_diag and scalar nodes
 // through the local variables declared above.
97  std::map<std::string, std::string> accessors;
98  accessors["matrix"] = "#namereg";
99  accessors["vector_diag"] = "#namereg";
100  accessors["scalar"] = "#namereg";
101  tree_parsing::evaluate(stream, PARENT_NODE_TYPE, accessors, statements, mappings);
102 
 // Store step: write the local value back to element (i, j) of matrices on the
 // left-hand side.
103  process_str = vstore(simd_width, "#namereg", "$OFFSET{i*#stride1,j*#stride2}", "#pointer")+";";
104  tree_parsing::process(stream, LHS_NODE_TYPE, "matrix", process_str, statements, mappings);
105 
 // Close the j loop, the i loop and finally the kernel body.
106  stream.dec_tab();
107  stream << "}" << std::endl;
108  stream.dec_tab();
109  stream << "}" << std::endl;
110 
111  stream.dec_tab();
112  stream << "}" << std::endl;
113 
114  return stream.str();
115  }
116 
 // Returns the list of kernel sources for this template: exactly one kernel,
 // generated with a SIMD width of 1.
117  std::vector<std::string> generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mappings) const
118  {
119  std::vector<std::string> res;
120  res.push_back(generate_impl(kernel_prefix, statements, mappings, 1));
121  return res;
122  }
123 
124 public:
 // Constructs the template from its parameters and a binding policy (default
 // BIND_ALL_UNIQUE); the "up to internal size" mode starts out disabled.
125  matrix_axpy_template(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE) : template_base_impl<matrix_axpy_template, matrix_axpy_parameters_type>(parameters, binding_policy), up_to_internal_size_(false){ }
126 
 // Selects which matrix dimensions enqueue() passes to the kernel:
 // true -> internal sizes, false -> regular sizes.
127  void up_to_internal_size(bool v)
128  {
129  up_to_internal_size_ = v;
130  }
131 
 // Configures and launches the kernel named `kernel_prefix` from programs[0].
 //   kernel_prefix - name used to look up the kernel in the program
 //   programs      - compiled programs; only the first entry is used
 //   statements    - statements whose operands are bound as kernel arguments
132  void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements)
133  {
134  viennacl::ocl::kernel & kernel = programs[0].program().get_kernel(kernel_prefix);
135 
 // Global work size per dimension = local size * number of work groups.
136  kernel.local_work_size(0, p_.local_size_0);
137  kernel.local_work_size(1, p_.local_size_1);
138  kernel.global_work_size(0,p_.local_size_0*p_.num_groups_0);
139  kernel.global_work_size(1,p_.local_size_1*p_.num_groups_1);
140 
 // Root node of the first statement; the sizes of its left-hand-side matrix become
 // the first two kernel arguments (declared as M and N in the generated prototype).
141  scheduler::statement_node const & root = statements.data().front().array()[statements.data().front().root()];
142  unsigned int current_arg = 0;
143  if (up_to_internal_size_)
144  {
145  kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::internal_size1_fun())));
146  kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::internal_size2_fun())));
147  }
148  else
149  {
150  kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::size1_fun())));
151  kernel.arg(current_arg++, cl_uint(utils::call_on_matrix(root.lhs, utils::size2_fun())));
152  }
153 
 // Bind the remaining arguments starting at current_arg, then launch.
154  set_arguments(statements, kernel, current_arg);
155 
156  viennacl::ocl::enqueue(kernel);
157  }
158 
159 
160 private:
 // When true, enqueue() passes the internal matrix sizes instead of the regular sizes.
161  bool up_to_internal_size_;
162 };
163 
164 }
165 }
166 
167 #endif
void set_arguments(statements_container const &statements, viennacl::ocl::kernel &kernel, unsigned int &current_arg)
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Various little tools used here and there in ViennaCL.
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
static std::string vstore(unsigned int simd_width, std::string const &value, std::string const &offset, std::string const &ptr)
void enqueue(std::string const &kernel_prefix, std::vector< lazy_program_compiler > &programs, statements_container const &statements)
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
std::list< scheduler::statement > const & data() const
Definition: forwards.h:282
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
Map ViennaCL objects to generator wrappers.
static void fetching_loop_info(fetching_policy_type policy, std::string const &bound, utils::kernel_generation_stream &stream, std::string &init, std::string &upper_bound, std::string &inc, std::string const &domain_id, std::string const &domain_size)
matrix_axpy_parameters_type(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy)
std::string evaluate(leaf_t leaf, std::map< std::string, std::string > const &accessors, scheduler::statement const &statement, vcl_size_t root_idx, mapping_type const &mapping)
matrix_axpy_template(parameters_type const &parameters, binding_policy_t binding_policy=BIND_ALL_UNIQUE)
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
static void generate_prototype(utils::kernel_generation_stream &stream, std::string const &name, std::string const &first_arguments, std::vector< mapping_type > const &mappings, statements_container const &statements, std::map< std::string, unsigned int > const &widths)
Code for parsing the expression trees.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Internal utils.
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
static std::string vload(unsigned int simd_width, std::string const &offset, std::string const &ptr)
void arg(unsigned int pos, cl_char val)
Sets a char argument at the provided position.
Definition: kernel.hpp:116
Main data structure for a node in the statement tree.
Definition: forwards.h:478
parameters_type(unsigned int _simd_width, unsigned int _local_size_1, unsigned int _local_size_2, unsigned int _num_kernels)
void process(utils::kernel_generation_stream &stream, leaf_t leaf, std::string const &type_key, std::string const &to_process, scheduler::statement const &statement, vcl_size_t root_idx, mapping_type const &mapping, std::set< std::string > &already_processed)
std::string append_width(std::string const &str, unsigned int width)
Definition: utils.hpp:558