ViennaCL - The Vienna Computing Library  1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
vector_axpy_template.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_DEVICE_SPECIFIC_TEMPLATES_VECTOR_AXPY_HPP
2 #define VIENNACL_DEVICE_SPECIFIC_TEMPLATES_VECTOR_AXPY_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2015, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 
27 #include <vector>
28 #include <cmath>
29 
31 
36 
39 
40 #include "viennacl/tools/tools.hpp"
41 
42 namespace viennacl
43 {
44 namespace device_specific
45 {
46 
48 {
49 public:
/**
 * Builds the parameter set used by vector_axpy_template.
 *
 * _simd_width and _group_size are forwarded to template_base::parameters_type,
 * whose two remaining constructor arguments are fixed to 1. _num_groups and
 * _fetching_policy are stored in the members of the same name.
 *
 * NOTE(review): the class header (listing line 47) and the declaration of the
 * fetching_policy member (presumably listing line 57) are not visible in this
 * listing -- confirm against the original header.
 */
50  vector_axpy_parameters(unsigned int _simd_width,
51  unsigned int _group_size, unsigned int _num_groups,
52  fetching_policy_type _fetching_policy) : template_base::parameters_type(_simd_width, _group_size, 1, 1), num_groups(_num_groups), fetching_policy(_fetching_policy){ }
53 
54 
55 
/* Multiplied by the local work size in vector_axpy_template::enqueue() to obtain the global work size. */
56  unsigned int num_groups;
58 };
59 
/**
 * Kernel template for element-wise vector statements.
 *
 * generate_impl() emits the source of two kernels (a "_strided" variant with
 * SIMD width 1 and a variant using p_.simd_width); enqueue() picks one of
 * them, sets its work sizes and arguments and submits it.
 */
60 class vector_axpy_template : public template_base_impl<vector_axpy_template, vector_axpy_parameters>
61 {
62 private:
/**
 * Validates the parameter set.
 *
 * @return TEMPLATE_INVALID_FETCHING_POLICY_TYPE if the fetching policy is
 *         FETCH_FROM_LOCAL, TEMPLATE_VALID otherwise. The device argument is unused.
 */
63  virtual int check_invalid_impl(viennacl::ocl::device const & /*dev*/) const
64  {
65  if (p_.fetching_policy==FETCH_FROM_LOCAL)
66  return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
67  return TEMPLATE_VALID;
68  }
69 
/**
 * Generates the kernel sources for the given statements.
 *
 * @param kernel_prefix  Base name of the generated kernels; the first kernel gets the suffix "_strided".
 * @param statements     Statements handed to the tree_parsing helpers and to generate_prototype().
 * @param mappings       Mappings handed to the same helpers alongside the statements.
 * @return Two source strings: index 0 is the "_strided" kernel (SIMD width 1),
 *         index 1 is the kernel using p_.simd_width.
 *
 * NOTE(review): `stream` is used below but its declaration is not visible in
 * this listing (listing line 75 is absent) -- presumably a
 * utils::kernel_generation_stream created once per loop iteration; confirm
 * against the original header.
 */
70  std::vector<std::string> generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mappings) const
71  {
72  std::vector<std::string> result;
/* i == 0: strided kernel with SIMD width 1; i == 1: kernel with the configured SIMD width. */
73  for (unsigned int i = 0; i < 2; ++i)
74  {
76  unsigned int simd_width = (i==0)?1:p_.simd_width;
77  std::string suffix = (i==0)?"_strided":"";
/* Kernel header: required work-group size attribute, prototype (first argument "unsigned int N,") and opening brace. */
78  stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
79  generate_prototype(stream, kernel_prefix + suffix, "unsigned int N,", mappings, statements);
80  stream << "{" << std::endl;
81  stream.inc_tab();
82 
/* Prologue: read scalars into registers and advance matrix/vector pointers by their start offsets. */
83  tree_parsing::process(stream, PARENT_NODE_TYPE, "scalar", "#scalartype #namereg = *#pointer;", statements, mappings);
84  tree_parsing::process(stream, PARENT_NODE_TYPE, "matrix", "#pointer += $OFFSET{#start1, #start2};", statements, mappings);
85  tree_parsing::process(stream, PARENT_NODE_TYPE, "vector", "#pointer += #start;", statements, mappings);
86 
/* Functor that emits the per-element loop body; handed to element_wise_loop_1D() below. */
87  struct loop_body : public loop_body_base
88  {
/* Keeps references only; the referenced containers must outlive this object. */
89  loop_body(statements_container const & statements_, std::vector<mapping_type> const & mappings_) : statements(statements_), mappings(mappings_) { }
90 
/* Emits, in order: loads into registers, evaluation of the expression, stores of the left-hand side. */
91  void operator()(utils::kernel_generation_stream & kernel_stream, unsigned int kernel_simd_width) const
92  {
93  std::string process_str;
/* With SIMD width 1 the index is scaled by #stride; otherwise the plain index i is used. */
94  std::string i_str = (kernel_simd_width==1)?"i*#stride":"i";
95 
/* Loads: vectors through vload(), matrix rows/columns/diagonals through scalar indexing with strides applied. */
96  process_str = utils::append_width("#scalartype",kernel_simd_width) + " #namereg = " + vload(kernel_simd_width, i_str, "#pointer") + ";";
97  tree_parsing::process(kernel_stream, PARENT_NODE_TYPE, "vector", process_str, statements, mappings);
98  tree_parsing::process(kernel_stream, PARENT_NODE_TYPE, "matrix_row", "#scalartype #namereg = #pointer[$OFFSET{#row*#stride1, i*#stride2}];", statements, mappings);
99  tree_parsing::process(kernel_stream, PARENT_NODE_TYPE, "matrix_column", "#scalartype #namereg = #pointer[$OFFSET{i*#stride1,#column*#stride2}];", statements, mappings);
100  tree_parsing::process(kernel_stream, PARENT_NODE_TYPE, "matrix_diag", "#scalartype #namereg = #pointer[#diag_offset<0?$OFFSET{(i - #diag_offset)*#stride1, i*#stride2}:$OFFSET{i*#stride1, (i + #diag_offset)*#stride2}];", statements, mappings);
101 
/* Evaluation: every operand kind is accessed through the register loaded above. */
102  std::map<std::string, std::string> accessors;
103  accessors["vector"] = "#namereg";
104  accessors["matrix_row"] = "#namereg";
105  accessors["matrix_column"] = "#namereg";
106  accessors["matrix_diag"] = "#namereg";
107  accessors["scalar"] = "#namereg";
108  tree_parsing::evaluate(kernel_stream, PARENT_NODE_TYPE, accessors, statements, mappings);
109 
/* Stores: write the registers of left-hand-side operands back to memory. */
/* NOTE(review): the matrix_row/matrix_column/matrix_diag loads above scale the indices by #stride1/#stride2, */
/* but the corresponding stores below do not -- confirm whether this asymmetry is intended. */
110  process_str = vstore(kernel_simd_width, "#namereg",i_str,"#pointer")+";";
111  tree_parsing::process(kernel_stream, LHS_NODE_TYPE, "vector", process_str, statements, mappings);
112  tree_parsing::process(kernel_stream, LHS_NODE_TYPE, "matrix_row", "#pointer[$OFFSET{#row, i}] = #namereg;", statements, mappings);
113  tree_parsing::process(kernel_stream, LHS_NODE_TYPE, "matrix_column", "#pointer[$OFFSET{i, #column}] = #namereg;", statements, mappings);
114  tree_parsing::process(kernel_stream, LHS_NODE_TYPE, "matrix_diag", "#pointer[#diag_offset<0?$OFFSET{i - #diag_offset, i}:$OFFSET{i, i + #diag_offset}] = #namereg;", statements, mappings);
115 
116  }
117 
118  private:
119  statements_container const & statements;
120  std::vector<mapping_type> const & mappings;
121  };
122 
/* Wrap the body in a loop over index "i" bounded by "N", driven by get_global_id(0) / get_global_size(0). */
123  element_wise_loop_1D(stream, loop_body(statements, mappings), p_.fetching_policy, simd_width, "i", "N", "get_global_id(0)", "get_global_size(0)");
124 
/* Close the kernel and keep its source. */
125  stream.dec_tab();
126  stream << "}" << std::endl;
127  result.push_back(stream.str());
128  }
129 
130  return result;
131  }
132 
133 public:
/* NOTE(review): listing line 134 is absent here; presumably it holds the constructor taking */
/* (parameters, binding_policy = BIND_ALL_UNIQUE) and initializing up_to_internal_size_ -- confirm. */
135 
/** Sets the flag that enqueue() passes to vector_size() when computing the kernel's size argument. */
136  void up_to_internal_size(bool v) { up_to_internal_size_ = v; }
137 
/**
 * Selects, configures and submits one of the two generated kernels.
 *
 * @param kernel_prefix  Base kernel name, as used in generate_impl().
 * @param programs       programs[0] is queried for the "_strided" kernel, programs[1] for the unsuffixed one.
 * @param statements     Statements to execute; only the first one is used to determine the size argument.
 */
138  void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements)
139  {
140  viennacl::ocl::kernel * kernel;
/* The strided kernel is only chosen when access is strided and a SIMD width above 1 is configured. */
141  if (has_strided_access(statements) && p_.simd_width > 1)
142  kernel = &programs[0].program().get_kernel(kernel_prefix+"_strided");
143  else
144  kernel = &programs[1].program().get_kernel(kernel_prefix);
145 
/* Global work size is the local size times the configured number of groups. */
146  kernel->local_work_size(0, p_.local_size_0);
147  kernel->global_work_size(0, p_.local_size_0*p_.num_groups);
148  unsigned int current_arg = 0;
/* Argument 0 is the size taken from the left-most operand of the first statement; */
/* presumably it feeds the "unsigned int N," parameter of the generated prototype. */
149  scheduler::statement const & statement = statements.data().front();
150  cl_uint size = static_cast<cl_uint>(vector_size(lhs_most(statement.array(), statement.root()), up_to_internal_size_));
151  kernel->arg(current_arg++, size);
/* The remaining arguments are bound by set_arguments(), continuing at current_arg. */
152  set_arguments(statements, *kernel, current_arg);
153  viennacl::ocl::enqueue(*kernel);
154  }
155 
156 private:
/* Forwarded to vector_size() in enqueue(); set through up_to_internal_size(). */
157  bool up_to_internal_size_;
158 };
159 
160 }
161 }
162 
163 #endif
void set_arguments(statements_container const &statements, viennacl::ocl::kernel &kernel, unsigned int &current_arg)
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Various little tools used here and there in ViennaCL.
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
static std::string vstore(unsigned int simd_width, std::string const &value, std::string const &offset, std::string const &ptr)
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
container_type const & array() const
Definition: forwards.h:528
std::list< scheduler::statement > const & data() const
Definition: forwards.h:282
scheduler::statement_node const & lhs_most(scheduler::statement::container_type const &array, vcl_size_t root)
Definition: forwards.h:87
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:34
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:235
Map ViennaCL objects to generator wrappers.
static bool has_strided_access(statements_container const &statements)
vector_axpy_template(vector_axpy_template::parameters_type const &parameters, binding_policy_t binding_policy=BIND_ALL_UNIQUE)
std::string evaluate(leaf_t leaf, std::map< std::string, std::string > const &accessors, scheduler::statement const &statement, vcl_size_t root_idx, mapping_type const &mapping)
vector_axpy_parameters(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy)
Forwards declaration.
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
static vcl_size_t vector_size(scheduler::statement_node const &node, bool up_to_internal_size)
static void generate_prototype(utils::kernel_generation_stream &stream, std::string const &name, std::string const &first_arguments, std::vector< mapping_type > const &mappings, statements_container const &statements, std::map< std::string, unsigned int > const &widths)
Code for parsing the expression trees.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Internal utils.
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
static void element_wise_loop_1D(utils::kernel_generation_stream &stream, loop_body_base const &loop_body, fetching_policy_type fetch, unsigned int simd_width, std::string const &i, std::string const &bound, std::string const &domain_id, std::string const &domain_size)
static std::string vload(unsigned int simd_width, std::string const &offset, std::string const &ptr)
size_type root() const
Definition: forwards.h:530
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:502
void arg(unsigned int pos, cl_char val)
Sets a char argument at the provided position.
Definition: kernel.hpp:116
void enqueue(std::string const &kernel_prefix, std::vector< lazy_program_compiler > &programs, statements_container const &statements)
parameters_type(unsigned int _simd_width, unsigned int _local_size_1, unsigned int _local_size_2, unsigned int _num_kernels)
void process(utils::kernel_generation_stream &stream, leaf_t leaf, std::string const &type_key, std::string const &to_process, scheduler::statement const &statement, vcl_size_t root_idx, mapping_type const &mapping, std::set< std::string > &already_processed)
std::string append_width(std::string const &str, unsigned int width)
Definition: utils.hpp:558