1 #ifndef VIENNACL_DEVICE_SPECIFIC_TEMPLATES_MATRIX_AXPY_HPP
2 #define VIENNACL_DEVICE_SPECIFIC_TEMPLATES_MATRIX_AXPY_HPP
41 namespace device_specific
48 unsigned int _local_size_0,
unsigned int _local_size_1,
49 unsigned int _num_groups_0,
unsigned int _num_groups_1,
63 return TEMPLATE_INVALID_SIMD_WIDTH;
64 return TEMPLATE_VALID;
// Builds the source text of one kernel for the given statements and SIMD width.
// The text is written to `stream`; the kernel prototype starts with the two
// size arguments "unsigned int M, unsigned int N,".
// NOTE(review): this listing omits several original lines (the embedded line
// numbers skip), including the declaration of `stream`, the places where
// `process_str` and `accessors` are consumed, and the return statement.
67 std::string generate_impl(std::string
const & kernel_prefix,
statements_container const & statements, std::vector<mapping_type>
const & mappings,
unsigned int simd_width)
const
// Scratch string holding the per-element code template that is currently being built.
69 std::string process_str;
// Loop start / upper bound / increment strings for the i loop (suffix 0) and the j loop (suffix 1).
72 std::string init0, upper_bound0, inc0, init1, upper_bound1, inc1;
// Emit the kernel prototype, then open the kernel body.
75 generate_prototype(stream, kernel_prefix,
"unsigned int M, unsigned int N,", mappings, statements);
76 stream <<
"{" << std::endl;
// Outer loop over i, bounded by "M": fetching_loop_info fills init0/upper_bound0/inc0
// according to p_.fetching_policy, using get_global_id(0) / get_global_size(0).
83 fetching_loop_info(
p_.fetching_policy,
"M", stream, init0, upper_bound0, inc0,
"get_global_id(0)",
"get_global_size(0)");
84 stream <<
"for(unsigned int i = " << init0 <<
"; i < " << upper_bound0 <<
"; i += " << inc0 <<
")" << std::endl;
85 stream <<
"{" << std::endl;
// Inner loop over j, bounded by "N", using get_global_id(1) / get_global_size(1).
87 fetching_loop_info(
p_.fetching_policy,
"N", stream, init1, upper_bound1, inc1,
"get_global_id(1)",
"get_global_size(1)");
88 stream <<
"for(unsigned int j = " << init1 <<
"; j < " << upper_bound1 <<
"; j += " << inc1 <<
")" << std::endl;
89 stream <<
"{" << std::endl;
// Load template: declares #namereg (type "#scalartype" with simd_width appended)
// and initialises it with the vload expression for offset (i*#stride1, j*#stride2).
92 process_str =
utils::append_width(
"#scalartype",simd_width) +
" #namereg = " +
vload(simd_width,
"$OFFSET{i*#stride1,j*#stride2}",
"#pointer")+
";";
// "vector_diag" nodes: #namereg is 0 when the (i, j) pair fails the #diag_offset
// comparison in the template below; otherwise it is read from
// #pointer[min(i*#stride, j*#stride)].
94 tree_parsing::process(stream,
PARENT_NODE_TYPE,
"vector_diag",
"#scalartype #namereg = ((i + ((#diag_offset<0)?#diag_offset:0))!=(j-((#diag_offset>0)?#diag_offset:0)))?0:#pointer[min(i*#stride, j*#stride)];", statements, mappings);
// Matrix, vector_diag and scalar operands are all accessed through "#namereg".
97 std::map<std::string, std::string> accessors;
98 accessors[
"matrix"] =
"#namereg";
99 accessors[
"vector_diag"] =
"#namereg";
100 accessors[
"scalar"] =
"#namereg";
// Store template: vstore expression writing #namereg to the same
// (i*#stride1, j*#stride2) offset used by the load template.
103 process_str =
vstore(simd_width,
"#namereg",
"$OFFSET{i*#stride1,j*#stride2}",
"#pointer")+
";";
// Three closing braces are emitted, matching the three "{" written above
// (kernel body, i loop, j loop).
107 stream <<
"}" << std::endl;
109 stream <<
"}" << std::endl;
112 stream <<
"}" << std::endl;
// Overload without a SIMD width: collects generated kernel sources in `res`.
// The visible code appends the result of the four-argument generate_impl
// called with simd_width = 1.
// NOTE(review): lines between 120 and the end of this function are not part of
// this listing, so whether further kernels are appended cannot be told from here.
117 std::vector<std::string> generate_impl(std::string
const & kernel_prefix,
statements_container const & statements, std::vector<mapping_type>
const & mappings)
const
119 std::vector<std::string> res;
120 res.push_back(generate_impl(kernel_prefix, statements, mappings, 1));
129 up_to_internal_size_ = v;
142 unsigned int current_arg = 0;
143 if (up_to_internal_size_)
161 bool up_to_internal_size_;
void up_to_internal_size(bool v)
unsigned int local_size_0
void set_arguments(statements_container const &statements, viennacl::ocl::kernel &kernel, unsigned int &current_arg)
Represents an OpenCL kernel within ViennaCL.
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
static std::string vstore(unsigned int simd_width, std::string const &value, std::string const &offset, std::string const &ptr)
void enqueue(std::string const &kernel_prefix, std::vector< lazy_program_compiler > &programs, statements_container const &statements)
parameters_type const & parameters() const
A class representing a compute device (e.g. a GPU)
std::list< scheduler::statement > const & data() const
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
unsigned int local_size_1
Map ViennaCL objects to generator wrappers.
static void fetching_loop_info(fetching_policy_type policy, std::string const &bound, utils::kernel_generation_stream &stream, std::string &init, std::string &upper_bound, std::string &inc, std::string const &domain_id, std::string const &domain_size)
matrix_axpy_parameters_type(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy)
std::string evaluate(leaf_t leaf, std::map< std::string, std::string > const &accessors, scheduler::statement const &statement, vcl_size_t root_idx, mapping_type const &mapping)
matrix_axpy_template(parameters_type const &parameters, binding_policy_t binding_policy=BIND_ALL_UNIQUE)
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
static void generate_prototype(utils::kernel_generation_stream &stream, std::string const &name, std::string const &first_arguments, std::vector< mapping_type > const &mappings, statements_container const &statements, std::map< std::string, unsigned int > const &widths)
Code for parsing the expression trees.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
unsigned int num_groups_0
fetching_policy_type fetching_policy
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
static std::string vload(unsigned int simd_width, std::string const &offset, std::string const &ptr)
void arg(unsigned int pos, cl_char val)
Sets a char argument at the provided position.
unsigned int num_groups_1
Main datastructure for a node in the statement tree.
parameters_type(unsigned int _simd_width, unsigned int _local_size_1, unsigned int _local_size_2, unsigned int _num_kernels)
void process(utils::kernel_generation_stream &stream, leaf_t leaf, std::string const &type_key, std::string const &to_process, scheduler::statement const &statement, vcl_size_t root_idx, mapping_type const &mapping, std::set< std::string > &already_processed)
std::string append_width(std::string const &str, unsigned int width)