1 #ifndef VIENNACL_DEVICE_SPECIFIC_TEMPLATES_VECTOR_AXPY_HPP
2 #define VIENNACL_DEVICE_SPECIFIC_TEMPLATES_VECTOR_AXPY_HPP
44 namespace device_specific
51 unsigned int _group_size,
unsigned int _num_groups,
66 return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
67 return TEMPLATE_VALID;
70 std::vector<std::string> generate_impl(std::string
const & kernel_prefix,
statements_container const & statements, std::vector<mapping_type>
const & mappings)
const
72 std::vector<std::string> result;
73 for (
unsigned int i = 0; i < 2; ++i)
77 std::string suffix = (i==0)?
"_strided":
"";
78 stream <<
" __attribute__((reqd_work_group_size(" <<
p_.
local_size_0 <<
",1,1)))" << std::endl;
79 generate_prototype(stream, kernel_prefix + suffix,
"unsigned int N,", mappings, statements);
80 stream <<
"{" << std::endl;
89 loop_body(
statements_container const & statements_, std::vector<mapping_type>
const & mappings_) : statements(statements_), mappings(mappings_) { }
93 std::string process_str;
94 std::string i_str = (kernel_simd_width==1)?
"i*#stride":
"i";
96 process_str =
utils::append_width(
"#scalartype",kernel_simd_width) +
" #namereg = " +
vload(kernel_simd_width, i_str,
"#pointer") +
";";
100 tree_parsing::process(kernel_stream,
PARENT_NODE_TYPE,
"matrix_diag",
"#scalartype #namereg = #pointer[#diag_offset<0?$OFFSET{(i - #diag_offset)*#stride1, i*#stride2}:$OFFSET{i*#stride1, (i + #diag_offset)*#stride2}];", statements, mappings);
102 std::map<std::string, std::string> accessors;
103 accessors[
"vector"] =
"#namereg";
104 accessors[
"matrix_row"] =
"#namereg";
105 accessors[
"matrix_column"] =
"#namereg";
106 accessors[
"matrix_diag"] =
"#namereg";
107 accessors[
"scalar"] =
"#namereg";
110 process_str =
vstore(kernel_simd_width,
"#namereg",i_str,
"#pointer")+
";";
114 tree_parsing::process(kernel_stream,
LHS_NODE_TYPE,
"matrix_diag",
"#pointer[#diag_offset<0?$OFFSET{i - #diag_offset, i}:$OFFSET{i, i + #diag_offset}] = #namereg;", statements, mappings);
120 std::vector<mapping_type>
const & mappings;
123 element_wise_loop_1D(stream, loop_body(statements, mappings),
p_.fetching_policy, simd_width,
"i",
"N",
"get_global_id(0)",
"get_global_size(0)");
126 stream <<
"}" << std::endl;
127 result.push_back(stream.
str());
142 kernel = &programs[0].program().get_kernel(kernel_prefix+
"_strided");
144 kernel = &programs[1].program().get_kernel(kernel_prefix);
148 unsigned int current_arg = 0;
151 kernel->
arg(current_arg++, size);
157 bool up_to_internal_size_;
unsigned int local_size_0
void set_arguments(statements_container const &statements, viennacl::ocl::kernel &kernel, unsigned int &current_arg)
Represents an OpenCL kernel within ViennaCL.
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
static std::string vstore(unsigned int simd_width, std::string const &value, std::string const &offset, std::string const &ptr)
parameters_type const & parameters() const
A class representing a compute device (e.g. a GPU)
container_type const & array() const
std::list< scheduler::statement > const & data() const
scheduler::statement_node const & lhs_most(scheduler::statement::container_type const &array, vcl_size_t root)
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Map ViennaCL objects to generator wrappers.
static bool has_strided_access(statements_container const &statements)
vector_axpy_template(vector_axpy_template::parameters_type const &parameters, binding_policy_t binding_policy=BIND_ALL_UNIQUE)
fetching_policy_type fetching_policy
std::string evaluate(leaf_t leaf, std::map< std::string, std::string > const &accessors, scheduler::statement const &statement, vcl_size_t root_idx, mapping_type const &mapping)
vector_axpy_parameters(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy)
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
static vcl_size_t vector_size(scheduler::statement_node const &node, bool up_to_internal_size)
static void generate_prototype(utils::kernel_generation_stream &stream, std::string const &name, std::string const &first_arguments, std::vector< mapping_type > const &mappings, statements_container const &statements, std::map< std::string, unsigned int > const &widths)
void up_to_internal_size(bool v)
Code for parsing the expression trees.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
static void element_wise_loop_1D(utils::kernel_generation_stream &stream, loop_body_base const &loop_body, fetching_policy_type fetch, unsigned int simd_width, std::string const &i, std::string const &bound, std::string const &domain_id, std::string const &domain_size)
static std::string vload(unsigned int simd_width, std::string const &offset, std::string const &ptr)
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
void arg(unsigned int pos, cl_char val)
Sets a char argument at the provided position.
void enqueue(std::string const &kernel_prefix, std::vector< lazy_program_compiler > &programs, statements_container const &statements)
parameters_type(unsigned int _simd_width, unsigned int _local_size_1, unsigned int _local_size_2, unsigned int _num_kernels)
void process(utils::kernel_generation_stream &stream, leaf_t leaf, std::string const &type_key, std::string const &to_process, scheduler::statement const &statement, vcl_size_t root_idx, mapping_type const &mapping, std::set< std::string > &already_processed)
std::string append_width(std::string const &str, unsigned int width)