ViennaCL - The Vienna Computing Library
1.7.0
Free open-source GPU-accelerated linear algebra and solver library.
tesla_k20m.hpp
Go to the documentation of this file.
1
#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_KEPLER_K20M_HPP_
2
#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_KEPLER_K20M_HPP_
3
4
/* =========================================================================
   Copyright (c) 2010-2015, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */
20
21
#include "
viennacl/device_specific/templates/matrix_product_template.hpp
"
22
23
#include "
viennacl/device_specific/templates/row_wise_reduction_template.hpp
"
24
25
#include "
viennacl/device_specific/templates/reduction_template.hpp
"
26
27
#include "
viennacl/device_specific/templates/matrix_axpy_template.hpp
"
28
29
#include "
viennacl/device_specific/templates/vector_axpy_template.hpp
"
30
31
#include "
viennacl/device_specific/forwards.h
"
32
#include "
viennacl/device_specific/builtin_database/common.hpp
"
33
34
namespace
viennacl
{
35
namespace
device_specific{
36
namespace
builtin_database{
37
namespace
devices{
38
namespace
gpu{
39
namespace
nvidia{
40
namespace
kepler
{
41
namespace
tesla_k20m{
42
43
/** @brief Adds the "Tesla K20m" matrix-product entry to the 8B database for the ('T','T') tag pair.
*
* The two unnamed char_to_type tags only select this overload.
* NOTE(review): presumably they encode the transposition of the two operands - confirm.
* NOTE(review): the entry is registered under ocl::unknown although this file lives in
*               namespace kepler - confirm that this is intended.
*
* @param db  Database that receives the entry.
*/
inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
{
  matrix_product_template::parameters_type const tuned(1, 2, 8, 32, 8, 2, 4,
                                                       FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED,
                                                       4, 16);
  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
47
48
/** @brief Adds the "Tesla K20m" matrix-product entry to the 8B database for the ('T','N') tag pair.
*
* The two unnamed char_to_type tags only select this overload.
* NOTE(review): presumably they encode the transposition of the two operands - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
{
  matrix_product_template::parameters_type const tuned(1, 16, 16, 32, 2, 1, 4,
                                                       FETCH_FROM_LOCAL, FETCH_FROM_LOCAL,
                                                       16, 32);
  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
52
53
/** @brief Adds the "Tesla K20m" matrix-product entry to the 8B database for the ('N','T') tag pair.
*
* The two unnamed char_to_type tags only select this overload.
* NOTE(review): presumably they encode the transposition of the two operands - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
{
  matrix_product_template::parameters_type const tuned(1, 2, 8, 64, 16, 1, 2,
                                                       FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED,
                                                       32, 4);
  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
57
58
/** @brief Adds the "Tesla K20m" matrix-product entry to the 8B database for the ('N','N') tag pair.
*
* The two unnamed char_to_type tags only select this overload.
* NOTE(review): presumably they encode the transposition of the two operands - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
{
  matrix_product_template::parameters_type const tuned(1, 128, 32, 1, 1, 1, 16,
                                                       FETCH_FROM_GLOBAL_CONTIGUOUS, FETCH_FROM_LOCAL,
                                                       16, 8);
  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
62
63
/** @brief Adds the "Tesla K20m" matrix-product entry to the 4B database for the ('T','T') tag pair.
*
* The two unnamed char_to_type tags only select this overload.
* NOTE(review): presumably they encode the transposition of the two operands - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>)
{
  matrix_product_template::parameters_type const tuned(1, 8, 32, 16, 4, 8, 4,
                                                       FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED,
                                                       8, 16);
  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
67
68
/** @brief Adds the "Tesla K20m" matrix-product entry to the 4B database for the ('T','N') tag pair.
*
* The two unnamed char_to_type tags only select this overload.
* NOTE(review): presumably they encode the transposition of the two operands - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>)
{
  matrix_product_template::parameters_type const tuned(1, 32, 16, 32, 8, 2, 4,
                                                       FETCH_FROM_LOCAL, FETCH_FROM_LOCAL,
                                                       16, 64);
  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
72
73
/** @brief Adds the "Tesla K20m" matrix-product entry to the 4B database for the ('N','T') tag pair.
*
* The two unnamed char_to_type tags only select this overload.
* NOTE(review): presumably they encode the transposition of the two operands - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>)
{
  matrix_product_template::parameters_type const tuned(4, 8, 2, 4, 8, 2, 8,
                                                       FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_GLOBAL_CONTIGUOUS,
                                                       0, 0);
  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
77
78
/** @brief Adds the "Tesla K20m" matrix-product entry to the 4B database for the ('N','N') tag pair.
*
* The two unnamed char_to_type tags only select this overload.
* NOTE(review): presumably they encode the transposition of the two operands - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>)
{
  matrix_product_template::parameters_type const tuned(1, 128, 64, 1, 4, 2, 16,
                                                       FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_LOCAL,
                                                       16, 8);
  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
82
83
/** @brief Adds the "Tesla K20m" row-wise-reduction entry to the 8B database for the 'T' tag.
*
* The unnamed char_to_type tag only selects this overload.
* NOTE(review): presumably it marks the transposed-matrix case - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_8B(database_type<row_wise_reduction_template::parameters_type> & db, char_to_type<'T'>)
{
  row_wise_reduction_template::parameters_type const tuned(1, 2, 64, 1024, FETCH_FROM_GLOBAL_STRIDED);
  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
87
88
/** @brief Adds the "Tesla K20m" row-wise-reduction entry to the 8B database for the 'N' tag.
*
* The unnamed char_to_type tag only selects this overload.
* NOTE(review): presumably it marks the non-transposed-matrix case - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_8B(database_type<row_wise_reduction_template::parameters_type> & db, char_to_type<'N'>)
{
  row_wise_reduction_template::parameters_type const tuned(8, 16, 8, 32768, FETCH_FROM_GLOBAL_CONTIGUOUS);
  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
92
93
/** @brief Adds the "Tesla K20m" row-wise-reduction entry to the 4B database for the 'T' tag.
*
* The unnamed char_to_type tag only selects this overload.
* NOTE(review): presumably it marks the transposed-matrix case - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_4B(database_type<row_wise_reduction_template::parameters_type> & db, char_to_type<'T'>)
{
  row_wise_reduction_template::parameters_type const tuned(1, 1, 128, 2048, FETCH_FROM_GLOBAL_STRIDED);
  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
97
98
/** @brief Adds the "Tesla K20m" row-wise-reduction entry to the 4B database for the 'N' tag.
*
* The unnamed char_to_type tag only selects this overload.
* NOTE(review): presumably it marks the non-transposed-matrix case - confirm.
*
* @param db  Database that receives the entry.
*/
inline void add_4B(database_type<row_wise_reduction_template::parameters_type> & db, char_to_type<'N'>)
{
  row_wise_reduction_template::parameters_type const tuned(1, 32, 8, 2048, FETCH_FROM_GLOBAL_CONTIGUOUS);
  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
102
103
/** @brief Adds the "Tesla K20m" reduction entry to the 8B database.
*
* @param db  Database that receives the entry.
*/
inline void add_8B(database_type<reduction_template::parameters_type> & db)
{
  reduction_template::parameters_type const tuned(1, 256, 4096, FETCH_FROM_GLOBAL_STRIDED);
  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
107
108
/** @brief Adds the "Tesla K20m" reduction entry to the 4B database.
*
* @param db  Database that receives the entry.
*/
inline void add_4B(database_type<reduction_template::parameters_type> & db)
{
  reduction_template::parameters_type const tuned(1, 128, 512, FETCH_FROM_GLOBAL_STRIDED);
  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
112
113
/** @brief Adds the "Tesla K20m" matrix-axpy entry to the 8B database.
*
* @param db  Database that receives the entry.
*/
inline void add_8B(database_type<matrix_axpy_template::parameters_type> & db)
{
  matrix_axpy_template::parameters_type const tuned(1, 64, 8, 128, 128, FETCH_FROM_GLOBAL_STRIDED);
  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
117
118
/** @brief Adds the "Tesla K20m" matrix-axpy entry to the 4B database.
*
* @param db  Database that receives the entry.
*/
inline void add_4B(database_type<matrix_axpy_template::parameters_type> & db)
{
  matrix_axpy_template::parameters_type const tuned(1, 32, 4, 128, 128, FETCH_FROM_GLOBAL_CONTIGUOUS);
  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
122
123
/** @brief Adds the "Tesla K20m" vector-axpy entry to the 8B database.
*
* @param db  Database that receives the entry.
*/
inline void add_8B(database_type<vector_axpy_template::parameters_type> & db)
{
  vector_axpy_template::parameters_type const tuned(1, 256, 16384, FETCH_FROM_GLOBAL_STRIDED);
  db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
127
128
/** @brief Adds the "Tesla K20m" vector-axpy entry to the 4B database.
*
* @param db  Database that receives the entry.
*/
inline void add_4B(database_type<vector_axpy_template::parameters_type> & db)
{
  vector_axpy_template::parameters_type const tuned(1, 256, 16384, FETCH_FROM_GLOBAL_STRIDED);
  db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::unknown, "Tesla K20m", tuned);
}
132
133
}
134
}
135
}
136
}
137
}
138
}
139
}
140
}
141
#endif
viennacl::ocl::nvidia_id
Definition:
device_utils.hpp:49
viennacl::device_specific::builtin_database::database_type
Definition:
common.hpp:44
matrix_product_template.hpp
viennacl::ocl::kepler
Definition:
device_utils.hpp:60
viennacl
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition:
cpu_ram.hpp:34
viennacl::device_specific::builtin_database::database_type::add_8B
database_type< ParamT > & add_8B(vendor_id_type p0, device_type p1, ocl::device_architecture_family p2, device_name_type p3, ParamT const &p5)
Definition:
common.hpp:83
viennacl::device_specific::FETCH_FROM_GLOBAL_STRIDED
Definition:
template_base.hpp:50
common.hpp
viennacl::device_specific::builtin_database::database_type::add_4B
database_type< ParamT > & add_4B(vendor_id_type p0, device_type p1, ocl::device_architecture_family p2, device_name_type p3, ParamT const &p5)
Definition:
common.hpp:76
viennacl::device_specific::builtin_database::devices::gpu::nvidia::kepler::tesla_k20m::add_4B
void add_4B(database_type< matrix_product_template::parameters_type > &db, char_to_type<'T'>, char_to_type<'T'>)
Definition:
tesla_k20m.hpp:63
matrix_axpy_template.hpp
viennacl::device_specific::char_to_type
Definition:
forwards.h:260
forwards.h
Forwards declaration.
vector_axpy_template.hpp
viennacl::device_specific::FETCH_FROM_LOCAL
Definition:
template_base.hpp:49
row_wise_reduction_template.hpp
viennacl::device_specific::builtin_database::devices::gpu::nvidia::kepler::tesla_k20m::add_8B
void add_8B(database_type< matrix_product_template::parameters_type > &db, char_to_type<'T'>, char_to_type<'T'>)
Definition:
tesla_k20m.hpp:43
viennacl::device_specific::FETCH_FROM_GLOBAL_CONTIGUOUS
Definition:
template_base.hpp:51
viennacl::device_specific::template_base::parameters_type
Definition:
template_base.hpp:57
reduction_template.hpp
viennacl::ocl::unknown
Definition:
device_utils.hpp:69
viennacl::device_specific::template_base_impl< matrix_product_template, matrix_product_parameters >::parameters_type
matrix_product_parameters parameters_type
Definition:
template_base.hpp:527
viennacl
device_specific
builtin_database
devices
gpu
nvidia
kepler
tesla_k20m.hpp
Generated on Fri Jul 31 2015 14:18:57 for ViennaCL - The Vienna Computing Library by
1.8.9.1