• Main Page
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

/data/development/ViennaCL/dev/viennacl/linalg/kernels/matrix_solve_row_col_source.h

Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_ROW_COL_SOURCE_HPP_
00002 #define VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_ROW_COL_SOURCE_HPP_
00003 //Automatically generated file from auxiliary-directory, do not edit manually!
00004 namespace viennacl
00005 {
00006  namespace linalg
00007  {
00008   namespace kernels
00009   {
// OpenCL C source text of the kernel `upper_trans_solve`; its embedded comments read
// "inplace solve A \ B^T" and "A...row_major, B...col_major".
// Per step, `row` runs from A_rows-1 down to 0:
//   1. the work-item with get_local_id(0) == 0 divides B[row * B_internal_rows + get_group_id(0)]
//      by the diagonal entry A[row + row*A_internal_cols];
//   2. after a barrier, every work-item loads that B entry into `temp`;
//   3. the work-items subtract temp * A[elim * A_internal_cols + row] from the B entries with
//      elim < row, starting at get_local_id(0) and striding by get_local_size(0).
// get_group_id(0) selects which slice of B a work-group updates.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00010 const char * const matrix_solve_row_col_align1_upper_trans_solve = 
00011 "// file automatically generated - do not edit!\n"
00012 "// inplace solve A \\ B^T\n"
00013 "// matrix layouts: A...row_major, B...col_major\n"
00014 "__kernel void upper_trans_solve(\n"
00015 "          __global const float * A,\n"
00016 "          unsigned int A_rows,\n"
00017 "          unsigned int A_cols,\n"
00018 "          unsigned int A_internal_rows,\n"
00019 "          unsigned int A_internal_cols,\n"
00020 "          __global float * B,  \n"
00021 "          unsigned int B_rows,\n"
00022 "          unsigned int B_cols,\n"
00023 "          unsigned int B_internal_rows,\n"
00024 "          unsigned int B_internal_cols)\n"
00025 "{ \n"
00026 "  float temp; \n"
00027 "  for (int row = A_rows-1; row > -1; --row) \n"
00028 "  { \n"
00029 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00030 "    if (get_local_id(0) == 0) \n"
00031 "      B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
00032 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00033 "      temp = B[row * B_internal_rows + get_group_id(0)]; \n"
00034 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00035 "    for  (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
00036 "      B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
00037 "   }\n"
00038 "}\n"
00039 ; //matrix_solve_row_col_align1_upper_trans_solve
00040 
// OpenCL C source text of the kernel `trans_upper_trans_solve`; its embedded comments read
// "inplace solve A^T \ B^T" and "A...row_major, B...col_major".
// Per step, `row` runs from A_rows-1 down to 0:
//   1. the work-item with get_local_id(0) == 0 divides B[row * B_internal_rows + get_group_id(0)]
//      by the diagonal entry A[row + row*A_internal_cols];
//   2. after a barrier, every work-item loads that B entry into `temp`;
//   3. the work-items subtract temp * A[elim + row * A_internal_cols] from the B entries with
//      elim < row, starting at get_local_id(0) and striding by get_local_size(0).
// Compared with `upper_trans_solve`, only the A index in step 3 has `elim` and `row` swapped.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00041 const char * const matrix_solve_row_col_align1_trans_upper_trans_solve = 
00042 "// file automatically generated - do not edit!\n"
00043 "// inplace solve A^T \\ B^T\n"
00044 "// matrix layouts: A...row_major, B...col_major\n"
00045 "__kernel void trans_upper_trans_solve(\n"
00046 "          __global const float * A,\n"
00047 "          unsigned int A_rows,\n"
00048 "          unsigned int A_cols,\n"
00049 "          unsigned int A_internal_rows,\n"
00050 "          unsigned int A_internal_cols,\n"
00051 "          __global float * B,  \n"
00052 "          unsigned int B_rows,\n"
00053 "          unsigned int B_cols,\n"
00054 "          unsigned int B_internal_rows,\n"
00055 "          unsigned int B_internal_cols)\n"
00056 "{ \n"
00057 "  float temp; \n"
00058 "  for (int row = A_rows-1; row > -1; --row) \n"
00059 "  { \n"
00060 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00061 "    if (get_local_id(0) == 0) \n"
00062 "      B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
00063 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00064 "      temp = B[row * B_internal_rows + get_group_id(0)]; \n"
00065 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00066 "    for  (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
00067 "      B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
00068 "   }\n"
00069 "}\n"
00070 ; //matrix_solve_row_col_align1_trans_upper_trans_solve
00071 
// OpenCL C source text of the kernel `unit_lower_solve`; its embedded comments read
// "inplace solve A \ B" and "A...row_major, B...col_major".
// Per step, `row` runs from 0 up to A_rows-1:
//   1. after a barrier, every work-item loads B[row + get_group_id(0) * B_internal_rows] into `temp`;
//   2. the work-items subtract temp * A[elim * A_internal_cols + row] from the B entries with
//      row < elim < A_rows, starting at row + get_local_id(0) + 1 and striding by get_local_size(0).
// There is no division step: the diagonal of A is never read.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00072 const char * const matrix_solve_row_col_align1_unit_lower_solve = 
00073 "// file automatically generated - do not edit!\n"
00074 "// inplace solve A \\ B\n"
00075 "// matrix layouts: A...row_major, B...col_major\n"
00076 "__kernel void unit_lower_solve(\n"
00077 "          __global const float * A,\n"
00078 "          unsigned int A_rows,\n"
00079 "          unsigned int A_cols,\n"
00080 "          unsigned int A_internal_rows,\n"
00081 "          unsigned int A_internal_cols,\n"
00082 "          __global float * B,  \n"
00083 "          unsigned int B_rows,\n"
00084 "          unsigned int B_cols,\n"
00085 "          unsigned int B_internal_rows,\n"
00086 "          unsigned int B_internal_cols)\n"
00087 "{ \n"
00088 "  float temp; \n"
00089 "  for (int row = 0; row < A_rows; ++row) \n"
00090 "  { \n"
00091 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00092 "      temp = B[row + get_group_id(0) * B_internal_rows]; \n"
00093 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00094 "    for  (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
00095 "      B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_cols + row];\n"
00096 "   }\n"
00097 "}\n"
00098 ; //matrix_solve_row_col_align1_unit_lower_solve
00099 
// OpenCL C source text of the kernel `trans_unit_upper_trans_solve`; its embedded comments read
// "inplace solve A^T \ B^T" and "A...row_major, B...col_major".
// Per step, `row` runs from A_rows-1 down to 0:
//   1. after a barrier, every work-item loads B[row * B_internal_rows + get_group_id(0)] into `temp`;
//   2. the work-items subtract temp * A[elim + row * A_internal_cols] from the B entries with
//      elim < row, starting at get_local_id(0) and striding by get_local_size(0).
// There is no division step: the diagonal of A is never read.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00100 const char * const matrix_solve_row_col_align1_trans_unit_upper_trans_solve = 
00101 "// file automatically generated - do not edit!\n"
00102 "// inplace solve A^T \\ B^T\n"
00103 "// matrix layouts: A...row_major, B...col_major\n"
00104 "__kernel void trans_unit_upper_trans_solve(\n"
00105 "          __global const float * A,\n"
00106 "          unsigned int A_rows,\n"
00107 "          unsigned int A_cols,\n"
00108 "          unsigned int A_internal_rows,\n"
00109 "          unsigned int A_internal_cols,\n"
00110 "          __global float * B,  \n"
00111 "          unsigned int B_rows,\n"
00112 "          unsigned int B_cols,\n"
00113 "          unsigned int B_internal_rows,\n"
00114 "          unsigned int B_internal_cols)\n"
00115 "{ \n"
00116 "  float temp; \n"
00117 "  for (int row = A_rows-1; row > -1; --row) \n"
00118 "  { \n"
00119 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00120 "      temp = B[row * B_internal_rows + get_group_id(0)]; \n"
00121 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00122 "    for  (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
00123 "      B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
00124 "   }\n"
00125 "}\n"
00126 ; //matrix_solve_row_col_align1_trans_unit_upper_trans_solve
00127 
// OpenCL C source text of the kernel `trans_upper_solve`; its embedded comments read
// "inplace solve A^T \ B" and "A...row_major, B...col_major".
// Per step, `row` runs from A_rows-1 down to 0:
//   1. the work-item with get_local_id(0) == 0 divides B[row + get_group_id(0) * B_internal_rows]
//      by the diagonal entry A[row + row*A_internal_cols];
//   2. after a barrier, every work-item loads that B entry into `temp`;
//   3. the work-items subtract temp * A[elim + row * A_internal_cols] from the B entries with
//      elim < row, starting at get_local_id(0) and striding by get_local_size(0).
// get_group_id(0) selects which slice of B a work-group updates.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00128 const char * const matrix_solve_row_col_align1_trans_upper_solve = 
00129 "// file automatically generated - do not edit!\n"
00130 "// inplace solve A^T \\ B\n"
00131 "// matrix layouts: A...row_major, B...col_major\n"
00132 "__kernel void trans_upper_solve(\n"
00133 "          __global const float * A,\n"
00134 "          unsigned int A_rows,\n"
00135 "          unsigned int A_cols,\n"
00136 "          unsigned int A_internal_rows,\n"
00137 "          unsigned int A_internal_cols,\n"
00138 "          __global float * B,  \n"
00139 "          unsigned int B_rows,\n"
00140 "          unsigned int B_cols,\n"
00141 "          unsigned int B_internal_rows,\n"
00142 "          unsigned int B_internal_cols)\n"
00143 "{ \n"
00144 "  float temp; \n"
00145 "  for (int row = A_rows-1; row > -1; --row) \n"
00146 "  { \n"
00147 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00148 "    if (get_local_id(0) == 0) \n"
00149 "      B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_cols]; \n"
00150 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00151 "      temp = B[row + get_group_id(0) * B_internal_rows]; \n"
00152 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00153 "    for  (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
00154 "      B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_cols];\n"
00155 "   }\n"
00156 "}\n"
00157 ; //matrix_solve_row_col_align1_trans_upper_solve
00158 
// OpenCL C source text of the kernel `unit_lower_trans_solve`; its embedded comments read
// "inplace solve A \ B^T" and "A...row_major, B...col_major".
// Per step, `row` runs from 0 up to A_rows-1:
//   1. after a barrier, every work-item loads B[row * B_internal_rows + get_group_id(0)] into `temp`;
//   2. the work-items subtract temp * A[elim * A_internal_cols + row] from the B entries with
//      row < elim < A_rows, starting at row + get_local_id(0) + 1 and striding by get_local_size(0).
// There is no division step: the diagonal of A is never read.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00159 const char * const matrix_solve_row_col_align1_unit_lower_trans_solve = 
00160 "// file automatically generated - do not edit!\n"
00161 "// inplace solve A \\ B^T\n"
00162 "// matrix layouts: A...row_major, B...col_major\n"
00163 "__kernel void unit_lower_trans_solve(\n"
00164 "          __global const float * A,\n"
00165 "          unsigned int A_rows,\n"
00166 "          unsigned int A_cols,\n"
00167 "          unsigned int A_internal_rows,\n"
00168 "          unsigned int A_internal_cols,\n"
00169 "          __global float * B,  \n"
00170 "          unsigned int B_rows,\n"
00171 "          unsigned int B_cols,\n"
00172 "          unsigned int B_internal_rows,\n"
00173 "          unsigned int B_internal_cols)\n"
00174 "{ \n"
00175 "  float temp; \n"
00176 "  for (int row = 0; row < A_rows; ++row) \n"
00177 "  { \n"
00178 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00179 "      temp = B[row * B_internal_rows + get_group_id(0)]; \n"
00180 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00181 "    for  (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
00182 "      B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
00183 "   }\n"
00184 "}\n"
00185 ; //matrix_solve_row_col_align1_unit_lower_trans_solve
00186 
// OpenCL C source text of the kernel `trans_unit_upper_solve`; its embedded comments read
// "inplace solve A^T \ B" and "A...row_major, B...col_major".
// Per step, `row` runs from A_rows-1 down to 0:
//   1. after a barrier, every work-item loads B[row + get_group_id(0) * B_internal_rows] into `temp`;
//   2. the work-items subtract temp * A[elim + row * A_internal_cols] from the B entries with
//      elim < row, starting at get_local_id(0) and striding by get_local_size(0).
// There is no division step: the diagonal of A is never read.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00187 const char * const matrix_solve_row_col_align1_trans_unit_upper_solve = 
00188 "// file automatically generated - do not edit!\n"
00189 "// inplace solve A^T \\ B\n"
00190 "// matrix layouts: A...row_major, B...col_major\n"
00191 "__kernel void trans_unit_upper_solve(\n"
00192 "          __global const float * A,\n"
00193 "          unsigned int A_rows,\n"
00194 "          unsigned int A_cols,\n"
00195 "          unsigned int A_internal_rows,\n"
00196 "          unsigned int A_internal_cols,\n"
00197 "          __global float * B,  \n"
00198 "          unsigned int B_rows,\n"
00199 "          unsigned int B_cols,\n"
00200 "          unsigned int B_internal_rows,\n"
00201 "          unsigned int B_internal_cols)\n"
00202 "{ \n"
00203 "  float temp; \n"
00204 "  for (int row = A_rows-1; row > -1; --row) \n"
00205 "  { \n"
00206 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00207 "      temp = B[row + get_group_id(0) * B_internal_rows]; \n"
00208 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00209 "    for  (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
00210 "      B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_cols];\n"
00211 "   }\n"
00212 "}\n"
00213 ; //matrix_solve_row_col_align1_trans_unit_upper_solve
00214 
// OpenCL C source text of the kernel `trans_unit_lower_trans_solve`; its embedded comments read
// "inplace solve A^T \ B^T" and "A...row_major, B...col_major".
// Per step, `row` runs from 0 up to A_rows-1:
//   1. after a barrier, every work-item loads B[row * B_internal_rows + get_group_id(0)] into `temp`;
//   2. the work-items subtract temp * A[elim + row * A_internal_cols] from the B entries with
//      row < elim < A_rows, starting at row + get_local_id(0) + 1 and striding by get_local_size(0).
// There is no division step: the diagonal of A is never read.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00215 const char * const matrix_solve_row_col_align1_trans_unit_lower_trans_solve = 
00216 "// file automatically generated - do not edit!\n"
00217 "// inplace solve A^T \\ B^T\n"
00218 "// matrix layouts: A...row_major, B...col_major\n"
00219 "__kernel void trans_unit_lower_trans_solve(\n"
00220 "          __global const float * A,\n"
00221 "          unsigned int A_rows,\n"
00222 "          unsigned int A_cols,\n"
00223 "          unsigned int A_internal_rows,\n"
00224 "          unsigned int A_internal_cols,\n"
00225 "          __global float * B,  \n"
00226 "          unsigned int B_rows,\n"
00227 "          unsigned int B_cols,\n"
00228 "          unsigned int B_internal_rows,\n"
00229 "          unsigned int B_internal_cols)\n"
00230 "{ \n"
00231 "  float temp; \n"
00232 "  for (int row = 0; row < A_rows; ++row) \n"
00233 "  { \n"
00234 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00235 "      temp = B[row * B_internal_rows + get_group_id(0)]; \n"
00236 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00237 "    for  (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
00238 "      B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
00239 "   }\n"
00240 "}\n"
00241 ; //matrix_solve_row_col_align1_trans_unit_lower_trans_solve
00242 
// OpenCL C source text of the kernel `lower_trans_solve`; its embedded comments read
// "inplace solve A \ B^T" and "A...row_major, B...col_major".
// Per step, `row` runs from 0 up to A_rows-1:
//   1. the work-item with get_local_id(0) == 0 divides B[row * B_internal_rows + get_group_id(0)]
//      by the diagonal entry A[row + row*A_internal_cols];
//   2. after a barrier, every work-item loads that B entry into `temp`;
//   3. the work-items subtract temp * A[elim * A_internal_cols + row] from the B entries with
//      row < elim < A_rows, starting at row + get_local_id(0) + 1 and striding by get_local_size(0).
// get_group_id(0) selects which slice of B a work-group updates.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00243 const char * const matrix_solve_row_col_align1_lower_trans_solve = 
00244 "// file automatically generated - do not edit!\n"
00245 "// inplace solve A \\ B^T\n"
00246 "// matrix layouts: A...row_major, B...col_major\n"
00247 "__kernel void lower_trans_solve(\n"
00248 "          __global const float * A,\n"
00249 "          unsigned int A_rows,\n"
00250 "          unsigned int A_cols,\n"
00251 "          unsigned int A_internal_rows,\n"
00252 "          unsigned int A_internal_cols,\n"
00253 "          __global float * B,  \n"
00254 "          unsigned int B_rows,\n"
00255 "          unsigned int B_cols,\n"
00256 "          unsigned int B_internal_rows,\n"
00257 "          unsigned int B_internal_cols)\n"
00258 "{ \n"
00259 "  float temp; \n"
00260 "  for (int row = 0; row < A_rows; ++row) \n"
00261 "  { \n"
00262 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00263 "    if (get_local_id(0) == 0) \n"
00264 "      B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
00265 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00266 "      temp = B[row * B_internal_rows + get_group_id(0)]; \n"
00267 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00268 "    for  (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
00269 "      B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
00270 "   }\n"
00271 "}\n"
00272 ; //matrix_solve_row_col_align1_lower_trans_solve
00273 
// OpenCL C source text of the kernel `upper_solve`; its embedded comments read
// "inplace solve A \ B" and "A...row_major, B...col_major".
// Per step, `row` runs from A_rows-1 down to 0:
//   1. the work-item with get_local_id(0) == 0 divides B[row + get_group_id(0) * B_internal_rows]
//      by the diagonal entry A[row + row*A_internal_cols];
//   2. after a barrier, every work-item loads that B entry into `temp`;
//   3. the work-items subtract temp * A[elim * A_internal_cols + row] from the B entries with
//      elim < row, starting at get_local_id(0) and striding by get_local_size(0).
// get_group_id(0) selects which slice of B a work-group updates.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00274 const char * const matrix_solve_row_col_align1_upper_solve = 
00275 "// file automatically generated - do not edit!\n"
00276 "// inplace solve A \\ B\n"
00277 "// matrix layouts: A...row_major, B...col_major\n"
00278 "__kernel void upper_solve(\n"
00279 "          __global const float * A,\n"
00280 "          unsigned int A_rows,\n"
00281 "          unsigned int A_cols,\n"
00282 "          unsigned int A_internal_rows,\n"
00283 "          unsigned int A_internal_cols,\n"
00284 "          __global float * B,  \n"
00285 "          unsigned int B_rows,\n"
00286 "          unsigned int B_cols,\n"
00287 "          unsigned int B_internal_rows,\n"
00288 "          unsigned int B_internal_cols)\n"
00289 "{ \n"
00290 "  float temp; \n"
00291 "  for (int row = A_rows-1; row > -1; --row) \n"
00292 "  { \n"
00293 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00294 "    if (get_local_id(0) == 0) \n"
00295 "      B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_cols]; \n"
00296 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00297 "      temp = B[row + get_group_id(0) * B_internal_rows]; \n"
00298 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00299 "    for  (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
00300 "      B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_cols + row];\n"
00301 "   }\n"
00302 "}\n"
00303 ; //matrix_solve_row_col_align1_upper_solve
00304 
// OpenCL C source text of the kernel `trans_lower_trans_solve`; its embedded comments read
// "inplace solve A^T \ B^T" and "A...row_major, B...col_major".
// Per step, `row` runs from 0 up to A_rows-1:
//   1. the work-item with get_local_id(0) == 0 divides B[row * B_internal_rows + get_group_id(0)]
//      by the diagonal entry A[row + row*A_internal_cols];
//   2. after a barrier, every work-item loads that B entry into `temp`;
//   3. the work-items subtract temp * A[elim + row * A_internal_cols] from the B entries with
//      row < elim < A_rows, starting at row + get_local_id(0) + 1 and striding by get_local_size(0).
// get_group_id(0) selects which slice of B a work-group updates.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00305 const char * const matrix_solve_row_col_align1_trans_lower_trans_solve = 
00306 "// file automatically generated - do not edit!\n"
00307 "// inplace solve A^T \\ B^T\n"
00308 "// matrix layouts: A...row_major, B...col_major\n"
00309 "__kernel void trans_lower_trans_solve(\n"
00310 "          __global const float * A,\n"
00311 "          unsigned int A_rows,\n"
00312 "          unsigned int A_cols,\n"
00313 "          unsigned int A_internal_rows,\n"
00314 "          unsigned int A_internal_cols,\n"
00315 "          __global float * B,  \n"
00316 "          unsigned int B_rows,\n"
00317 "          unsigned int B_cols,\n"
00318 "          unsigned int B_internal_rows,\n"
00319 "          unsigned int B_internal_cols)\n"
00320 "{ \n"
00321 "  float temp; \n"
00322 "  for (int row = 0; row < A_rows; ++row) \n"
00323 "  { \n"
00324 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00325 "    if (get_local_id(0) == 0) \n"
00326 "      B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
00327 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00328 "      temp = B[row * B_internal_rows + get_group_id(0)]; \n"
00329 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00330 "    for  (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
00331 "      B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
00332 "   }\n"
00333 "}\n"
00334 ; //matrix_solve_row_col_align1_trans_lower_trans_solve
00335 
// OpenCL C source text of the kernel `lower_solve`; its embedded comments read
// "inplace solve A \ B" and "A...row_major, B...col_major".
// Per step, `row` runs from 0 up to A_rows-1:
//   1. the work-item with get_local_id(0) == 0 divides B[row + get_group_id(0) * B_internal_rows]
//      by the diagonal entry A[row + row*A_internal_cols];
//   2. after a barrier, every work-item loads that B entry into `temp`;
//   3. the work-items subtract temp * A[elim * A_internal_cols + row] from the B entries with
//      row < elim < A_rows, starting at row + get_local_id(0) + 1 and striding by get_local_size(0).
// get_group_id(0) selects which slice of B a work-group updates.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00336 const char * const matrix_solve_row_col_align1_lower_solve = 
00337 "// file automatically generated - do not edit!\n"
00338 "// inplace solve A \\ B\n"
00339 "// matrix layouts: A...row_major, B...col_major\n"
00340 "__kernel void lower_solve(\n"
00341 "          __global const float * A,\n"
00342 "          unsigned int A_rows,\n"
00343 "          unsigned int A_cols,\n"
00344 "          unsigned int A_internal_rows,\n"
00345 "          unsigned int A_internal_cols,\n"
00346 "          __global float * B,  \n"
00347 "          unsigned int B_rows,\n"
00348 "          unsigned int B_cols,\n"
00349 "          unsigned int B_internal_rows,\n"
00350 "          unsigned int B_internal_cols)\n"
00351 "{ \n"
00352 "  float temp; \n"
00353 "  for (int row = 0; row < A_rows; ++row) \n"
00354 "  { \n"
00355 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00356 "    if (get_local_id(0) == 0) \n"
00357 "      B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_cols]; \n"
00358 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00359 "      temp = B[row + get_group_id(0) * B_internal_rows]; \n"
00360 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00361 "    for  (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
00362 "      B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_cols + row];\n"
00363 "   }\n"
00364 "}\n"
00365 ; //matrix_solve_row_col_align1_lower_solve
00366 
// OpenCL C source text of the kernel `trans_lower_solve`; its embedded comments read
// "inplace solve A^T \ B" and "A...row_major, B...col_major".
// Per step, `row` runs from 0 up to A_rows-1:
//   1. the work-item with get_local_id(0) == 0 divides B[row + get_group_id(0) * B_internal_rows]
//      by the diagonal entry A[row + row*A_internal_cols];
//   2. after a barrier, every work-item loads that B entry into `temp`;
//   3. the work-items subtract temp * A[elim + row * A_internal_cols] from the B entries with
//      row < elim < A_rows, starting at row + get_local_id(0) + 1 and striding by get_local_size(0).
// get_group_id(0) selects which slice of B a work-group updates.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00367 const char * const matrix_solve_row_col_align1_trans_lower_solve = 
00368 "// file automatically generated - do not edit!\n"
00369 "// inplace solve A^T \\ B\n"
00370 "// matrix layouts: A...row_major, B...col_major\n"
00371 "__kernel void trans_lower_solve(\n"
00372 "          __global const float * A,\n"
00373 "          unsigned int A_rows,\n"
00374 "          unsigned int A_cols,\n"
00375 "          unsigned int A_internal_rows,\n"
00376 "          unsigned int A_internal_cols,\n"
00377 "          __global float * B,  \n"
00378 "          unsigned int B_rows,\n"
00379 "          unsigned int B_cols,\n"
00380 "          unsigned int B_internal_rows,\n"
00381 "          unsigned int B_internal_cols)\n"
00382 "{ \n"
00383 "  float temp; \n"
00384 "  for (int row = 0; row < A_rows; ++row) \n"
00385 "  { \n"
00386 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00387 "    if (get_local_id(0) == 0) \n"
00388 "      B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_cols]; \n"
00389 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00390 "      temp = B[row + get_group_id(0) * B_internal_rows]; \n"
00391 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00392 "    for  (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
00393 "      B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_cols];\n"
00394 "   }\n"
00395 "}\n"
00396 ; //matrix_solve_row_col_align1_trans_lower_solve
00397 
// OpenCL C source text of the kernel `unit_upper_trans_solve`; its embedded comments read
// "inplace solve A \ B^T" and "A...row_major, B...col_major".
// Per step, `row` runs from A_rows-1 down to 0:
//   1. after a barrier, every work-item loads B[row * B_internal_rows + get_group_id(0)] into `temp`;
//   2. the work-items subtract temp * A[elim * A_internal_cols + row] from the B entries with
//      elim < row, starting at get_local_id(0) and striding by get_local_size(0).
// There is no division step: the diagonal of A is never read.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00398 const char * const matrix_solve_row_col_align1_unit_upper_trans_solve = 
00399 "// file automatically generated - do not edit!\n"
00400 "// inplace solve A \\ B^T\n"
00401 "// matrix layouts: A...row_major, B...col_major\n"
00402 "__kernel void unit_upper_trans_solve(\n"
00403 "          __global const float * A,\n"
00404 "          unsigned int A_rows,\n"
00405 "          unsigned int A_cols,\n"
00406 "          unsigned int A_internal_rows,\n"
00407 "          unsigned int A_internal_cols,\n"
00408 "          __global float * B,  \n"
00409 "          unsigned int B_rows,\n"
00410 "          unsigned int B_cols,\n"
00411 "          unsigned int B_internal_rows,\n"
00412 "          unsigned int B_internal_cols)\n"
00413 "{ \n"
00414 "  float temp; \n"
00415 "  for (int row = A_rows-1; row > -1; --row) \n"
00416 "  { \n"
00417 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00418 "      temp = B[row * B_internal_rows + get_group_id(0)]; \n"
00419 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00420 "    for  (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
00421 "      B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
00422 "   }\n"
00423 "}\n"
00424 ; //matrix_solve_row_col_align1_unit_upper_trans_solve
00425 
// OpenCL C source text of the kernel `unit_upper_solve`; its embedded comments read
// "inplace solve A \ B" and "A...row_major, B...col_major".
// Per step, `row` runs from A_rows-1 down to 0:
//   1. after a barrier, every work-item loads B[row + get_group_id(0) * B_internal_rows] into `temp`;
//   2. the work-items subtract temp * A[elim * A_internal_cols + row] from the B entries with
//      elim < row, starting at get_local_id(0) and striding by get_local_size(0).
// There is no division step: the diagonal of A is never read.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00426 const char * const matrix_solve_row_col_align1_unit_upper_solve = 
00427 "// file automatically generated - do not edit!\n"
00428 "// inplace solve A \\ B\n"
00429 "// matrix layouts: A...row_major, B...col_major\n"
00430 "__kernel void unit_upper_solve(\n"
00431 "          __global const float * A,\n"
00432 "          unsigned int A_rows,\n"
00433 "          unsigned int A_cols,\n"
00434 "          unsigned int A_internal_rows,\n"
00435 "          unsigned int A_internal_cols,\n"
00436 "          __global float * B,  \n"
00437 "          unsigned int B_rows,\n"
00438 "          unsigned int B_cols,\n"
00439 "          unsigned int B_internal_rows,\n"
00440 "          unsigned int B_internal_cols)\n"
00441 "{ \n"
00442 "  float temp; \n"
00443 "  for (int row = A_rows-1; row > -1; --row) \n"
00444 "  { \n"
00445 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00446 "      temp = B[row + get_group_id(0) * B_internal_rows]; \n"
00447 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00448 "    for  (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
00449 "      B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_cols + row];\n"
00450 "   }\n"
00451 "}\n"
00452 ; //matrix_solve_row_col_align1_unit_upper_solve
00453 
// OpenCL C source text of the kernel `trans_unit_lower_solve`; its embedded comments read
// "inplace solve A^T \ B" and "A...row_major, B...col_major".
// Per step, `row` runs from 0 up to A_rows-1:
//   1. after a barrier, every work-item loads B[row + get_group_id(0) * B_internal_rows] into `temp`;
//   2. the work-items subtract temp * A[elim + row * A_internal_cols] from the B entries with
//      row < elim < A_rows, starting at row + get_local_id(0) + 1 and striding by get_local_size(0).
// There is no division step: the diagonal of A is never read.
// A_cols, A_internal_rows, B_rows, B_cols and B_internal_cols are parameters the body never reads.
// The string itself states that it is generated; change the generator, not this text.
00454 const char * const matrix_solve_row_col_align1_trans_unit_lower_solve = 
00455 "// file automatically generated - do not edit!\n"
00456 "// inplace solve A^T \\ B\n"
00457 "// matrix layouts: A...row_major, B...col_major\n"
00458 "__kernel void trans_unit_lower_solve(\n"
00459 "          __global const float * A,\n"
00460 "          unsigned int A_rows,\n"
00461 "          unsigned int A_cols,\n"
00462 "          unsigned int A_internal_rows,\n"
00463 "          unsigned int A_internal_cols,\n"
00464 "          __global float * B,  \n"
00465 "          unsigned int B_rows,\n"
00466 "          unsigned int B_cols,\n"
00467 "          unsigned int B_internal_rows,\n"
00468 "          unsigned int B_internal_cols)\n"
00469 "{ \n"
00470 "  float temp; \n"
00471 "  for (int row = 0; row < A_rows; ++row) \n"
00472 "  { \n"
00473 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00474 "      temp = B[row + get_group_id(0) * B_internal_rows]; \n"
00475 "    //eliminate column of op(A) with index 'row' in parallel: \n"
00476 "    for  (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
00477 "      B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_cols];\n"
00478 "   }\n"
00479 "}\n"
00480 ; //matrix_solve_row_col_align1_trans_unit_lower_solve
00481 
00482   }  //namespace kernels
00483  }  //namespace linalg
00484 }  //namespace viennacl
00485 #endif

Generated on Fri Dec 30 2011 23:20:43 for ViennaCL - The Vienna Computing Library by  doxygen 1.7.1