00001 #ifndef VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_COL_COL_SOURCE_HPP_
00002 #define VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_COL_COL_SOURCE_HPP_
00003
00004 namespace viennacl
00005 {
00006 namespace linalg
00007 {
00008 namespace kernels
00009 {
/**
 * @brief OpenCL C source of the kernel `upper_trans_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A \ B^T with A and
 * B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from A_rows-1 down to 0.
 *  - Work-group g only touches B[i * B_internal_rows + g], which for a
 *    column-major B is row g of B (column g of B^T).
 *  - Work-item 0 divides the entry i == row by the diagonal A(row,row); the
 *    barriers around the division keep the work-group in step.
 *  - Every work-item then subtracts temp * A[elim + row * A_internal_rows] from
 *    the entries elim < row, striding by get_local_size(0).
 *
 * Fix relative to the previous text: the diagonal was read as
 * A[row + row*A_internal_cols]. Every other access to A in this kernel uses
 * A_internal_rows as the stride, so the diagonal now does too; the two forms
 * only coincide when A_internal_rows == A_internal_cols.
 *
 * NOTE(review): the source carries an "automatically generated" banner, so the
 * generator input presumably needs the same correction - confirm.
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_upper_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void upper_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  // Diagonal A(row,row): column-major stride is A_internal_rows.
  " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n";
00040
/**
 * @brief OpenCL C source of the kernel `trans_upper_trans_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A^T \ B^T with A
 * and B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from A_rows-1 down to 0.
 *  - Work-group g only touches B[i * B_internal_rows + g], which for a
 *    column-major B is row g of B (column g of B^T).
 *  - Work-item 0 divides the entry i == row by the diagonal A(row,row); the
 *    barriers around the division keep the work-group in step.
 *  - Every work-item then subtracts temp * A[elim * A_internal_rows + row] from
 *    the entries elim < row, striding by get_local_size(0).
 *
 * Fix relative to the previous text: the diagonal was read as
 * A[row + row*A_internal_cols]. Every other access to A in this kernel uses
 * A_internal_rows as the stride, so the diagonal now does too; the two forms
 * only coincide when A_internal_rows == A_internal_cols.
 *
 * NOTE(review): the source carries an "automatically generated" banner, so the
 * generator input presumably needs the same correction - confirm.
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_trans_upper_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_upper_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  // Diagonal A(row,row): column-major stride is A_internal_rows.
  " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n";
00071
/**
 * @brief OpenCL C source of the kernel `unit_lower_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A \ B with A and B
 * both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from 0 up to A_rows-1, with a barrier at the start of each step.
 *  - Work-group g only touches B[i + g * B_internal_rows], i.e. column g of the
 *    column-major B.
 *  - No division is performed and the diagonal of A is never read.
 *  - Every work-item subtracts temp * A[elim + row * A_internal_rows] from the
 *    entries row < elim < A_rows, striding by get_local_size(0).
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_unit_lower_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void unit_lower_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n";
00099
/**
 * @brief OpenCL C source of the kernel `trans_unit_upper_trans_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A^T \ B^T with A
 * and B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from A_rows-1 down to 0, with a barrier at the start of each
 *    step.
 *  - Work-group g only touches B[i * B_internal_rows + g], which for a
 *    column-major B is row g of B (column g of B^T).
 *  - No division is performed and the diagonal of A is never read.
 *  - Every work-item subtracts temp * A[elim * A_internal_rows + row] from the
 *    entries elim < row, striding by get_local_size(0).
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_trans_unit_upper_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_unit_upper_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n";
00127
/**
 * @brief OpenCL C source of the kernel `trans_upper_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A^T \ B with A and
 * B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from A_rows-1 down to 0.
 *  - Work-group g only touches B[i + g * B_internal_rows], i.e. column g of the
 *    column-major B.
 *  - Work-item 0 divides the entry i == row by the diagonal A(row,row); the
 *    barriers around the division keep the work-group in step.
 *  - Every work-item then subtracts temp * A[elim * A_internal_rows + row] from
 *    the entries elim < row, striding by get_local_size(0).
 *
 * Fix relative to the previous text: the diagonal was read as
 * A[row + row*A_internal_cols]. Every other access to A in this kernel uses
 * A_internal_rows as the stride, so the diagonal now does too; the two forms
 * only coincide when A_internal_rows == A_internal_cols.
 *
 * NOTE(review): the source carries an "automatically generated" banner, so the
 * generator input presumably needs the same correction - confirm.
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_trans_upper_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_upper_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  // Diagonal A(row,row): column-major stride is A_internal_rows.
  " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n";
00158
/**
 * @brief OpenCL C source of the kernel `unit_lower_trans_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A \ B^T with A and
 * B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from 0 up to A_rows-1, with a barrier at the start of each step.
 *  - Work-group g only touches B[i * B_internal_rows + g], which for a
 *    column-major B is row g of B (column g of B^T).
 *  - No division is performed and the diagonal of A is never read.
 *  - Every work-item subtracts temp * A[elim + row * A_internal_rows] from the
 *    entries row < elim < A_rows, striding by get_local_size(0).
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_unit_lower_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void unit_lower_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n";
00186
/**
 * @brief OpenCL C source of the kernel `trans_unit_upper_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A^T \ B with A and
 * B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from A_rows-1 down to 0, with a barrier at the start of each
 *    step.
 *  - Work-group g only touches B[i + g * B_internal_rows], i.e. column g of the
 *    column-major B.
 *  - No division is performed and the diagonal of A is never read.
 *  - Every work-item subtracts temp * A[elim * A_internal_rows + row] from the
 *    entries elim < row, striding by get_local_size(0).
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_trans_unit_upper_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_unit_upper_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n";
00214
/**
 * @brief OpenCL C source of the kernel `trans_unit_lower_trans_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A^T \ B^T with A
 * and B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from 0 up to A_rows-1, with a barrier at the start of each step.
 *  - Work-group g only touches B[i * B_internal_rows + g], which for a
 *    column-major B is row g of B (column g of B^T).
 *  - No division is performed and the diagonal of A is never read.
 *  - Every work-item subtracts temp * A[elim * A_internal_rows + row] from the
 *    entries row < elim < A_rows, striding by get_local_size(0).
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_trans_unit_lower_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_unit_lower_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n";
00242
/**
 * @brief OpenCL C source of the kernel `lower_trans_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A \ B^T with A and
 * B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from 0 up to A_rows-1.
 *  - Work-group g only touches B[i * B_internal_rows + g], which for a
 *    column-major B is row g of B (column g of B^T).
 *  - Work-item 0 divides the entry i == row by the diagonal A(row,row); the
 *    barriers around the division keep the work-group in step.
 *  - Every work-item then subtracts temp * A[elim + row * A_internal_rows] from
 *    the entries row < elim < A_rows, striding by get_local_size(0).
 *
 * Fix relative to the previous text: the diagonal was read as
 * A[row + row*A_internal_cols]. Every other access to A in this kernel uses
 * A_internal_rows as the stride, so the diagonal now does too; the two forms
 * only coincide when A_internal_rows == A_internal_cols.
 *
 * NOTE(review): the source carries an "automatically generated" banner, so the
 * generator input presumably needs the same correction - confirm.
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_lower_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void lower_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  // Diagonal A(row,row): column-major stride is A_internal_rows.
  " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n";
00273
/**
 * @brief OpenCL C source of the kernel `upper_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A \ B with A and B
 * both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from A_rows-1 down to 0.
 *  - Work-group g only touches B[i + g * B_internal_rows], i.e. column g of the
 *    column-major B.
 *  - Work-item 0 divides the entry i == row by the diagonal A(row,row); the
 *    barriers around the division keep the work-group in step.
 *  - Every work-item then subtracts temp * A[elim + row * A_internal_rows] from
 *    the entries elim < row, striding by get_local_size(0).
 *
 * Fix relative to the previous text: the diagonal was read as
 * A[row + row*A_internal_cols]. Every other access to A in this kernel uses
 * A_internal_rows as the stride, so the diagonal now does too; the two forms
 * only coincide when A_internal_rows == A_internal_cols.
 *
 * NOTE(review): the source carries an "automatically generated" banner, so the
 * generator input presumably needs the same correction - confirm.
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_upper_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void upper_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  // Diagonal A(row,row): column-major stride is A_internal_rows.
  " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n";
00304
/**
 * @brief OpenCL C source of the kernel `trans_lower_trans_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A^T \ B^T with A
 * and B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from 0 up to A_rows-1.
 *  - Work-group g only touches B[i * B_internal_rows + g], which for a
 *    column-major B is row g of B (column g of B^T).
 *  - Work-item 0 divides the entry i == row by the diagonal A(row,row); the
 *    barriers around the division keep the work-group in step.
 *  - Every work-item then subtracts temp * A[elim * A_internal_rows + row] from
 *    the entries row < elim < A_rows, striding by get_local_size(0).
 *
 * Fix relative to the previous text: the diagonal was read as
 * A[row + row*A_internal_cols]. Every other access to A in this kernel uses
 * A_internal_rows as the stride, so the diagonal now does too; the two forms
 * only coincide when A_internal_rows == A_internal_cols.
 *
 * NOTE(review): the source carries an "automatically generated" banner, so the
 * generator input presumably needs the same correction - confirm.
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_trans_lower_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_lower_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  // Diagonal A(row,row): column-major stride is A_internal_rows.
  " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n";
00335
/**
 * @brief OpenCL C source of the kernel `lower_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A \ B with A and B
 * both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from 0 up to A_rows-1.
 *  - Work-group g only touches B[i + g * B_internal_rows], i.e. column g of the
 *    column-major B.
 *  - Work-item 0 divides the entry i == row by the diagonal A(row,row); the
 *    barriers around the division keep the work-group in step.
 *  - Every work-item then subtracts temp * A[elim + row * A_internal_rows] from
 *    the entries row < elim < A_rows, striding by get_local_size(0).
 *
 * Fix relative to the previous text: the diagonal was read as
 * A[row + row*A_internal_cols]. Every other access to A in this kernel uses
 * A_internal_rows as the stride, so the diagonal now does too; the two forms
 * only coincide when A_internal_rows == A_internal_cols.
 *
 * NOTE(review): the source carries an "automatically generated" banner, so the
 * generator input presumably needs the same correction - confirm.
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_lower_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void lower_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  // Diagonal A(row,row): column-major stride is A_internal_rows.
  " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n";
00366
/**
 * @brief OpenCL C source of the kernel `trans_lower_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A^T \ B with A and
 * B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from 0 up to A_rows-1.
 *  - Work-group g only touches B[i + g * B_internal_rows], i.e. column g of the
 *    column-major B.
 *  - Work-item 0 divides the entry i == row by the diagonal A(row,row); the
 *    barriers around the division keep the work-group in step.
 *  - Every work-item then subtracts temp * A[elim * A_internal_rows + row] from
 *    the entries row < elim < A_rows, striding by get_local_size(0).
 *
 * Fix relative to the previous text: the diagonal was read as
 * A[row + row*A_internal_cols]. Every other access to A in this kernel uses
 * A_internal_rows as the stride, so the diagonal now does too; the two forms
 * only coincide when A_internal_rows == A_internal_cols.
 *
 * NOTE(review): the source carries an "automatically generated" banner, so the
 * generator input presumably needs the same correction - confirm.
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_trans_lower_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_lower_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  // Diagonal A(row,row): column-major stride is A_internal_rows.
  " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n";
00397
/**
 * @brief OpenCL C source of the kernel `unit_upper_trans_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A \ B^T with A and
 * B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from A_rows-1 down to 0, with a barrier at the start of each
 *    step.
 *  - Work-group g only touches B[i * B_internal_rows + g], which for a
 *    column-major B is row g of B (column g of B^T).
 *  - No division is performed and the diagonal of A is never read.
 *  - Every work-item subtracts temp * A[elim + row * A_internal_rows] from the
 *    entries elim < row, striding by get_local_size(0).
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_unit_upper_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void unit_upper_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n";
00425
/**
 * @brief OpenCL C source of the kernel `unit_upper_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A \ B with A and B
 * both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from A_rows-1 down to 0, with a barrier at the start of each
 *    step.
 *  - Work-group g only touches B[i + g * B_internal_rows], i.e. column g of the
 *    column-major B.
 *  - No division is performed and the diagonal of A is never read.
 *  - Every work-item subtracts temp * A[elim + row * A_internal_rows] from the
 *    entries elim < row, striding by get_local_size(0).
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_unit_upper_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void unit_upper_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n";
00453
/**
 * @brief OpenCL C source of the kernel `trans_unit_lower_solve`.
 *
 * Per the comments embedded in the source: in-place solve of A^T \ B with A and
 * B both column-major.
 *
 * What the kernel text does:
 *  - `row` runs from 0 up to A_rows-1, with a barrier at the start of each step.
 *  - Work-group g only touches B[i + g * B_internal_rows], i.e. column g of the
 *    column-major B.
 *  - No division is performed and the diagonal of A is never read.
 *  - Every work-item subtracts temp * A[elim * A_internal_rows + row] from the
 *    entries row < elim < A_rows, striding by get_local_size(0).
 *
 * A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are accepted but
 * not referenced by the kernel body.
 */
const char * const matrix_solve_col_col_align1_trans_unit_lower_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_unit_lower_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n";
00481
00482 }
00483 }
00484 }
00485 #endif