ViennaCL - The Vienna Computing Library: /data/development/ViennaCL/dev/viennacl/linalg/kernels/coordinate_matrix

Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_KERNELS_COORDINATE_MATRIX_SOURCE_HPP_
00002 #define VIENNACL_LINALG_KERNELS_COORDINATE_MATRIX_SOURCE_HPP_
00003 //Automatically generated file from auxiliary-directory, do not edit manually!
00004 namespace viennacl
00005 {
00006  namespace linalg
00007  {
00008   namespace kernels
00009   {
00010 const char * const coordinate_matrix_align1_vec_mul = 
00011 "//segmented parallel reduction. At present restricted to up to 256 threads\n"
00012 "void segmented_parallel_reduction(unsigned int row, \n"
00013 "                                  float val, \n"
00014 "                                  __local unsigned int * shared_rows, \n"
00015 "                                  __local float * inter_results) \n"
00016 "{ \n"
00017 "  //barrier(CLK_LOCAL_MEM_FENCE); \n"
00018 "  shared_rows[get_local_id(0)] = row; \n"
00019 "  inter_results[get_local_id(0)] = val; \n"
00020 "  float left = 0;\n"
00021 " \n"
00022 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00023 "  if( get_local_id(0) >=  1 && row == shared_rows[get_local_id(0) -  1] ) { left = inter_results[get_local_id(0) -  1]; }  \n"
00024 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00025 "  inter_results[get_local_id(0)] += left; left = 0;\n"
00026 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00027 "  if( get_local_id(0) >=  2 && row == shared_rows[get_local_id(0) -  2] ) { left = inter_results[get_local_id(0) -  2]; } \n"
00028 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00029 "  inter_results[get_local_id(0)] += left; left = 0;\n"
00030 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00031 "  if( get_local_id(0) >=  4 && row == shared_rows[get_local_id(0) -  4] ) { left = inter_results[get_local_id(0) -  4]; } \n"
00032 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00033 "  inter_results[get_local_id(0)] += left; left = 0;\n"
00034 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00035 "  if( get_local_id(0) >=  8 && row == shared_rows[get_local_id(0) -  8] ) { left = inter_results[get_local_id(0) -  8]; } \n"
00036 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00037 "  inter_results[get_local_id(0)] += left; left = 0;\n"
00038 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00039 "  if( get_local_id(0) >= 16 && row == shared_rows[get_local_id(0) - 16] ) { left = inter_results[get_local_id(0) - 16]; } \n"
00040 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00041 "  inter_results[get_local_id(0)] += left; left = 0;\n"
00042 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00043 "  if( get_local_id(0) >= 32 && row == shared_rows[get_local_id(0) - 32] ) { left = inter_results[get_local_id(0) - 32]; } \n"
00044 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00045 "  inter_results[get_local_id(0)] += left; left = 0;\n"
00046 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00047 "  if( get_local_id(0) >= 64 && row == shared_rows[get_local_id(0) - 64] ) { left = inter_results[get_local_id(0) - 64]; } \n"
00048 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00049 "  inter_results[get_local_id(0)] += left; left = 0;\n"
00050 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00051 "  if( get_local_id(0) >= 128 && row == shared_rows[get_local_id(0) - 128] ) { left = inter_results[get_local_id(0) - 128]; } \n"
00052 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00053 "  inter_results[get_local_id(0)] += left; left = 0;\n"
00054 "  barrier(CLK_LOCAL_MEM_FENCE); \n"
00055 "  //if( get_local_id(0) >= 256 && row == shared_rows[get_local_id(0) - 256] ) { left = inter_results[get_local_id(0) - 256]; } \n"
00056 "  //barrier(CLK_LOCAL_MEM_FENCE);  \n"
00057 "  //inter_results[get_local_id(0)] += left; left = 0;\n"
00058 "  //barrier(CLK_LOCAL_MEM_FENCE); \n"
00059 "}\n"
00060 "__kernel void vec_mul( \n"
00061 "          __global const uint2 * coords, //(row_index, column_index) \n"
00062 "          __global const float * elements, \n"
00063 "          __global const uint  * group_boundaries,\n"
00064 "          __global const float * vector,  \n"
00065 "          __global float * result, \n"
00066 "          __local unsigned int * shared_rows, \n"
00067 "          __local float * inter_results) \n"
00068 "{ \n"
00069 "  uint2 tmp; \n"
00070 "  float val;\n"
00071 "  uint last_index = get_local_size(0) - 1;\n"
00072 "  uint group_start = group_boundaries[get_group_id(0)];\n"
00073 "  uint group_end = group_boundaries[get_group_id(0) + 1];\n"
00074 "  uint k_end = 1 + (group_end - group_start - 1) / get_local_size(0);   // -1 in order to have correct behavior if group_end - group_start == j * get_local_size(0)\n"
00075 "  uint local_index = 0;\n"
00076 "  for (uint k = 0; k < k_end; ++k)\n"
00077 "  { \n"
00078 "    barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); \n"
00079 "    \n"
00080 "    local_index = group_start + k * get_local_size(0) + get_local_id(0); \n"
00081 "  \n"
00082 "    if (local_index < group_end)\n"
00083 "    {\n"
00084 "      tmp = coords[local_index]; \n"
00085 "      val = elements[local_index] * vector[tmp.y]; \n"
00086 "    }\n"
00087 "    else\n"
00088 "    {\n"
00089 "      tmp.x = 0;\n"
00090 "      tmp.y = 0;\n"
00091 "      val = 0;\n"
00092 "    }\n"
00093 "    barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); \n"
00094 "    //check for carry from previous loop run: \n"
00095 "    if (get_local_id(0) == 0 && k > 0)\n"
00096 "    { \n"
00097 "      if (tmp.x == shared_rows[last_index]) \n"
00098 "        val += inter_results[last_index]; \n"
00099 "      else \n"
00100 "        result[shared_rows[last_index]] += inter_results[last_index]; \n"
00101 "    } \n"
00102 "    barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); \n"
00103 "    segmented_parallel_reduction(tmp.x, val, shared_rows, inter_results); //all threads have to enter this function\n"
00104 "    barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); \n"
00105 "    if (get_local_id(0) != last_index &&\n"
00106 "        shared_rows[get_local_id(0)] != shared_rows[get_local_id(0) + 1] &&\n"
00107 "        inter_results[get_local_id(0)] != 0) \n"
00108 "    { \n"
00109 "      result[tmp.x] += inter_results[get_local_id(0)]; \n"
00110 "    }\n"
00111 "   \n"
00112 "    barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); \n"
00113 "  } //for k\n"
00114 "   \n"
00115 "  if (get_local_id(0) == last_index && inter_results[last_index] != 0) \n"
00116 "    result[tmp.x] += inter_results[last_index]; \n"
00117 "}\n"
00118 ; //coordinate_matrix_align1_vec_mul
00119 
00120   }  //namespace kernels
00121  }  //namespace linalg
00122 }  //namespace viennacl
00123 #endif
/data/development/ViennaCL/dev/viennacl/linalg/kernels/coordinate_matrix_source.h