// ViennaCL - The Vienna Computing Library, version 1.5.2
00001 #ifndef VIENNACL_GENERATOR_GENERATE_TEMPLATE_BASE_BASE 00002 #define VIENNACL_GENERATOR_GENERATE_TEMPLATE_BASE_BASE 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00021 00027 #include <list> 00028 #include <set> 00029 00030 #include "viennacl/ocl/backend.hpp" 00031 #include "viennacl/ocl/kernel.hpp" 00032 #include "viennacl/ocl/device.hpp" 00033 #include "viennacl/ocl/device_utils.hpp" 00034 #include "viennacl/ocl/infos.hpp" 00035 00036 #include "viennacl/scheduler/forwards.h" 00037 00038 #include "viennacl/generator/helpers.hpp" 00039 #include "viennacl/generator/map_functor.hpp" 00040 00041 namespace viennacl{ 00042 00043 namespace generator{ 00044 00045 00047 class profile_base{ 00048 public: 00049 typedef std::list< std::pair<scheduler::statement, scheduler::statement_node> > statements_type; 00050 00051 protected: 00052 friend std::ostream & operator<<(std::ostream &, profile_base const &); 00053 00054 virtual bool invalid_impl(viennacl::ocl::device const & /*dev*/, vcl_size_t /*scalartype_size*/) const { return false; } 00055 virtual bool is_slow_impl(viennacl::ocl::device const &) const { return false; } 00056 00057 virtual vcl_size_t lmem_used(vcl_size_t /*scalartype_size*/) const { return 0; } 00058 00059 void configure_local_sizes(viennacl::ocl::kernel & k, vcl_size_t /*kernel_id*/) const { 00060 
k.local_work_size(0,local_size_1_); 00061 k.local_work_size(1,local_size_2_); 00062 } 00063 00064 virtual void print(std::ostream & s) const{ 00065 s << csv_representation(); 00066 } 00067 00075 virtual void core(vcl_size_t kernel_id, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const = 0; 00076 00077 public: 00079 profile_base(unsigned int vectorization, vcl_size_t local_size_1, vcl_size_t local_size_2, vcl_size_t num_kernels) : vector_size_(vectorization), local_size_1_(local_size_1), local_size_2_(local_size_2), num_kernels_(num_kernels){ } 00080 00082 virtual ~profile_base(){ } 00083 00085 virtual void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const = 0; 00086 00087 virtual void kernel_arguments(statements_type const & statements, std::string & arguments_string) const = 0; 00088 00090 unsigned int vector_size() const { return vector_size_; } 00091 00095 virtual std::string csv_representation() const = 0; 00096 00099 bool is_slow(viennacl::ocl::device const & dev) const{ 00100 bool res = false; 00101 if(dev.type()==CL_DEVICE_TYPE_GPU){ 00102 vcl_size_t warp_size = 32; 00103 if(dev.vendor_id()==4098) 00104 warp_size = 64; 00105 res = static_cast<bool>(((local_size_1_*local_size_2_)%warp_size)>0); 00106 } 00107 return res || is_slow_impl(dev); 00108 } 00109 00114 bool is_invalid(viennacl::ocl::device const & dev, vcl_size_t scalartype_size) const{ 00115 //Query device informations 00116 vcl_size_t lmem_available = static_cast<vcl_size_t>(dev.local_mem_size()); 00117 vcl_size_t max_workgroup_size = dev.max_work_group_size(); 00118 00119 std::vector<vcl_size_t> max_work_item_sizes = dev.max_work_item_sizes(); 00120 bool invalid_work_group_sizes = local_size_1_*local_size_2_ > max_workgroup_size 00121 || local_size_1_ > max_work_item_sizes[0] 00122 || local_size_2_ > 
max_work_item_sizes[1]; // uses too much resources 00123 00124 return invalid_work_group_sizes 00125 || lmem_used(scalartype_size)>lmem_available 00126 || invalid_impl(dev, scalartype_size); 00127 } 00128 00130 vcl_size_t num_kernels() const{ return num_kernels_; } 00131 00138 virtual void operator()(utils::kernel_generation_stream & stream, vcl_size_t device_offset, statements_type const & statements) const { 00139 std::vector<detail::mapping_type> mapping(statements.size()); 00140 00142 std::string prototype; 00143 std::set<std::string> already_generated; 00144 kernel_arguments(statements, prototype); 00145 00146 { 00147 std::map<void *, vcl_size_t> memory; 00148 unsigned int current_arg = 0; 00149 vcl_size_t i = 0; 00150 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it) 00151 detail::traverse(it->first, it->second, detail::map_functor(memory,current_arg,mapping[i++])); 00152 } 00153 00154 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ 00155 detail::traverse(it->first, it->second, detail::prototype_generation_traversal(already_generated, prototype, vector_size(), mapping[std::distance(statements.begin(), it)])); 00156 } 00157 00158 prototype.erase(prototype.size()-1); //Last comma pruned 00159 00160 //Generate 00161 for(vcl_size_t n = 0 ; n < num_kernels() ; ++n){ 00162 //stream << "__attribute__((vec_type_hint()))" << std::endl; 00163 stream << " __attribute__((reqd_work_group_size(" << local_size_1_ << "," << local_size_2_ << "," << 1 << ")))" << std::endl; 00164 stream << "__kernel " << "void " << "kernel_" << device_offset << "_" << n << "(" << std::endl; 00165 stream << prototype << std::endl; 00166 stream << ")" << std::endl; 00167 00168 //core: 00169 stream << "{" << std::endl; 00170 stream.inc_tab(); 00171 core(n, stream, statements, mapping); 00172 stream.dec_tab(); 00173 stream << "}" << std::endl; 00174 } 00175 } 00176 00177 protected: 00178 unsigned int 
vector_size_; 00179 vcl_size_t local_size_1_; 00180 vcl_size_t local_size_2_; 00181 vcl_size_t num_kernels_; 00182 }; 00183 00184 00185 inline std::ostream & operator<<(std::ostream & os, profile_base const & profile){ 00186 profile.print(os); 00187 return os; 00188 } 00189 00190 } 00191 00192 } 00193 00194 #endif