ViennaCL - The Vienna Computing Library  1.5.2
viennacl/generator/profiles.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_GENERATOR_PROFILES_HPP
00002 #define VIENNACL_GENERATOR_PROFILES_HPP
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006            Institute for Analysis and Scientific Computing,
00007            TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010            -----------------
00011  ViennaCL - The Vienna Computing Library
00012            -----------------
00013 
00014    Project Head:    Karl Rupp  rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00021 
00027 #include <map>
00028 
00029 #include "viennacl/ocl/device.hpp"
00030 
00031 #include "viennacl/generator/forwards.h"
00032 
00033 #include "viennacl/tools/shared_ptr.hpp"
00034 
00035 #include "viennacl/generator/profile_base.hpp"
00036 #include "viennacl/generator/saxpy.hpp"
00037 #include "viennacl/generator/scalar_reduction.hpp"
00038 #include "viennacl/generator/vector_reduction.hpp"
00039 #include "viennacl/generator/matrix_product.hpp"
00040 
00041 namespace viennacl{
00042 
00043   namespace generator{
00044 
00045     namespace profiles{
00046 
00047       typedef cl_uint vendor_id_type;
00048       typedef cl_device_type device_type;
00049 
00050       typedef std::string device_name_type;
00051       typedef viennacl::tools::shared_ptr<profile_base> profile_base_ptr;
00052 
00054       template<class KeyType, class ValueType>
00055       struct map_wrapper{
00056           typedef std::map<KeyType,ValueType> map_type;
00057           map_type map;
00058           ValueType & operator[](KeyType const & key){ return map[key]; }
00059       };
00060 
00062       struct expression_map : public map_wrapper<expression_key_type, profile_base_ptr>{ };
00063 
00065       struct device_name_map : public map_wrapper<device_name_type, expression_map>{ };
00066 
00068       struct device_architecture_map : public map_wrapper<viennacl::ocl::device_architecture_family, device_name_map>{ };
00069 
00071       struct device_type_map : public map_wrapper<device_type,device_architecture_map>{ };
00072 
00074       struct database_type : public map_wrapper<vendor_id_type, device_type_map>{ };
00075 
00077         inline void set_generation_default_to(database_type & map, vendor_id_type vendor_id, viennacl::ocl::device_architecture_family family, expression_key_type expression, std::string const & device_name){
00078             map[vendor_id][CL_DEVICE_TYPE_GPU][family][""][expression] = map[vendor_id][CL_DEVICE_TYPE_GPU][family][device_name][expression];
00079         }
00080 
00082         inline void set_all_generation_default_to(database_type & map, vendor_id_type vendor_id, viennacl::ocl::device_architecture_family family, std::string const & device_name){
00083             set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_SAXPY_TYPE,4),device_name);
00084             set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_SAXPY_TYPE,4),device_name);
00085             set_generation_default_to(map,vendor_id,family,std::make_pair(SCALAR_REDUCE_TYPE,4),device_name);
00086             set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Nx_TYPE,4),device_name);
00087             set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Tx_TYPE,4),device_name);
00088             set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NN_TYPE,4),device_name);
00089             set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TN_TYPE,4),device_name);
00090             set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NT_TYPE,4),device_name);
00091             set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TT_TYPE,4),device_name);
00092 
00093             set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_SAXPY_TYPE,8),device_name);
00094             set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_SAXPY_TYPE,8),device_name);
00095             set_generation_default_to(map,vendor_id,family,std::make_pair(SCALAR_REDUCE_TYPE,8),device_name);
00096             set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Nx_TYPE,8),device_name);
00097             set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Tx_TYPE,8),device_name);
00098             set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NN_TYPE,8),device_name);
00099             set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TN_TYPE,8),device_name);
00100             set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NT_TYPE,8),device_name);
00101             set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TT_TYPE,8),device_name);
00102         }
00103 
00105       static database_type init_database(){
00106         database_type map;
00107 
00108         /*---------------------------*/
00109         /*     GPU Defaults          */
00110         /*---------------------------*/
00111         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,128,128,true));
00112         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00113         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(1, 128, 128, true));
00114         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32));
00115         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32));
00116         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
00117         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
00118         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
00119         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
00120 
00121 
00122         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(1,128,128,true));
00123         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00124         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(1, 128, 128, true));
00125         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32));
00126         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32));
00127         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
00128         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
00129         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
00130         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
00131 
00132         /*---------------------------*/
00133         /*     CPU Defaults          */
00134         /*---------------------------*/
00135         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(8,16,256,true));
00136         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00137         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,8,512,true));
00138         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,2,1,8));
00139         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,16,8,8));
00140         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00141         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00142         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00143         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00144 
00145 
00146         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(8,16,32,true));
00147         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00148         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(8,8,512,true));
00149         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,1,8));
00150         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,8,16,16));
00151         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00152         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00153         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00154         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00155 
00156 
00157         /*---------------------------*/
00158         /*     ACCELERATOR Defaults  */
00159         /*---------------------------*/
00160         //same as CPU for now
00161         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(8,16,256,true));
00162         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00163         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,8,512,true));
00164         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,2,1,8));
00165         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,16,8,8));
00166         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00167         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00168         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00169         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00170 
00171 
00172         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(8,16,32,true));
00173         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00174         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(8,8,512,true));
00175         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,1,8));
00176         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,8,16,16));
00177         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00178         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00179         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00180         map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
00181 
00182 
00183 
00184         /*---------------------------*/
00185         /*     AMD                   */
00186         /*---------------------------*/
00187 
00188         //Evergreen
00189 
00190             //Cypress
00191             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,4,64,true));
00192             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00193             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,128,128,true));
00194             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
00195             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,32,8,256));
00196             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(4,4,64,16,4,4,8,1,0));
00197             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(4,4,64,16,4,4,8,1,0));
00198             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,64,16,4,4,8,1,0));
00199             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,128,8,8,4,4,0,0));
00200 
00201             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true));
00202             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00203             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,256,64,true));
00204             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
00205             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,64,4,256));
00206             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(2,16,32,16,2,2,8,0,0));
00207             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(2,4,64,32,4,2,2,0,0));
00208             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(4,2,64,32,8,8,4,0,0));
00209             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(2,16,64,8,2,2,4,0,0));
00210 
00211             //Default
00212             set_all_generation_default_to(map,viennacl::ocl::amd_id,viennacl::ocl::Evergreen,"Cypress");
00213 
00214 
00215          //Southern Islands
00216             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,4,64,true));
00217             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00218             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,128,128,true));
00219             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
00220             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,32,8,256));
00221             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,128,32,4,4,4,1,0));
00222             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,64,16,4,2,8,1,0));
00223             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(4,16,64,16,4,4,8,1,0));
00224             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(4,16,64,16,4,4,8,1,0));
00225 
00226             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true));
00227             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00228             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,256,64,true));
00229             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
00230             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,64,4,256));
00231             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(2,4,128,64,4,2,2,1,0));
00232             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(2,2,128,32,4,2,2,0,0));
00233             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(2,8,128,32,2,2,2,1,0));
00234             map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(2,8,128,32,2,2,2,1,0));
00235 
00236 
00237             //Default
00238             set_all_generation_default_to(map,viennacl::ocl::amd_id,viennacl::ocl::SouthernIslands,"Tahiti");
00239 
00240 
00241         /*---------------------------*/
00242         /*     NVidia                */
00243         /*---------------------------*/
00244 
00245         //-----Fermi
00246 
00247           //Geforce GTX 470
00248           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_SAXPY_TYPE,4)]      =    profile_base_ptr(new vector_saxpy(1,1,256,true));
00249           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_SAXPY_TYPE,4)]      =    profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00250           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(SCALAR_REDUCE_TYPE,4)]     =    profile_base_ptr(new scalar_reduction(4,64,512,true));
00251           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)]  =    profile_base_ptr(new vector_reduction(1,1,256,1024));
00252           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)]  =    profile_base_ptr(new vector_reduction(1,64,4,64));
00253           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] =    profile_base_ptr(new matrix_product(1,2,64,64,8,4,2,1,0));
00254           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] =    profile_base_ptr(new matrix_product(1,8,32,16,4,4,8,0,0));
00255           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] =    profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0));
00256           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] =    profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0));
00257 
00258           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_SAXPY_TYPE,8)]      =    profile_base_ptr(new vector_saxpy(2,1,64,true));
00259           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_SAXPY_TYPE,8)]      =    profile_base_ptr(new matrix_saxpy(2,16,16,16,16,true));
00260           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(SCALAR_REDUCE_TYPE,8)]     =    profile_base_ptr(new scalar_reduction(2,64,512,true));
00261           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)]  =    profile_base_ptr(new vector_reduction(1,1,128,1024));
00262           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)]  =    profile_base_ptr(new vector_reduction(1,16,32,1024));
00263           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] =    profile_base_ptr(new matrix_product(1,8,64,32,2,2,8,1,0));
00264           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] =    profile_base_ptr(new matrix_product(1,64,128,4,2,2,8,0,1));
00265           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] =    profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0));
00266           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] =    profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0));
00267 
00268           //default
00269           set_all_generation_default_to(map,viennacl::ocl::nvidia_id,viennacl::ocl::Fermi,"GeForce GTX 470");
00270 
00271           //Geforce GTX 680
00272           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_SAXPY_TYPE,4)]      =    profile_base_ptr(new vector_saxpy(1,1,256,true));
00273           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_SAXPY_TYPE,4)]      =    profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
00274           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(SCALAR_REDUCE_TYPE,4)]     =    profile_base_ptr(new scalar_reduction(4,64,512,true));
00275           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)]  =    profile_base_ptr(new vector_reduction(1,1,256,1024));
00276           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)]  =    profile_base_ptr(new vector_reduction(1,64,4,64));
00277           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] =    profile_base_ptr(new matrix_product(1,2,64,64,8,4,2,1,0));
00278           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] =    profile_base_ptr(new matrix_product(1,8,32,16,4,4,8,0,0));
00279           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] =    profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0));
00280           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] =    profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0));
00281 
00282           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_SAXPY_TYPE,8)]      =    profile_base_ptr(new vector_saxpy(2,1,64,true));
00283           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_SAXPY_TYPE,8)]      =    profile_base_ptr(new matrix_saxpy(2,16,16,16,16,true));
00284           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(SCALAR_REDUCE_TYPE,8)]     =    profile_base_ptr(new scalar_reduction(2,64,512,true));
00285           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)]  =    profile_base_ptr(new vector_reduction(1,1,128,1024));
00286           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)]  =    profile_base_ptr(new vector_reduction(1,16,32,1024));
00287           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] =    profile_base_ptr(new matrix_product(1,8,64,32,2,2,8,1,0));
00288           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] =    profile_base_ptr(new matrix_product(1,64,128,4,2,2,8,0,1));
00289           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] =    profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0));
00290           map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] =    profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0));
00291 
00292           //default
00293           set_all_generation_default_to(map,viennacl::ocl::nvidia_id,viennacl::ocl::Kepler,"GeForce GTX 680");
00294 
00295 
00296         return map;
00297       }
00298       static database_type database = init_database();
00299 
00301       static profile_base * handle_failure(viennacl::ocl::device const & device, expression_descriptor const & descriptor, tools::shared_ptr<profile_base> const & profile){
00302         //Returns default if the profile is invalid
00303         if(profile->is_invalid(device, descriptor.scalartype_size))
00304           return at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, device.type()).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, descriptor.make_key()).get();
00305         return profile.get();
00306       }
00307 
00309       static profile_base * get(viennacl::ocl::device const & device, expression_descriptor const & descriptor){
00310         device_type dev_type = device.type();
00311         vendor_id_type vendor_id = device.vendor_id();
00312         viennacl::ocl::device_architecture_family device_architecture = device.architecture_family();
00313         std::string const & device_name = device.name();
00314         expression_key_type expression_key = descriptor.make_key();
00315 
00316         //std::cout << "Looking up vendor ID..." << std::endl;
00317         /*-Vendor ID-*/
00318         database_type::map_type::iterator vendor_it = database.map.find(vendor_id);
00319         //Vendor not recognized => global default:
00320         if(vendor_it==database.map.end())
00321           return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key));
00322 
00323         /*-Device Type-*/
00324         //std::cout << "Looking up device type..." << std::endl;
00325         device_type_map::map_type::iterator device_type_it = vendor_it->second.map.find(dev_type);
00326         //Device type not recognized for this vendor => global default
00327         if(device_type_it==vendor_it->second.map.end())
00328           return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key));
00329 
00330         /*-Device Architecture-*/
00331         //std::cout << "Looking up device architecture..." << std::endl;
00332         device_architecture_map::map_type::iterator architecture_it = device_type_it->second.map.find(device_architecture);
00333         if(architecture_it==device_type_it->second.map.end())
00334           return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key));
00335 
00336         /*-Device Name-*/
00337         //std::cout << "Looking up device name..." << std::endl;
00338         device_name_map::map_type::iterator device_name_it = architecture_it->second.map.find(device_name);
00339         //Name not found => Vendor default
00340         if(device_name_it==architecture_it->second.map.end())
00341           return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key));
00342 
00343         //std::cout << "Looking up expression name.." << std::endl;
00344         /*-Expression-*/
00345         expression_map::map_type::iterator expression_it = device_name_it->second.map.find(expression_key);
00346         //Expression not found => Vendor default
00347         if(expression_it==device_name_it->second.map.end())
00348           return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key));
00349 
00350         //std::cout << "Device found in the database! Getting profile..." << std::endl;
00351         //Everything okay. Return specific profile//
00352         return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key));
00353       }
00354 
00355     }
00356 
00357   }
00358 
00359 }
00360 
00361 
00362 #endif
00363