ViennaCL - The Vienna Computing Library  1.6.1
Free open-source GPU-accelerated linear algebra and solver library.
vector_float_double.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2014, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 
23 //
24 // *** System
25 //
26 #include <iostream>
27 #include <iomanip>
28 #include <cmath>
29 
30 //
31 // *** ViennaCL
32 //
33 //#define VIENNACL_DEBUG_ALL
34 
35 #include "viennacl/vector.hpp"
42 
43 #include "Random.hpp"
44 
45 
/**
 * Non-owning, strided view onto a raw array of NumericT.
 *
 * Logical element i of the view refers to p_values[start_idx + i * increment].
 * The view stores only the pointer and the three layout parameters; it never
 * allocates, copies or frees the underlying storage, so the caller must keep
 * the array alive for as long as the proxy is used.
 *
 * No bounds checking is performed: size() merely reports the element count
 * handed to the constructor, and operator[] trusts the caller to stay below it.
 */
template<typename NumericT>
class vector_proxy
{
public:
  /**
   * @param p_values      Pointer to the first element of the underlying array.
   * @param start_idx     Offset (in elements) of logical element 0.
   * @param increment     Stride (in elements) between consecutive logical elements.
   * @param num_elements  Number of logical elements exposed by the view.
   */
  vector_proxy(NumericT * p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
   : values_(p_values), start_(start_idx), inc_(increment), size_(num_elements) {}

  /** Read-only access to logical element `index` (unchecked). */
  NumericT const & operator[](std::size_t index) const { return values_[start_ + index * inc_]; }

  /** Mutable access to logical element `index` (unchecked); writes go straight to the underlying array. */
  NumericT       & operator[](std::size_t index)       { return values_[start_ + index * inc_]; }

  /** Number of logical elements, as passed to the constructor. */
  std::size_t size() const { return size_; }

private:
  NumericT *  values_;  // underlying storage (not owned)
  std::size_t start_;   // offset of logical element 0
  std::size_t inc_;     // stride between logical elements
  std::size_t size_;    // number of logical elements
};
64 
// Transfers the contents of host_vec to vcl_vec: every element of host_vec is first
// gathered into a temporary std::vector, which is then handed to viennacl::copy().
//
// NOTE(review): the declarator line (listing line 66) is missing from this extract, so the
// function name, return type and parameter types are not visible here. The later calls
// proxy_copy(host_v1, vcl_v1) suggest this is proxy_copy(host_vec, vcl_vec) -- confirm
// against the original file.
65 template<typename NumericT>
67 {
// Staging buffer with one slot per element of host_vec.
68  std::vector<NumericT> std_vec(host_vec.size());
69 
// Element-wise gather through host_vec's operator[]
// (presumably needed because host_vec is a strided proxy -- TODO confirm).
70  for (std::size_t i=0; i<host_vec.size(); ++i)
71  std_vec[i] = host_vec[i];
72 
// Copy the staged values into vcl_vec, starting at vcl_vec.begin().
73  viennacl::copy(std_vec.begin(), std_vec.end(), vcl_vec.begin());
74 }
75 
// Transfers the contents of vcl_vec to host_vec: the range [vcl_vec.begin(), vcl_vec.end())
// is copied into a temporary std::vector via viennacl::copy(), and the first
// host_vec.size() entries of that buffer are then written into host_vec.
//
// The staging buffer has vcl_vec.size() entries while the write-back loop runs over
// host_vec.size(), so the code relies on host_vec.size() <= vcl_vec.size().
//
// NOTE(review): the declarator line (listing line 77) is missing from this extract, so the
// function name, return type and parameter types are not visible here -- presumably the
// reverse-direction overload of the host-to-device copy helper above; confirm against
// the original file.
76 template<typename NumericT>
78 {
// Staging buffer sized to hold all of vcl_vec.
79  std::vector<NumericT> std_vec(vcl_vec.size());
80 
81  viennacl::copy(vcl_vec.begin(), vcl_vec.end(), std_vec.begin());
82 
// Element-wise scatter through host_vec's operator[].
83  for (std::size_t i=0; i<host_vec.size(); ++i)
84  host_vec[i] = std_vec[i];
85 }
86 
87 
88 //
89 // -------------------------------------------------------------
90 //
// Signed relative difference of two scalar-like operands:
// returns (s1 - s2) / max(|s1|, |s2|) when |s1 - s2| > 0, and 0 otherwise.
// The result keeps the sign of (s1 - s2); check() applies std::fabs to it.
// A NaN difference fails the `> 0` comparison and therefore also yields 0.
//
// NOTE(review): listing lines 92 (the declarator) and 94 (one line of the body) are missing
// from this extract, so the function name, return type and the types of s1/s2 are not
// visible here. The call diff(t1, t2) in check() suggests the name is diff -- confirm
// against the original file.
91 template<typename ScalarType>
93 {
// Only divide when the operands actually differ; this also avoids 0/0 when both are zero.
95  if (std::fabs(s1 - s2) > 0 )
96  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
97  return 0;
98 }
99 //
100 // -------------------------------------------------------------
101 //
// Signed relative difference of two scalar-like operands:
// returns (s1 - s2) / max(|s1|, |s2|) when |s1 - s2| > 0, and 0 otherwise.
// The visible body is identical to the other scalar diff variants in this listing;
// whatever distinguishes this overload lives on the missing declarator line.
//
// NOTE(review): listing lines 103 (the declarator) and 105 (one line of the body) are
// missing from this extract, so the function name, return type and the types of s1/s2
// are not visible here -- presumably a diff overload taking a ViennaCL scalar type as
// second operand; confirm against the original file.
102 template<typename ScalarType>
104 {
// Only divide when the operands actually differ; this also avoids 0/0 when both are zero.
106  if (std::fabs(s1 - s2) > 0 )
107  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
108  return 0;
109 }
110 //
111 // -------------------------------------------------------------
112 //
// Signed relative difference of two scalar-like operands:
// returns (s1 - s2) / max(|s1|, |s2|) when |s1 - s2| > 0, and 0 otherwise.
// The visible body is identical to the other scalar diff variants in this listing;
// whatever distinguishes this overload lives on the missing declarator line.
//
// NOTE(review): listing lines 114 (the declarator) and 116 (one line of the body) are
// missing from this extract, so the function name, return type and the types of s1/s2
// are not visible here -- presumably a diff overload for a single-element proxy of a
// ViennaCL vector (cf. the vcl_v1[...] operands passed to check()); confirm against the
// original file.
113 template<typename ScalarType>
115 {
// Only divide when the operands actually differ; this also avoids 0/0 when both are zero.
117  if (std::fabs(s1 - s2) > 0 )
118  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
119  return 0;
120 }
121 //
122 // -------------------------------------------------------------
123 //
// Largest element-wise relative difference between the host-side proxy v1 and the
// contents of vcl_vec.
//
// vcl_vec is first copied into the std::vector v2_cpu. For every i < v1.size(), v2_cpu[i]
// is then overwritten with |v2_cpu[i] - v1[i]| / max(|v2_cpu[i]|, |v1[i]|), or with 0 when
// both entries are zero. The return value is the maximum of |v2_cpu[i]| over ALL entries
// of v2_cpu.
//
// Consequences of the two different loop bounds:
//  - if vcl_vec has more entries than v1, the surplus entries keep their raw copied
//    values and take part in the final maximum;
//  - if v1 has more entries than vcl_vec, the first loop indexes v2_cpu out of range.
//
// NOTE(review): listing line 128 (between the v2_cpu declaration and viennacl::copy) is
// missing from this extract; a statement may have been dropped there -- confirm against
// the original file.
124 template<typename ScalarType, typename ViennaCLVectorType>
125 ScalarType diff(vector_proxy<ScalarType> const & v1, ViennaCLVectorType const & vcl_vec)
126 {
// Host-side buffer that receives the contents of vcl_vec.
127  std::vector<ScalarType> v2_cpu(vcl_vec.size());
129  viennacl::copy(vcl_vec, v2_cpu);
130 
// Replace each copied entry by its relative deviation from the reference value in v1.
131  for (unsigned int i=0;i<v1.size(); ++i)
132  {
// The guard skips the division when both entries are zero (would be 0/0).
133  if ( std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
134  v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
135  else
136  v2_cpu[i] = 0.0;
137  }
138 
// Reduce to the maximum absolute entry of v2_cpu.
139  ScalarType ret = 0;
140  for (std::size_t i=0; i<v2_cpu.size(); ++i)
141  ret = std::max(ret, std::fabs(v2_cpu[i]));
142  return ret;
143 }
144 
145 
/**
 * Compares two operands through diff() and decides whether they agree within a tolerance.
 *
 * The absolute value of diff(t1, t2) is taken as the relative error. If that error is not
 * less than or equal to epsilon, a diagnostic line is written to std::cout and EXIT_FAILURE
 * is returned; otherwise EXIT_SUCCESS is returned and nothing is printed.
 *
 * @param t1       First operand; forwarded unchanged to diff().
 * @param t2       Second operand; forwarded unchanged to diff().
 * @param epsilon  Largest relative error that still counts as a match.
 * @return EXIT_SUCCESS when the relative error is within epsilon, EXIT_FAILURE otherwise
 *         (including when the relative error is NaN).
 */
template<typename T1, typename T2>
int check(T1 const & t1, T2 const & t2, double epsilon)
{
  double const rel_error = std::fabs(diff(t1, t2));

  // Deliberately written as a negated '<=' instead of '>': every ordered comparison with
  // NaN is false, so 'rel_error > epsilon' would let a NaN error pass as a success.
  if (!(rel_error <= epsilon))
  {
    std::cout << "# Error! Relative difference: " << rel_error << std::endl;
    return EXIT_FAILURE;
  }

  return EXIT_SUCCESS;
}
159 
160 
161 //
162 // -------------------------------------------------------------
163 //
164 template< typename NumericT, typename Epsilon, typename HostVectorType, typename ViennaCLVectorType1, typename ViennaCLVectorType2 >
165 int test(Epsilon const& epsilon,
166  HostVectorType & host_v1, HostVectorType & host_v2,
167  ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
168 {
169  int retval = EXIT_SUCCESS;
170 
171  NumericT cpu_result = 42.0;
172  viennacl::scalar<NumericT> gpu_result = 43.0;
173 
174  //
175  // Initializer:
176  //
177  std::cout << "Checking for zero_vector initializer..." << std::endl;
178  for (std::size_t i=0; i<host_v1.size(); ++i)
179  host_v1[i] = NumericT(0);
180  vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size());
181  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
182  return EXIT_FAILURE;
183 
184  std::cout << "Checking for scalar_vector initializer..." << std::endl;
185  for (std::size_t i=0; i<host_v1.size(); ++i)
186  host_v1[i] = NumericT(cpu_result);
187  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result);
188  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
189  return EXIT_FAILURE;
190 
191  for (std::size_t i=0; i<host_v1.size(); ++i)
192  host_v1[i] = NumericT(gpu_result);
193  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result);
194  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
195  return EXIT_FAILURE;
196 
197  std::cout << "Checking for unit_vector initializer..." << std::endl;
198  for (std::size_t i=0; i<host_v1.size(); ++i)
199  host_v1[i] = NumericT(0);
200  host_v1[5] = NumericT(1);
201  vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
202  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
203  return EXIT_FAILURE;
204 
205 
206  for (std::size_t i=0; i<host_v1.size(); ++i)
207  {
208  host_v1[i] = NumericT(1.0) + random<NumericT>();
209  host_v2[i] = NumericT(1.0) + random<NumericT>();
210  }
211 
212  proxy_copy(host_v1, vcl_v1); //resync
213  proxy_copy(host_v2, vcl_v2);
214 
215  std::cout << "Checking for successful copy..." << std::endl;
216  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
217  return EXIT_FAILURE;
218  if (check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
219  return EXIT_FAILURE;
220 
221  //
222  // Part 1: Norms and inner product
223  //
224 
225  // --------------------------------------------------------------------------
226  std::cout << "Testing inner_prod..." << std::endl;
227  cpu_result = 0;
228  for (std::size_t i=0; i<host_v1.size(); ++i)
229  cpu_result += host_v1[i] * host_v2[i];
230  NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
231  gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
232 
233  if (check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS)
234  return EXIT_FAILURE;
235  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
236  return EXIT_FAILURE;
237 
238  cpu_result = 0;
239  for (std::size_t i=0; i<host_v1.size(); ++i)
240  cpu_result += (host_v1[i] + host_v2[i]) * (host_v2[i] - host_v1[i]);
241  NumericT cpu_result3 = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2 - vcl_v1);
242  gpu_result = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2 - vcl_v1);
243 
244  if (check(cpu_result, cpu_result3, epsilon) != EXIT_SUCCESS)
245  return EXIT_FAILURE;
246  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
247  return EXIT_FAILURE;
248 
249  // --------------------------------------------------------------------------
250  std::cout << "Testing norm_1..." << std::endl;
251  cpu_result = 0;
252  for (std::size_t i=0; i<host_v1.size(); ++i)
253  cpu_result += std::fabs(host_v1[i]);
254  gpu_result = viennacl::linalg::norm_1(vcl_v1);
255 
256  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
257  return EXIT_FAILURE;
258 
259  gpu_result = 2 * cpu_result; //reset
260  cpu_result = 0;
261  for (std::size_t i=0; i<host_v1.size(); ++i)
262  cpu_result += std::fabs(host_v1[i]);
263  gpu_result = cpu_result;
264  cpu_result = 0;
265  cpu_result = viennacl::linalg::norm_1(vcl_v1);
266 
267  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
268  return EXIT_FAILURE;
269 
270  cpu_result = 0;
271  for (std::size_t i=0; i<host_v1.size(); ++i)
272  cpu_result += std::fabs(host_v1[i] + host_v2[i]);
273  gpu_result = cpu_result;
274  cpu_result = 0;
275  cpu_result = viennacl::linalg::norm_1(vcl_v1 + vcl_v2);
276 
277  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
278  return EXIT_FAILURE;
279 
280  // --------------------------------------------------------------------------
281  std::cout << "Testing norm_2..." << std::endl;
282  cpu_result = 0;
283  for (std::size_t i=0; i<host_v1.size(); ++i)
284  cpu_result += host_v1[i] * host_v1[i];
285  cpu_result = std::sqrt(cpu_result);
286  gpu_result = viennacl::linalg::norm_2(vcl_v1);
287 
288  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
289  return EXIT_FAILURE;
290 
291  gpu_result = 2 * cpu_result; //reset
292  cpu_result = 0;
293  for (std::size_t i=0; i<host_v1.size(); ++i)
294  cpu_result += host_v1[i] * host_v1[i];
295  gpu_result = std::sqrt(cpu_result);
296  cpu_result = viennacl::linalg::norm_2(vcl_v1);
297 
298  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
299  return EXIT_FAILURE;
300 
301  cpu_result = 0;
302  for (std::size_t i=0; i<host_v1.size(); ++i)
303  cpu_result += (host_v1[i] + host_v2[i]) * (host_v1[i] + host_v2[i]);
304  gpu_result = std::sqrt(cpu_result);
305  cpu_result = viennacl::linalg::norm_2(vcl_v1 + vcl_v2);
306 
307  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
308  return EXIT_FAILURE;
309 
310  // --------------------------------------------------------------------------
311  std::cout << "Testing norm_inf..." << std::endl;
312  cpu_result = std::fabs(host_v1[0]);
313  for (std::size_t i=0; i<host_v1.size(); ++i)
314  cpu_result = std::max(std::fabs(host_v1[i]), cpu_result);
315  gpu_result = viennacl::linalg::norm_inf(vcl_v1);
316 
317  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
318  return EXIT_FAILURE;
319 
320  gpu_result = 2 * cpu_result; //reset
321  cpu_result = std::fabs(host_v1[0]);
322  for (std::size_t i=0; i<host_v1.size(); ++i)
323  cpu_result = std::max(std::fabs(host_v1[i]), cpu_result);
324  gpu_result = cpu_result;
325  cpu_result = 0;
326  cpu_result = viennacl::linalg::norm_inf(vcl_v1);
327 
328  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
329  return EXIT_FAILURE;
330 
331  cpu_result = std::fabs(host_v1[0]);
332  for (std::size_t i=0; i<host_v1.size(); ++i)
333  cpu_result = std::max(std::fabs(host_v1[i] + host_v2[i]), cpu_result);
334  gpu_result = cpu_result;
335  cpu_result = 0;
336  cpu_result = viennacl::linalg::norm_inf(vcl_v1 + vcl_v2);
337 
338  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
339  return EXIT_FAILURE;
340 
341  // --------------------------------------------------------------------------
342  std::cout << "Testing index_norm_inf..." << std::endl;
343  std::size_t cpu_index = 0;
344  cpu_result = std::fabs(host_v1[0]);
345  for (std::size_t i=0; i<host_v1.size(); ++i)
346  {
347  if (std::fabs(host_v1[i]) > cpu_result)
348  {
349  cpu_result = std::fabs(host_v1[i]);
350  cpu_index = i;
351  }
352  }
353  std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1);
354 
355  if (check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index), epsilon) != EXIT_SUCCESS)
356  return EXIT_FAILURE;
357  // --------------------------------------------------------------------------
358  cpu_result = host_v1[cpu_index];
359  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)];
360 
361  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
362  return EXIT_FAILURE;
363 
364  cpu_result = std::fabs(host_v1[0] + host_v2[0]);
365  for (std::size_t i=0; i<host_v1.size(); ++i)
366  {
367  if (std::fabs(host_v1[i] + host_v2[i]) > cpu_result)
368  {
369  cpu_result = std::fabs(host_v1[i] + host_v2[i]);
370  cpu_index = i;
371  }
372  }
373  cpu_result = host_v1[cpu_index];
374  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)];
375 
376  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
377  return EXIT_FAILURE;
378 
379 
380  // --------------------------------------------------------------------------
381  std::cout << "Testing max..." << std::endl;
382  cpu_result = host_v1[0];
383  for (std::size_t i=0; i<host_v1.size(); ++i)
384  cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
385  gpu_result = viennacl::linalg::max(vcl_v1);
386 
387  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
388  return EXIT_FAILURE;
389 
390  cpu_result = host_v1[0];
391  for (std::size_t i=0; i<host_v1.size(); ++i)
392  cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
393  gpu_result = cpu_result;
394  cpu_result *= 2; //reset
395  cpu_result = viennacl::linalg::max(vcl_v1);
396 
397  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
398  return EXIT_FAILURE;
399 
400  cpu_result = host_v1[0] + host_v2[0];
401  for (std::size_t i=0; i<host_v1.size(); ++i)
402  cpu_result = std::max<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
403  gpu_result = cpu_result;
404  cpu_result *= 2; //reset
405  cpu_result = viennacl::linalg::max(vcl_v1 + vcl_v2);
406 
407  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
408  return EXIT_FAILURE;
409 
410 
411  // --------------------------------------------------------------------------
412  std::cout << "Testing min..." << std::endl;
413  cpu_result = host_v1[0];
414  for (std::size_t i=0; i<host_v1.size(); ++i)
415  cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
416  gpu_result = viennacl::linalg::min(vcl_v1);
417 
418  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
419  return EXIT_FAILURE;
420 
421  cpu_result = host_v1[0];
422  for (std::size_t i=0; i<host_v1.size(); ++i)
423  cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
424  gpu_result = cpu_result;
425  cpu_result *= 2; //reset
426  cpu_result = viennacl::linalg::min(vcl_v1);
427 
428  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
429  return EXIT_FAILURE;
430 
431  cpu_result = host_v1[0] + host_v2[0];
432  for (std::size_t i=0; i<host_v1.size(); ++i)
433  cpu_result = std::min<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
434  gpu_result = cpu_result;
435  cpu_result *= 2; //reset
436  cpu_result = viennacl::linalg::min(vcl_v1 + vcl_v2);
437 
438  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
439  return EXIT_FAILURE;
440 
441 
442 
443  //
444  // Plane rotation and assignments
445  //
446 
447  // --------------------------------------------------------------------------
448 
449  for (std::size_t i=0; i<host_v1.size(); ++i)
450  {
451  NumericT temp = NumericT(1.1) * host_v1[i] + NumericT(2.3) * host_v2[i];
452  host_v2[i] = - NumericT(2.3) * host_v1[i] + NumericT(1.1) * host_v2[i];
453  host_v1[i] = temp;
454  }
455  viennacl::linalg::plane_rotation(vcl_v1, vcl_v2, NumericT(1.1), NumericT(2.3));
456 
457  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
458  return EXIT_FAILURE;
459  if (check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
460  return EXIT_FAILURE;
461 
462  // --------------------------------------------------------------------------
463 
464  std::cout << "Testing assignments..." << std::endl;
465  NumericT val = static_cast<NumericT>(1e-1);
466  for (size_t i=0; i < host_v1.size(); ++i)
467  host_v1[i] = val;
468 
469  for (size_t i=0; i < vcl_v1.size(); ++i)
470  vcl_v1(i) = val;
471 
472  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
473  return EXIT_FAILURE;
474 
475  std::cout << "Testing assignments via iterators..." << std::endl;
476 
477  host_v1[2] = static_cast<NumericT>(1.9);
478  vcl_v1[2] = static_cast<NumericT>(1.9);
479 
480  host_v1[2] = static_cast<NumericT>(1.5);
481  typename ViennaCLVectorType1::iterator vcl_v1_it = vcl_v1.begin();
482  ++vcl_v1_it;
483  ++vcl_v1_it;
484  *vcl_v1_it = static_cast<NumericT>(1.5);
485 
486  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
487  return EXIT_FAILURE;
488 
489  //
490  // multiplication and division of vectors by scalars
491  //
492  for (std::size_t i=0; i < host_v1.size(); ++i)
493  {
494  host_v1[i] = NumericT(1.0) + random<NumericT>();
495  host_v2[i] = NumericT(3.1415) * host_v1[i];
496  }
497  proxy_copy(host_v1, vcl_v1); //resync
498  proxy_copy(host_v2, vcl_v2);
499 
500  std::cout << "Testing scaling with CPU scalar..." << std::endl;
501  NumericT alpha = static_cast<NumericT>(1.7182);
502  viennacl::scalar<NumericT> gpu_alpha = alpha;
503 
504  for (std::size_t i=0; i < host_v1.size(); ++i)
505  host_v1[i] *= long(alpha);
506  vcl_v1 *= long(alpha);
507 
508  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
509  return EXIT_FAILURE;
510 
511  for (std::size_t i=0; i < host_v1.size(); ++i)
512  host_v1[i] *= float(alpha);
513  vcl_v1 *= float(alpha);
514 
515  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
516  return EXIT_FAILURE;
517 
518  for (std::size_t i=0; i < host_v1.size(); ++i)
519  host_v1[i] *= double(alpha);
520  vcl_v1 *= double(alpha);
521 
522  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
523  return EXIT_FAILURE;
524 
525 
526  std::cout << "Testing scaling with GPU scalar..." << std::endl;
527  for (std::size_t i=0; i < host_v1.size(); ++i)
528  host_v1[i] *= alpha;
529  vcl_v1 *= gpu_alpha;
530 
531  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
532  return EXIT_FAILURE;
533 
534  std::cout << "Testing scaling with scalar expression..." << std::endl;
535  cpu_result = 0;
536  for (std::size_t i=0; i < host_v1.size(); ++i)
537  cpu_result += host_v1[i] * host_v2[i];
538  for (std::size_t i=0; i < host_v1.size(); ++i)
539  host_v1[i] *= cpu_result;
540  vcl_v1 *= viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
541 
542  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
543  return EXIT_FAILURE;
544 
545  NumericT beta = static_cast<NumericT>(1.4153);
546  viennacl::scalar<NumericT> gpu_beta = beta;
547 
548  std::cout << "Testing shrinking with CPU scalar..." << std::endl;
549  for (std::size_t i=0; i < host_v1.size(); ++i)
550  host_v1[i] /= long(beta);
551  vcl_v1 /= long(beta);
552 
553  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
554  return EXIT_FAILURE;
555 
556  for (std::size_t i=0; i < host_v1.size(); ++i)
557  host_v1[i] /= float(beta);
558  vcl_v1 /= float(beta);
559 
560  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
561  return EXIT_FAILURE;
562 
563  for (std::size_t i=0; i < host_v1.size(); ++i)
564  host_v1[i] /= double(beta);
565  vcl_v1 /= double(beta);
566 
567  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
568  return EXIT_FAILURE;
569 
570 
571  std::cout << "Testing shrinking with GPU scalar..." << std::endl;
572  for (std::size_t i=0; i < host_v1.size(); ++i)
573  host_v1[i] /= beta;
574  vcl_v1 /= gpu_beta;
575 
576  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
577  return EXIT_FAILURE;
578 
579 
580 
581  //
582  // add and inplace_add of vectors
583  //
584  for (size_t i=0; i < host_v1.size(); ++i)
585  {
586  host_v1[i] = NumericT(1.0) + random<NumericT>();
587  host_v2[i] = NumericT(3.1415) * host_v1[i];
588  }
589  proxy_copy(host_v1, vcl_v1); //resync
590  proxy_copy(host_v2, vcl_v2);
591 
592  std::cout << "Testing add on vector..." << std::endl;
593 
594  std::cout << "Checking for successful copy..." << std::endl;
595  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
596  return EXIT_FAILURE;
597  if (check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
598  return EXIT_FAILURE;
599 
600  for (size_t i=0; i < host_v1.size(); ++i)
601  host_v1[i] = host_v1[i] + host_v2[i];
602  vcl_v1 = vcl_v1 + vcl_v2;
603 
604  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
605  return EXIT_FAILURE;
606 
607  std::cout << "Testing add on vector with flipsign..." << std::endl;
608  for (size_t i=0; i < host_v1.size(); ++i)
609  host_v1[i] = - host_v1[i] + host_v2[i];
610  vcl_v1 = - vcl_v1 + vcl_v2;
611 
612  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
613  return EXIT_FAILURE;
614 
615  std::cout << "Testing inplace-add on vector..." << std::endl;
616  for (size_t i=0; i < host_v1.size(); ++i)
617  host_v1[i] += host_v2[i];
618  vcl_v1 += vcl_v2;
619 
620  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
621  return EXIT_FAILURE;
622 
623  std::cout << "Testing assignment to vector with vector multiplied by scalar expression..." << std::endl;
624  cpu_result = 0;
625  for (std::size_t i=0; i < host_v1.size(); ++i)
626  cpu_result += host_v1[i] * host_v2[i];
627  for (std::size_t i=0; i < host_v1.size(); ++i)
628  host_v1[i] = cpu_result * host_v2[i];
629  //host_v1 = inner_prod(host_v1, host_v2) * host_v2;
630  vcl_v1 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2) * vcl_v2;
631 
632  //
633  // subtract and inplace_subtract of vectors
634  //
635  std::cout << "Testing sub on vector..." << std::endl;
636  for (std::size_t i=0; i < host_v1.size(); ++i)
637  host_v2[i] = NumericT(3.1415) * host_v1[i];
638  proxy_copy(host_v1, vcl_v1);
639  proxy_copy(host_v2, vcl_v2);
640 
641  for (std::size_t i=0; i < host_v1.size(); ++i)
642  host_v1[i] = host_v1[i] - host_v2[i];
643  vcl_v1 = vcl_v1 - vcl_v2;
644 
645  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
646  return EXIT_FAILURE;
647 
648  std::cout << "Testing inplace-sub on vector..." << std::endl;
649  for (std::size_t i=0; i < host_v1.size(); ++i)
650  host_v1[i] -= host_v2[i];
651  vcl_v1 -= vcl_v2;
652 
653  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
654  return EXIT_FAILURE;
655 
656 
657 
658  //
659  // multiply-add
660  //
661  std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
662  for (size_t i=0; i < host_v1.size(); ++i)
663  {
664  host_v1[i] = NumericT(1.0) + random<NumericT>();
665  host_v2[i] = NumericT(3.1415) * host_v1[i];
666  }
667  proxy_copy(host_v1, vcl_v1);
668  proxy_copy(host_v2, vcl_v2);
669 
670  for (std::size_t i=0; i < host_v1.size(); ++i)
671  host_v1[i] = host_v1[i] + host_v2[i] * float(alpha);
672  vcl_v1 = vcl_v1 + vcl_v2 * float(alpha);
673 
674  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
675  return EXIT_FAILURE;
676 
677  for (std::size_t i=0; i < host_v1.size(); ++i)
678  host_v1[i] = host_v1[i] + host_v2[i] * double(alpha);
679  vcl_v1 = vcl_v1 + vcl_v2 * double(alpha);
680 
681  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
682  return EXIT_FAILURE;
683 
684 
685  std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
686  for (std::size_t i=0; i < host_v1.size(); ++i)
687  host_v2[i] = NumericT(3.1415) * host_v1[i];
688  proxy_copy(host_v1, vcl_v1);
689  proxy_copy(host_v2, vcl_v2);
690 
691  for (std::size_t i=0; i < host_v1.size(); ++i)
692  host_v1[i] = long(alpha) * host_v1[i] + host_v2[i];
693  vcl_v1 = long(alpha) * vcl_v1 + vcl_v2;
694 
695  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
696  return EXIT_FAILURE;
697 
698  for (std::size_t i=0; i < host_v1.size(); ++i)
699  host_v1[i] = float(alpha) * host_v1[i] + host_v2[i];
700  vcl_v1 = float(alpha) * vcl_v1 + vcl_v2;
701 
702  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
703  return EXIT_FAILURE;
704 
705  for (std::size_t i=0; i < host_v1.size(); ++i)
706  host_v1[i] = double(alpha) * host_v1[i] + host_v2[i];
707  vcl_v1 = double(alpha) * vcl_v1 + vcl_v2;
708 
709  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
710  return EXIT_FAILURE;
711 
712 
713  std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
714  for (std::size_t i=0; i < host_v1.size(); ++i)
715  host_v2[i] = NumericT(3.1415) * host_v1[i];
716  proxy_copy(host_v1, vcl_v1);
717  proxy_copy(host_v2, vcl_v2);
718 
719  for (std::size_t i=0; i < host_v1.size(); ++i)
720  host_v1[i] = long(alpha) * host_v1[i] + long(beta) * host_v2[i];
721  vcl_v1 = long(alpha) * vcl_v1 + long(beta) * vcl_v2;
722 
723  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
724  return EXIT_FAILURE;
725 
726  for (std::size_t i=0; i < host_v1.size(); ++i)
727  host_v1[i] = float(alpha) * host_v1[i] + float(beta) * host_v2[i];
728  vcl_v1 = float(alpha) * vcl_v1 + float(beta) * vcl_v2;
729 
730  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
731  return EXIT_FAILURE;
732 
733  for (std::size_t i=0; i < host_v1.size(); ++i)
734  host_v1[i] = double(alpha) * host_v1[i] + double(beta) * host_v2[i];
735  vcl_v1 = double(alpha) * vcl_v1 + double(beta) * vcl_v2;
736 
737  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
738  return EXIT_FAILURE;
739 
740 
741  std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
742  for (std::size_t i=0; i < host_v1.size(); ++i)
743  host_v2[i] = NumericT(3.1415) * host_v1[i];
744  proxy_copy(host_v1, vcl_v1);
745  proxy_copy(host_v2, vcl_v2);
746 
747  for (std::size_t i=0; i < host_v1.size(); ++i)
748  host_v1[i] += host_v2[i] * long(alpha);
749  vcl_v1 += vcl_v2 * long(alpha);
750 
751  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
752  return EXIT_FAILURE;
753 
754  for (std::size_t i=0; i < host_v1.size(); ++i)
755  host_v1[i] += host_v2[i] * float(alpha);
756  vcl_v1 += vcl_v2 * float(alpha);
757 
758  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
759  return EXIT_FAILURE;
760 
761  for (std::size_t i=0; i < host_v1.size(); ++i)
762  host_v1[i] += double(alpha) * host_v2[i];
763  vcl_v1 += double(alpha) * vcl_v2;
764 
765  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
766  return EXIT_FAILURE;
767 
768 
769  std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
770  for (std::size_t i=0; i < host_v1.size(); ++i)
771  host_v2[i] = NumericT(3.1415) * host_v1[i];
772  proxy_copy(host_v1, vcl_v1);
773  proxy_copy(host_v2, vcl_v2);
774 
775  for (std::size_t i=0; i < host_v1.size(); ++i)
776  host_v1[i] = host_v1[i] + alpha * host_v2[i];
777  vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
778 
779  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
780  return EXIT_FAILURE;
781 
782  std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
783  for (std::size_t i=0; i < host_v1.size(); ++i)
784  host_v2[i] = NumericT(3.1415) * host_v1[i];
785  proxy_copy(host_v1, vcl_v1);
786  proxy_copy(host_v2, vcl_v2);
787 
788  for (std::size_t i=0; i < host_v1.size(); ++i)
789  host_v1[i] = host_v1[i] + alpha * host_v2[i];
790  vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
791 
792  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
793  return EXIT_FAILURE;
794 
795  std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
796  for (std::size_t i=0; i < host_v1.size(); ++i)
797  host_v2[i] = NumericT(3.1415) * host_v1[i];
798  proxy_copy(host_v1, vcl_v1);
799  proxy_copy(host_v2, vcl_v2);
800 
801  for (std::size_t i=0; i < host_v1.size(); ++i)
802  host_v1[i] = alpha * host_v1[i] + beta * host_v2[i];
803  vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
804 
805  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
806  return EXIT_FAILURE;
807 
808 
809  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
810  for (std::size_t i=0; i < host_v1.size(); ++i)
811  host_v2[i] = NumericT(3.1415) * host_v1[i];
812  proxy_copy(host_v1, vcl_v1);
813  proxy_copy(host_v2, vcl_v2);
814 
815  for (std::size_t i=0; i < host_v1.size(); ++i)
816  host_v1[i] += alpha * host_v1[i] + beta * host_v2[i];
817  vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
818 
819  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
820  return EXIT_FAILURE;
821 
822  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
823  for (std::size_t i=0; i < host_v1.size(); ++i)
824  host_v2[i] = NumericT(3.1415) * host_v1[i];
825  proxy_copy(host_v1, vcl_v1);
826  proxy_copy(host_v2, vcl_v2);
827 
828  for (std::size_t i=0; i < host_v1.size(); ++i)
829  host_v1[i] += alpha * host_v1[i] - beta * host_v2[i];
830  vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
831 
832  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
833  return EXIT_FAILURE;
834 
835 
836 
837  std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
838  for (std::size_t i=0; i < host_v1.size(); ++i)
839  host_v2[i] = NumericT(3.1415) * host_v1[i];
840  proxy_copy(host_v1, vcl_v1);
841  proxy_copy(host_v2, vcl_v2);
842 
843  for (std::size_t i=0; i < host_v1.size(); ++i)
844  host_v1[i] += alpha * host_v2[i];
845  vcl_v1 += gpu_alpha * vcl_v2;
846 
847  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
848  return EXIT_FAILURE;
849 
850 
851  //
852  // division-add
853  //
854  std::cout << "Testing division-add on vector with CPU scalar (right)..." << std::endl;
855  for (size_t i=0; i < host_v1.size(); ++i)
856  {
857  host_v1[i] = NumericT(1.0) + random<NumericT>();
858  host_v2[i] = NumericT(3.1415) * host_v1[i];
859  }
860  proxy_copy(host_v1, vcl_v1);
861  proxy_copy(host_v2, vcl_v2);
862 
863  for (std::size_t i=0; i < host_v1.size(); ++i)
864  host_v1[i] = host_v1[i] + host_v2[i] / long(alpha);
865  vcl_v1 = vcl_v1 + vcl_v2 / long(alpha);
866 
867  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
868  return EXIT_FAILURE;
869 
870  for (std::size_t i=0; i < host_v1.size(); ++i)
871  host_v1[i] = host_v1[i] + host_v2[i] / float(alpha);
872  vcl_v1 = vcl_v1 + vcl_v2 / float(alpha);
873 
874  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
875  return EXIT_FAILURE;
876 
877  for (std::size_t i=0; i < host_v1.size(); ++i)
878  host_v1[i] = host_v1[i] + host_v2[i] / double(alpha);
879  vcl_v1 = vcl_v1 + vcl_v2 / double(alpha);
880 
881  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
882  return EXIT_FAILURE;
883 
884 
885  std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl;
886  for (std::size_t i=0; i < host_v1.size(); ++i)
887  host_v2[i] = NumericT(3.1415) * host_v1[i];
888  proxy_copy(host_v1, vcl_v1);
889  proxy_copy(host_v2, vcl_v2);
890 
891  for (std::size_t i=0; i < host_v1.size(); ++i)
892  host_v1[i] = host_v1[i] / float(alpha) + host_v2[i];
893  vcl_v1 = vcl_v1 / float(alpha) + vcl_v2;
894 
895  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
896  return EXIT_FAILURE;
897 
898  for (std::size_t i=0; i < host_v1.size(); ++i)
899  host_v1[i] = host_v1[i] / double(alpha) + host_v2[i];
900  vcl_v1 = vcl_v1 / double(alpha) + vcl_v2;
901 
902  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
903  return EXIT_FAILURE;
904 
905 
906  std::cout << "Testing division-add on vector with CPU scalar (both)..." << std::endl;
907  for (std::size_t i=0; i < host_v1.size(); ++i)
908  host_v2[i] = NumericT(3.1415) * host_v1[i];
909  proxy_copy(host_v1, vcl_v1);
910  proxy_copy(host_v2, vcl_v2);
911 
912  for (std::size_t i=0; i < host_v1.size(); ++i)
913  host_v1[i] = host_v1[i] / float(alpha) + host_v2[i] / float(beta);
914  vcl_v1 = vcl_v1 / float(alpha) + vcl_v2 / float(beta);
915 
916  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
917  return EXIT_FAILURE;
918 
919  for (std::size_t i=0; i < host_v1.size(); ++i)
920  host_v1[i] = host_v1[i] / double(alpha) + host_v2[i] / double(beta);
921  vcl_v1 = vcl_v1 / double(alpha) + vcl_v2 / double(beta);
922 
923  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
924  return EXIT_FAILURE;
925 
926  std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl;
927  for (std::size_t i=0; i < host_v1.size(); ++i)
928  host_v2[i] = NumericT(3.1415) * host_v1[i];
929  proxy_copy(host_v1, vcl_v1);
930  proxy_copy(host_v2, vcl_v2);
931 
932  for (std::size_t i=0; i < host_v1.size(); ++i)
933  host_v1[i] = host_v1[i] / alpha + host_v2[i] * beta;
934  vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;
935 
936  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
937  return EXIT_FAILURE;
938 
939 
940  std::cout << "Testing multiply-division-add on vector with CPU scalar..." << std::endl;
941  for (std::size_t i=0; i < host_v1.size(); ++i)
942  host_v2[i] = NumericT(3.1415) * host_v1[i];
943  proxy_copy(host_v1, vcl_v1);
944  proxy_copy(host_v2, vcl_v2);
945 
946  for (std::size_t i=0; i < host_v1.size(); ++i)
947  host_v1[i] = host_v1[i] * alpha + host_v2[i] / beta;
948  vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;
949 
950  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
951  return EXIT_FAILURE;
952 
953 
954 
955  std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl;
956  for (std::size_t i=0; i < host_v1.size(); ++i)
957  host_v2[i] = NumericT(3.1415) * host_v1[i];
958  proxy_copy(host_v1, vcl_v1);
959  proxy_copy(host_v2, vcl_v2);
960 
961  for (std::size_t i=0; i < host_v1.size(); ++i)
962  host_v1[i] += host_v2[i] / alpha;
963  vcl_v1 += vcl_v2 / alpha;
964 
965  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
966  return EXIT_FAILURE;
967 
968 
969  std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl;
970  for (std::size_t i=0; i < host_v1.size(); ++i)
971  host_v2[i] = NumericT(3.1415) * host_v1[i];
972  proxy_copy(host_v1, vcl_v1);
973  proxy_copy(host_v2, vcl_v2);
974 
975  for (std::size_t i=0; i < host_v1.size(); ++i)
976  host_v1[i] = host_v1[i] + host_v2[i] / alpha;
977  vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
978 
979  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
980  return EXIT_FAILURE;
981 
982  std::cout << "Testing division-add on vector with GPU scalar (left)..." << std::endl; // v1 = v1 / alpha + v2
983  for (std::size_t i=0; i < host_v1.size(); ++i) // rebuild v2 from the current v1
984  host_v2[i] = NumericT(3.1415) * host_v1[i];
985  proxy_copy(host_v1, vcl_v1); // copy both host operands into the ViennaCL vectors
986  proxy_copy(host_v2, vcl_v2);
987 
988  for (std::size_t i=0; i < host_v1.size(); ++i) // host reference result
989  host_v1[i] = host_v1[i] / alpha + host_v2[i]; // scalar divides the LEFT operand, as the label states
990  vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2; // same expression on the ViennaCL vectors, with gpu_alpha in place of alpha
991 
992  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) // stop unless check() reports EXIT_SUCCESS
993  return EXIT_FAILURE;
994 
995  std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl;
996  for (std::size_t i=0; i < host_v1.size(); ++i)
997  host_v2[i] = NumericT(3.1415) * host_v1[i];
998  proxy_copy(host_v1, vcl_v1);
999  proxy_copy(host_v2, vcl_v2);
1000 
1001  for (std::size_t i=0; i < host_v1.size(); ++i)
1002  host_v1[i] = host_v1[i] / alpha + host_v2[i] / beta;
1003  vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1004 
1005  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1006  return EXIT_FAILURE;
1007 
1008 
1009  std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
1010  for (std::size_t i=0; i < host_v1.size(); ++i)
1011  host_v2[i] = NumericT(3.1415) * host_v1[i];
1012  proxy_copy(host_v1, vcl_v1);
1013  proxy_copy(host_v2, vcl_v2);
1014 
1015  for (std::size_t i=0; i < host_v1.size(); ++i)
1016  host_v1[i] += host_v1[i] / alpha + host_v2[i] / beta;
1017  vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1018 
1019  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1020  return EXIT_FAILURE;
1021 
1022  std::cout << "Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl;
1023  for (std::size_t i=0; i < host_v1.size(); ++i)
1024  host_v2[i] = NumericT(3.1415) * host_v1[i];
1025  proxy_copy(host_v1, vcl_v1);
1026  proxy_copy(host_v2, vcl_v2);
1027 
1028  for (std::size_t i=0; i < host_v1.size(); ++i)
1029  host_v1[i] += host_v1[i] / alpha - host_v2[i] / beta;
1030  vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1031 
1032  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1033  return EXIT_FAILURE;
1034 
1035  std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
1036  for (std::size_t i=0; i < host_v1.size(); ++i)
1037  host_v2[i] = NumericT(3.1415) * host_v1[i];
1038  proxy_copy(host_v1, vcl_v1);
1039  proxy_copy(host_v2, vcl_v2);
1040 
1041  for (std::size_t i=0; i < host_v1.size(); ++i)
1042  host_v1[i] += host_v1[i] / alpha + host_v2[i] * beta;
1043  vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1044 
1045  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1046  return EXIT_FAILURE;
1047 
1048  std::cout << "Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl;
1049  for (std::size_t i=0; i < host_v1.size(); ++i)
1050  host_v2[i] = NumericT(3.1415) * host_v1[i];
1051  proxy_copy(host_v1, vcl_v1);
1052  proxy_copy(host_v2, vcl_v2);
1053 
1054  for (std::size_t i=0; i < host_v1.size(); ++i)
1055  host_v1[i] += host_v1[i] * alpha - host_v2[i] / beta;
1056  vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1057 
1058  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1059  return EXIT_FAILURE;
1060 
1061 
1062 
1063  std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl; // v1 += v2 / alpha
1064  for (std::size_t i=0; i < host_v1.size(); ++i) // rebuild v2 from the current v1
1065  host_v2[i] = NumericT(3.1415) * host_v1[i];
1066  proxy_copy(host_v1, vcl_v1); // copy both host operands into the ViennaCL vectors
1067  proxy_copy(host_v2, vcl_v2);
1068 
1069  for (std::size_t i=0; i < host_v1.size(); ++i) // host reference result
1070  host_v1[i] += host_v2[i] / alpha; // division, matching the label (was a multiplication)
1071  vcl_v1 += vcl_v2 / gpu_alpha; // same expression on the ViennaCL vectors, with gpu_alpha in place of alpha
1072 
1073  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) // stop unless check() reports EXIT_SUCCESS
1074  return EXIT_FAILURE;
1075 
1076 
1077 
1078  //
1079  // multiply-subtract
1080  //
1081  std::cout << "Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl; // v1 = v1 - alpha * v2
1082  for (std::size_t i=0; i < host_v1.size(); ++i) // fresh operands; std::size_t qualified like every other loop here
1083  {
1084  host_v1[i] = NumericT(1.0) + random<NumericT>();
1085  host_v2[i] = NumericT(3.1415) * host_v1[i];
1086  }
1087  proxy_copy(host_v1, vcl_v1); // copy both host operands into the ViennaCL vectors
1088  proxy_copy(host_v2, vcl_v2);
1089 
1090  for (std::size_t i=0; i < host_v1.size(); ++i) // host reference result
1091  host_v1[i] = host_v1[i] - alpha * host_v2[i];
1092  vcl_v1 = vcl_v1 - alpha * vcl_v2; // same expression on the ViennaCL vectors
1093 
1094  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) // stop unless check() reports EXIT_SUCCESS
1095  return EXIT_FAILURE;
1096 
1097 
1098  std::cout << "Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
1099  for (std::size_t i=0; i < host_v1.size(); ++i)
1100  host_v2[i] = NumericT(3.1415) * host_v1[i];
1101  proxy_copy(host_v1, vcl_v1);
1102  proxy_copy(host_v2, vcl_v2);
1103 
1104  for (std::size_t i=0; i < host_v1.size(); ++i)
1105  host_v1[i] = alpha * host_v1[i] - host_v2[i];
1106  vcl_v1 = alpha * vcl_v1 - vcl_v2;
1107 
1108  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1109  return EXIT_FAILURE;
1110 
1111  std::cout << "Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
1112  for (std::size_t i=0; i < host_v1.size(); ++i)
1113  host_v2[i] = NumericT(3.1415) * host_v1[i];
1114  proxy_copy(host_v1, vcl_v1);
1115  proxy_copy(host_v2, vcl_v2);
1116 
1117  for (std::size_t i=0; i < host_v1.size(); ++i)
1118  host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1119  vcl_v1 = alpha * vcl_v1 - beta * vcl_v2;
1120 
1121  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1122  return EXIT_FAILURE;
1123 
1124 
1125  std::cout << "Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
1126  for (std::size_t i=0; i < host_v1.size(); ++i)
1127  host_v2[i] = NumericT(3.1415) * host_v1[i];
1128  proxy_copy(host_v1, vcl_v1);
1129  proxy_copy(host_v2, vcl_v2);
1130 
1131  for (std::size_t i=0; i < host_v1.size(); ++i)
1132  host_v1[i] -= alpha * host_v2[i];
1133  vcl_v1 -= alpha * vcl_v2;
1134 
1135  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1136  return EXIT_FAILURE;
1137 
1138 
1139  std::cout << "Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
1140  for (std::size_t i=0; i < host_v1.size(); ++i)
1141  host_v2[i] = NumericT(3.1415) * host_v1[i];
1142  proxy_copy(host_v1, vcl_v1);
1143  proxy_copy(host_v2, vcl_v2);
1144 
1145  for (std::size_t i=0; i < host_v1.size(); ++i)
1146  host_v1[i] = host_v1[i] - alpha * host_v2[i];
1147  vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1148 
1149  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1150  return EXIT_FAILURE;
1151 
1152  std::cout << "Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl; // v1 = alpha * v1 - v2
1153  for (std::size_t i=0; i < host_v1.size(); ++i) // rebuild v2 from the current v1
1154  host_v2[i] = NumericT(3.1415) * host_v1[i];
1155  proxy_copy(host_v1, vcl_v1); // copy both host operands into the ViennaCL vectors
1156  proxy_copy(host_v2, vcl_v2);
1157 
1158  for (std::size_t i=0; i < host_v1.size(); ++i) // host reference result
1159  host_v1[i] = alpha * host_v1[i] - host_v2[i]; // scalar scales the LEFT operand, as the label states
1160  vcl_v1 = gpu_alpha * vcl_v1 - vcl_v2; // same expression on the ViennaCL vectors, with gpu_alpha in place of alpha
1161 
1162  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) // stop unless check() reports EXIT_SUCCESS
1163  return EXIT_FAILURE;
1164 
1165  std::cout << "Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
1166  for (std::size_t i=0; i < host_v1.size(); ++i)
1167  host_v2[i] = NumericT(3.1415) * host_v1[i];
1168  proxy_copy(host_v1, vcl_v1);
1169  proxy_copy(host_v2, vcl_v2);
1170 
1171  for (std::size_t i=0; i < host_v1.size(); ++i)
1172  host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1173  vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1174 
1175  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1176  return EXIT_FAILURE;
1177 
1178  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1179  for (std::size_t i=0; i < host_v1.size(); ++i)
1180  host_v2[i] = NumericT(3.1415) * host_v1[i];
1181  proxy_copy(host_v1, vcl_v1);
1182  proxy_copy(host_v2, vcl_v2);
1183 
1184  for (std::size_t i=0; i < host_v1.size(); ++i)
1185  host_v1[i] -= alpha * host_v1[i] + beta * host_v2[i];
1186  vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
1187 
1188  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1189  return EXIT_FAILURE;
1190 
1191  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1192  for (std::size_t i=0; i < host_v1.size(); ++i)
1193  host_v2[i] = NumericT(3.1415) * host_v1[i];
1194  proxy_copy(host_v1, vcl_v1);
1195  proxy_copy(host_v2, vcl_v2);
1196 
1197  for (std::size_t i=0; i < host_v1.size(); ++i)
1198  host_v1[i] -= alpha * host_v1[i] - beta * host_v2[i];
1199  vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1200 
1201  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1202  return EXIT_FAILURE;
1203 
1204 
1205  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
1206  for (std::size_t i=0; i < host_v1.size(); ++i)
1207  host_v2[i] = NumericT(3.1415) * host_v1[i];
1208  proxy_copy(host_v1, vcl_v1);
1209  proxy_copy(host_v2, vcl_v2);
1210 
1211  for (std::size_t i=0; i < host_v1.size(); ++i)
1212  host_v1[i] -= alpha * host_v2[i];
1213  vcl_v1 -= gpu_alpha * vcl_v2;
1214 
1215  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1216  return EXIT_FAILURE;
1217 
1218 
1219 
1220  //
1221  // division-subtract
1222  //
1223  std::cout << "Testing division-subtract on vector with CPU scalar (right)..." << std::endl; // v1 = v1 - v2 / alpha
1224  for (std::size_t i=0; i < host_v1.size(); ++i) // fresh operands; std::size_t qualified like every other loop here
1225  {
1226  host_v1[i] = NumericT(1.0) + random<NumericT>();
1227  host_v2[i] = NumericT(3.1415) * host_v1[i];
1228  }
1229  proxy_copy(host_v1, vcl_v1); // copy both host operands into the ViennaCL vectors
1230  proxy_copy(host_v2, vcl_v2);
1231 
1232  for (std::size_t i=0; i < host_v1.size(); ++i) // host reference result
1233  host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1234  vcl_v1 = vcl_v1 - vcl_v2 / alpha; // same expression on the ViennaCL vectors
1235 
1236  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) // stop unless check() reports EXIT_SUCCESS
1237  return EXIT_FAILURE;
1238 
1239 
1240  std::cout << "Testing division-subtract on vector with CPU scalar (left)..." << std::endl;
1241  for (std::size_t i=0; i < host_v1.size(); ++i)
1242  host_v2[i] = NumericT(3.1415) * host_v1[i];
1243  proxy_copy(host_v1, vcl_v1);
1244  proxy_copy(host_v2, vcl_v2);
1245 
1246  for (std::size_t i=0; i < host_v1.size(); ++i)
1247  host_v1[i] = host_v1[i] / alpha - host_v2[i];
1248  vcl_v1 = vcl_v1 / alpha - vcl_v2;
1249 
1250  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1251  return EXIT_FAILURE;
1252 
1253  std::cout << "Testing division-subtract on vector with CPU scalar (both)..." << std::endl; // v1 = v1 / alpha - v2 / beta
1254  for (std::size_t i=0; i < host_v1.size(); ++i) // rebuild v2 from the current v1
1255  host_v2[i] = NumericT(3.1415) * host_v1[i];
1256  proxy_copy(host_v1, vcl_v1); // copy both host operands into the ViennaCL vectors
1257  proxy_copy(host_v2, vcl_v2);
1258 
1259  for (std::size_t i=0; i < host_v1.size(); ++i) // host reference result
1260  host_v1[i] = host_v1[i] / alpha - host_v2[i] / beta; // two distinct divisors, like the other "(both)" cases
1261  vcl_v1 = vcl_v1 / alpha - vcl_v2 / beta; // same expression on the ViennaCL vectors
1262 
1263  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) // stop unless check() reports EXIT_SUCCESS
1264  return EXIT_FAILURE;
1265 
1266 
1267  std::cout << "Testing inplace division-subtract on vector with CPU scalar..." << std::endl;
1268  for (std::size_t i=0; i < host_v1.size(); ++i)
1269  host_v2[i] = NumericT(3.1415) * host_v1[i];
1270  proxy_copy(host_v1, vcl_v1);
1271  proxy_copy(host_v2, vcl_v2);
1272 
1273  for (std::size_t i=0; i < host_v1.size(); ++i)
1274  host_v1[i] -= host_v2[i] / alpha;
1275  vcl_v1 -= vcl_v2 / alpha;
1276 
1277  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1278  return EXIT_FAILURE;
1279 
1280  std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1281  for (std::size_t i=0; i < host_v1.size(); ++i)
1282  host_v2[i] = NumericT(3.1415) * host_v1[i];
1283  proxy_copy(host_v1, vcl_v1);
1284  proxy_copy(host_v2, vcl_v2);
1285 
1286  for (std::size_t i=0; i < host_v1.size(); ++i)
1287  host_v1[i] -= host_v2[i] / alpha;
1288  vcl_v1 -= vcl_v2 / gpu_alpha;
1289 
1290  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1291  return EXIT_FAILURE;
1292 
1293 
1294  std::cout << "Testing division-subtract on vector with GPU scalar (right)..." << std::endl;
1295  for (std::size_t i=0; i < host_v1.size(); ++i)
1296  host_v2[i] = NumericT(3.1415) * host_v1[i];
1297  proxy_copy(host_v1, vcl_v1);
1298  proxy_copy(host_v2, vcl_v2);
1299 
1300  for (std::size_t i=0; i < host_v1.size(); ++i)
1301  host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1302  vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1303 
1304  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1305  return EXIT_FAILURE;
1306 
1307  std::cout << "Testing division-subtract on vector with GPU scalar (left)..." << std::endl; // v1 = v1 / alpha - v2
1308  for (std::size_t i=0; i < host_v1.size(); ++i) // rebuild v2 from the current v1
1309  host_v2[i] = NumericT(3.1415) * host_v1[i];
1310  proxy_copy(host_v1, vcl_v1); // copy both host operands into the ViennaCL vectors
1311  proxy_copy(host_v2, vcl_v2);
1312 
1313  for (std::size_t i=0; i < host_v1.size(); ++i) // host reference result
1314  host_v1[i] = host_v1[i] / alpha - host_v2[i]; // scalar divides the LEFT operand, as the label states
1315  vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2; // same expression on the ViennaCL vectors, with gpu_alpha in place of alpha
1316 
1317  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) // stop unless check() reports EXIT_SUCCESS
1318  return EXIT_FAILURE;
1319 
1320  std::cout << "Testing division-subtract on vector with GPU scalar (both)..." << std::endl;
1321  for (std::size_t i=0; i < host_v1.size(); ++i)
1322  host_v2[i] = NumericT(3.1415) * host_v1[i];
1323  proxy_copy(host_v1, vcl_v1);
1324  proxy_copy(host_v2, vcl_v2);
1325 
1326  for (std::size_t i=0; i < host_v1.size(); ++i)
1327  host_v1[i] = host_v1[i] / alpha - host_v2[i] / beta;
1328  vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1329 
1330  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1331  return EXIT_FAILURE;
1332 
1333  std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1334  for (std::size_t i=0; i < host_v1.size(); ++i)
1335  host_v2[i] = NumericT(3.1415) * host_v1[i];
1336  proxy_copy(host_v1, vcl_v1);
1337  proxy_copy(host_v2, vcl_v2);
1338 
1339  for (std::size_t i=0; i < host_v1.size(); ++i)
1340  host_v1[i] -= host_v1[i] / alpha + host_v2[i] / beta;
1341  vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1342 
1343  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1344  return EXIT_FAILURE;
1345 
1346  std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1347  for (std::size_t i=0; i < host_v1.size(); ++i)
1348  host_v2[i] = NumericT(3.1415) * host_v1[i];
1349  proxy_copy(host_v1, vcl_v1);
1350  proxy_copy(host_v2, vcl_v2);
1351 
1352  for (std::size_t i=0; i < host_v1.size(); ++i)
1353  host_v1[i] -= host_v1[i] / alpha - host_v2[i] / beta;
1354  vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1355 
1356  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1357  return EXIT_FAILURE;
1358 
1359  std::cout << "Testing multiply-division-subtract on vector with GPU scalar..." << std::endl;
1360  for (std::size_t i=0; i < host_v1.size(); ++i)
1361  host_v2[i] = NumericT(3.1415) * host_v1[i];
1362  proxy_copy(host_v1, vcl_v1);
1363  proxy_copy(host_v2, vcl_v2);
1364 
1365  for (std::size_t i=0; i < host_v1.size(); ++i)
1366  host_v1[i] = host_v1[i] * alpha - host_v2[i] / beta;
1367  vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1368 
1369  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1370  return EXIT_FAILURE;
1371 
1372  std::cout << "Testing division-multiply-subtract on vector with GPU scalar..." << std::endl;
1373  for (std::size_t i=0; i < host_v1.size(); ++i)
1374  host_v2[i] = NumericT(3.1415) * host_v1[i];
1375  proxy_copy(host_v1, vcl_v1);
1376  proxy_copy(host_v2, vcl_v2);
1377 
1378  for (std::size_t i=0; i < host_v1.size(); ++i)
1379  host_v1[i] = host_v1[i] / alpha - host_v2[i] * beta;
1380  vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1381 
1382  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1383  return EXIT_FAILURE;
1384 
1385  std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl;
1386  for (std::size_t i=0; i < host_v1.size(); ++i)
1387  host_v2[i] = NumericT(3.1415) * host_v1[i];
1388  proxy_copy(host_v1, vcl_v1);
1389  proxy_copy(host_v2, vcl_v2);
1390 
1391  for (std::size_t i=0; i < host_v1.size(); ++i)
1392  host_v1[i] -= host_v1[i] * alpha + host_v2[i] / beta;
1393  vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta;
1394 
1395  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1396  return EXIT_FAILURE;
1397 
1398  std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." << std::endl;
1399  for (std::size_t i=0; i < host_v1.size(); ++i)
1400  host_v2[i] = NumericT(3.1415) * host_v1[i];
1401  proxy_copy(host_v1, vcl_v1);
1402  proxy_copy(host_v2, vcl_v2);
1403 
1404  for (std::size_t i=0; i < host_v1.size(); ++i)
1405  host_v1[i] -= host_v1[i] / alpha + host_v2[i] * beta;
1406  vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1407 
1408  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1409  return EXIT_FAILURE;
1410 
1411  std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1412  for (std::size_t i=0; i < host_v1.size(); ++i)
1413  host_v2[i] = NumericT(3.1415) * host_v1[i];
1414  proxy_copy(host_v1, vcl_v1);
1415  proxy_copy(host_v2, vcl_v2);
1416 
1417  for (std::size_t i=0; i < host_v1.size(); ++i)
1418  host_v1[i] -= host_v1[i] * alpha - host_v2[i] / beta;
1419  vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1420 
1421  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1422  return EXIT_FAILURE;
1423 
1424  std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1425  for (std::size_t i=0; i < host_v1.size(); ++i)
1426  host_v2[i] = NumericT(3.1415) * host_v1[i];
1427  proxy_copy(host_v1, vcl_v1);
1428  proxy_copy(host_v2, vcl_v2);
1429 
1430  for (std::size_t i=0; i < host_v1.size(); ++i)
1431  host_v1[i] -= host_v1[i] / alpha - host_v2[i] * beta;
1432  vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1433 
1434  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1435  return EXIT_FAILURE;
1436 
1437 
1438  std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl; // v1 -= v2 / alpha
1439  for (std::size_t i=0; i < host_v1.size(); ++i) // rebuild v2 from the current v1
1440  host_v2[i] = NumericT(3.1415) * host_v1[i];
1441  proxy_copy(host_v1, vcl_v1); // copy both host operands into the ViennaCL vectors
1442  proxy_copy(host_v2, vcl_v2);
1443 
1444  for (std::size_t i=0; i < host_v1.size(); ++i) // host reference result
1445  host_v1[i] -= host_v2[i] / alpha; // division, matching the label (was a multiplication)
1446  vcl_v1 -= vcl_v2 / gpu_alpha; // same expression on the ViennaCL vectors, with gpu_alpha in place of alpha
1447 
1448  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) // stop unless check() reports EXIT_SUCCESS
1449  return EXIT_FAILURE;
1450 
1451 
1452 
1453  //
1454  // More complicated expressions (for ensuring the operator overloads work correctly)
1455  //
1456  for (std::size_t i=0; i < host_v1.size(); ++i)
1457  {
1458  host_v1[i] = NumericT(1.0) + random<NumericT>();
1459  host_v2[i] = NumericT(3.1415) * host_v1[i];
1460  }
1461  proxy_copy(host_v1, vcl_v1);
1462  proxy_copy(host_v2, vcl_v2);
1463 
1464  std::cout << "Testing three vector additions..." << std::endl;
1465  for (std::size_t i=0; i < host_v1.size(); ++i)
1466  host_v1[i] = host_v2[i] + host_v1[i] + host_v2[i];
1467  vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;
1468 
1469  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1470  return EXIT_FAILURE;
1471 
1472 
1473  for (std::size_t i=0; i < host_v1.size(); ++i)
1474  host_v2[i] = NumericT(3.1415) * host_v1[i];
1475  proxy_copy(host_v1, vcl_v1);
1476  proxy_copy(host_v2, vcl_v2);
1477 
1478  std::cout << "Testing complicated vector expression with CPU scalar..." << std::endl;
1479  for (std::size_t i=0; i < host_v1.size(); ++i)
1480  host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1481  vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2);
1482 
1483  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1484  return EXIT_FAILURE;
1485 
1486  std::cout << "Testing complicated vector expression with GPU scalar..." << std::endl;
1487  for (std::size_t i=0; i < host_v1.size(); ++i)
1488  host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1489  vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2);
1490 
1491  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1492  return EXIT_FAILURE;
1493 
1494  // --------------------------------------------------------------------------
1495  for (std::size_t i=0; i < host_v1.size(); ++i)
1496  host_v2[i] = NumericT(3.1415) * host_v1[i];
1497  proxy_copy(host_v1, vcl_v1);
1498  proxy_copy(host_v2, vcl_v2);
1499 
1500  std::cout << "Testing swap..." << std::endl; // exchange the contents of v1 and v2
1501  for (std::size_t i=0; i < host_v1.size(); ++i) // host reference: element-wise exchange through a temporary
1502  {
1503  NumericT temp = host_v1[i];
1504  host_v1[i] = host_v2[i];
1505  host_v2[i] = temp;
1506  }
1507  swap(vcl_v1, vcl_v2); // same exchange on the ViennaCL vectors
1508 
1509  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS || check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS) // verify BOTH vectors: a swap that only updates v1 must also fail
1510  return EXIT_FAILURE;
1511 
1512  // --------------------------------------------------------------------------
1513  for (std::size_t i=0; i<host_v1.size(); ++i)
1514  {
1515  host_v1[i] = NumericT(1.0) + random<NumericT>();
1516  host_v2[i] = NumericT(5.0) + random<NumericT>();
1517  }
1518 
1519  proxy_copy(host_v1, vcl_v1);
1520  proxy_copy(host_v2, vcl_v2);
1521 
1522  std::cout << "Testing unary operator-..." << std::endl;
1523  for (std::size_t i=0; i < host_v1.size(); ++i)
1524  host_v1[i] = - host_v2[i];
1525  vcl_v1 = - vcl_v2;
1526 
1527  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1528  return EXIT_FAILURE;
1529 
1530 
1531  std::cout << "Testing elementwise multiplication..." << std::endl;
1532  std::cout << " v1 = element_prod(v1, v2);" << std::endl;
1533  for (std::size_t i=0; i < host_v1.size(); ++i)
1534  host_v1[i] = host_v1[i] * host_v2[i];
1535  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1536 
1537  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1538  return EXIT_FAILURE;
1539 
1540  std::cout << " v1 += element_prod(v1, v2);" << std::endl;
1541  for (std::size_t i=0; i < host_v1.size(); ++i)
1542  host_v1[i] += host_v1[i] * host_v2[i];
1543  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1544 
1545  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1546  return EXIT_FAILURE;
1547 
1548  std::cout << " v1 -= element_prod(v1, v2);" << std::endl;
1549  for (std::size_t i=0; i < host_v1.size(); ++i)
1550  host_v1[i] -= host_v1[i] * host_v2[i];
1551  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1552 
1553  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1554  return EXIT_FAILURE;
1555 
1557  std::cout << " v1 = element_prod(v1 + v2, v2);" << std::endl;
1558  for (std::size_t i=0; i < host_v1.size(); ++i)
1559  host_v1[i] = (host_v1[i] + host_v2[i]) * host_v2[i];
1560  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1561 
1562  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1563  return EXIT_FAILURE;
1564 
1565  std::cout << " v1 += element_prod(v1 + v2, v2);" << std::endl;
1566  for (std::size_t i=0; i < host_v1.size(); ++i)
1567  host_v1[i] += (host_v1[i] + host_v2[i]) * host_v2[i];
1568  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1569 
1570  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1571  return EXIT_FAILURE;
1572 
1573  std::cout << " v1 -= element_prod(v1 + v2, v2);" << std::endl;
1574  for (std::size_t i=0; i < host_v1.size(); ++i)
1575  host_v1[i] -= (host_v1[i] + host_v2[i]) * host_v2[i];
1576  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1577 
1578  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1579  return EXIT_FAILURE;
1580 
1582  std::cout << " v1 = element_prod(v1, v2 + v1);" << std::endl;
1583  for (std::size_t i=0; i < host_v1.size(); ++i)
1584  host_v1[i] = host_v1[i] * (host_v2[i] + host_v1[i]);
1585  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1586 
1587  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1588  return EXIT_FAILURE;
1589 
1590  std::cout << " v1 += element_prod(v1, v2 + v1);" << std::endl;
1591  for (std::size_t i=0; i < host_v1.size(); ++i)
1592  host_v1[i] += host_v1[i] * (host_v2[i] + host_v1[i]);
1593  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1594 
1595  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1596  return EXIT_FAILURE;
1597 
1598  std::cout << " v1 -= element_prod(v1, v2 + v1);" << std::endl;
1599  for (std::size_t i=0; i < host_v1.size(); ++i)
1600  host_v1[i] -= host_v1[i] * (host_v2[i] + host_v1[i]);
1601  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1602 
1603  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1604  return EXIT_FAILURE;
1605 
1607  std::cout << " v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
1608  for (std::size_t i=0; i < host_v1.size(); ++i)
1609  host_v1[i] = (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1610  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1611 
1612  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1613  return EXIT_FAILURE;
1614 
1615  std::cout << " v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
1616  for (std::size_t i=0; i < host_v1.size(); ++i)
1617  host_v1[i] += (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1618  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1619 
1620  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1621  return EXIT_FAILURE;
1622 
1623  std::cout << " v1 -= element_prod(v1 + v2, v2 + v1);" << std::endl;
1624  for (std::size_t i=0; i < host_v1.size(); ++i)
1625  host_v1[i] -= (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1626  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1627 
1628  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1629  return EXIT_FAILURE;
1630 
1631 
1632  std::cout << "Testing elementwise division..." << std::endl;
1633  for (std::size_t i=0; i<host_v1.size(); ++i)
1634  {
1635  host_v1[i] = NumericT(1.0) + random<NumericT>();
1636  host_v2[i] = NumericT(5.0) + random<NumericT>();
1637  }
1638 
1639  proxy_copy(host_v1, vcl_v1);
1640  proxy_copy(host_v2, vcl_v2);
1641 
1642  for (std::size_t i=0; i < host_v1.size(); ++i)
1643  host_v1[i] = host_v1[i] / host_v2[i];
1644  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2);
1645 
1646  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1647  return EXIT_FAILURE;
1648 
1649  for (std::size_t i=0; i < host_v1.size(); ++i)
1650  host_v1[i] += host_v1[i] / host_v2[i];
1651  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2);
1652 
1653  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1654  return EXIT_FAILURE;
1655 
1656  for (std::size_t i=0; i < host_v1.size(); ++i)
1657  host_v1[i] -= host_v1[i] / host_v2[i];
1658  vcl_v1 -= viennacl::linalg::element_div(vcl_v1, vcl_v2);
1659 
1660  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1661  return EXIT_FAILURE;
1662 
1664  for (std::size_t i=0; i < host_v1.size(); ++i)
1665  host_v1[i] = (host_v1[i] + host_v2[i]) / host_v2[i];
1666  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1667 
1668  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1669  return EXIT_FAILURE;
1670 
1671  for (std::size_t i=0; i < host_v1.size(); ++i)
1672  host_v1[i] += (host_v1[i] + host_v2[i]) / host_v2[i];
1673  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1674 
1675  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1676  return EXIT_FAILURE;
1677 
1678  for (std::size_t i=0; i < host_v1.size(); ++i)
1679  host_v1[i] -= (host_v1[i] + host_v2[i]) / host_v2[i];
1680  vcl_v1 -= viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1681 
1682  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1683  return EXIT_FAILURE;
1684 
1686  for (std::size_t i=0; i < host_v1.size(); ++i)
1687  host_v1[i] = host_v1[i] / (host_v2[i] + host_v1[i]);
1688  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1689 
1690  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1691  return EXIT_FAILURE;
1692 
1693  for (std::size_t i=0; i < host_v1.size(); ++i)
1694  host_v1[i] += host_v1[i] / (host_v2[i] + host_v1[i]);
1695  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1696 
1697  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1698  return EXIT_FAILURE;
1699 
1700  for (std::size_t i=0; i < host_v1.size(); ++i)
1701  host_v1[i] -= host_v1[i] / (host_v2[i] + host_v1[i]);
1702  vcl_v1 -= viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1703 
1704  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1705  return EXIT_FAILURE;
1706 
1708  for (std::size_t i=0; i < host_v1.size(); ++i)
1709  host_v1[i] = (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1710  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1711 
1712  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1713  return EXIT_FAILURE;
1714 
1715  for (std::size_t i=0; i < host_v1.size(); ++i)
1716  host_v1[i] += (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1717  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1718 
1719  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1720  return EXIT_FAILURE;
1721 
1722  for (std::size_t i=0; i < host_v1.size(); ++i)
1723  host_v1[i] -= (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1724  vcl_v1 -= viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1725 
1726  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1727  return EXIT_FAILURE;
1728 
1729 
1730  std::cout << "Testing elementwise power function..." << std::endl;
1731  for (std::size_t i=0; i<host_v1.size(); ++i)
1732  {
1733  host_v1[i] = NumericT(1.1) + NumericT(0.5) * random<NumericT>();
1734  host_v2[i] = NumericT(1.1) + NumericT(0.5) * random<NumericT>();
1735  }
1736  std::vector<NumericT> std_v3(host_v1.size());
1737  vector_proxy<NumericT> host_v3(&std_v3[0], 0, 1, host_v1.size());
1738 
1739  proxy_copy(host_v1, vcl_v1);
1740  proxy_copy(host_v2, vcl_v2);
1741 
1742  for (std::size_t i=0; i<host_v3.size(); ++i)
1743  host_v3[i] = std::pow(host_v1[i], host_v2[i]);
1744  vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1745 
1746  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1747  {
1748  std::cerr << "** Failure in v1 = pow(v1, v2);" << std::endl;
1749  return EXIT_FAILURE;
1750  }
1751 
1752  proxy_copy(host_v1, vcl_v1);
1753  for (std::size_t i=0; i<host_v3.size(); ++i)
1754  host_v3[i] = host_v1[i];
1755  for (std::size_t i=0; i<host_v3.size(); ++i)
1756  host_v3[i] += std::pow(host_v1[i], host_v2[i]);
1757  vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1758 
1759  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1760  {
1761  std::cerr << "** Failure in v1 += pow(v1, v2);" << std::endl;
1762  return EXIT_FAILURE;
1763  }
1764 
1765  proxy_copy(host_v1, vcl_v1);
1766  for (std::size_t i=0; i<host_v3.size(); ++i)
1767  host_v3[i] = host_v1[i];
1768  for (std::size_t i=0; i<host_v3.size(); ++i)
1769  host_v3[i] -= std::pow(host_v1[i], host_v2[i]);
1770  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1771 
1772  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1773  {
1774  std::cerr << "** Failure in v1 -= pow(v1, v2);" << std::endl;
1775  return EXIT_FAILURE;
1776  }
1777 
1779  proxy_copy(host_v1, vcl_v1);
1780  for (std::size_t i=0; i<host_v3.size(); ++i)
1781  host_v3[i] = host_v1[i];
1782  for (std::size_t i=0; i<host_v3.size(); ++i)
1783  host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1784  vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1785 
1786  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1787  {
1788  std::cerr << "** Failure in v1 = pow(v1 + v2, v2);" << std::endl;
1789  return EXIT_FAILURE;
1790  }
1791 
1792  proxy_copy(host_v1, vcl_v1);
1793  for (std::size_t i=0; i<host_v3.size(); ++i)
1794  host_v3[i] = host_v1[i];
1795  for (std::size_t i=0; i<host_v3.size(); ++i)
1796  host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1797  vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1798 
1799  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1800  {
1801  std::cerr << "** Failure in v1 += pow(v1 + v2, v2);" << std::endl;
1802  return EXIT_FAILURE;
1803  }
1804 
1805  proxy_copy(host_v1, vcl_v1);
1806  for (std::size_t i=0; i<host_v3.size(); ++i)
1807  host_v3[i] = host_v1[i];
1808  for (std::size_t i=0; i<host_v3.size(); ++i)
1809  host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1810  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1811 
1812  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1813  {
1814  std::cerr << "** Failure in v1 -= pow(v1 + v2, v2);" << std::endl;
1815  return EXIT_FAILURE;
1816  }
1817 
1819  proxy_copy(host_v1, vcl_v1);
1820  for (std::size_t i=0; i<host_v3.size(); ++i)
1821  host_v3[i] = host_v1[i];
1822  for (std::size_t i=0; i<host_v3.size(); ++i)
1823  host_v3[i] = std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1824  vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1825 
1826  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1827  {
1828  std::cerr << "** Failure in v1 = pow(v1, v2 + v1);" << std::endl;
1829  return EXIT_FAILURE;
1830  }
1831 
1832  proxy_copy(host_v1, vcl_v1);
1833  for (std::size_t i=0; i<host_v3.size(); ++i)
1834  host_v3[i] = host_v1[i];
1835  for (std::size_t i=0; i<host_v3.size(); ++i)
1836  host_v3[i] += std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1837  vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1838 
1839  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1840  {
1841  std::cerr << "** Failure in v1 += pow(v1, v2 + v1);" << std::endl;
1842  return EXIT_FAILURE;
1843  }
1844 
1845  proxy_copy(host_v1, vcl_v1);
1846  for (std::size_t i=0; i<host_v3.size(); ++i)
1847  host_v3[i] = host_v1[i];
1848  for (std::size_t i=0; i<host_v3.size(); ++i)
1849  host_v3[i] -= std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1850  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1851 
1852  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1853  {
1854  std::cerr << "** Failure in v1 -= pow(v1, v2 + v1);" << std::endl;
1855  return EXIT_FAILURE;
1856  }
1857 
1859  proxy_copy(host_v1, vcl_v1);
1860  for (std::size_t i=0; i<host_v3.size(); ++i)
1861  host_v3[i] = host_v1[i];
1862  for (std::size_t i=0; i<host_v3.size(); ++i)
1863  host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1864  vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1865 
1866  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1867  {
1868  std::cerr << "** Failure in v1 = pow(v1 + v2, v2 + v1);" << std::endl;
1869  return EXIT_FAILURE;
1870  }
1871 
1872  proxy_copy(host_v1, vcl_v1);
1873  for (std::size_t i=0; i<host_v3.size(); ++i)
1874  host_v3[i] = host_v1[i];
1875  for (std::size_t i=0; i<host_v3.size(); ++i)
1876  host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1877  vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1878 
1879  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1880  {
1881  std::cerr << "** Failure in v1 += pow(v1 + v2, v2 + v1);" << std::endl;
1882  return EXIT_FAILURE;
1883  }
1884 
1885  proxy_copy(host_v1, vcl_v1);
1886  for (std::size_t i=0; i<host_v3.size(); ++i)
1887  host_v3[i] = host_v1[i];
1888  for (std::size_t i=0; i<host_v3.size(); ++i)
1889  host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1890  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1891 
1892  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1893  {
1894  std::cerr << "** Failure in v1 -= pow(v1 + v2, v2 + v1);" << std::endl;
1895  return EXIT_FAILURE;
1896  }
1897 
1898  std::cout << "Testing unary elementwise operations..." << std::endl;
1899  for (size_t i=0; i < host_v1.size(); ++i)
1900  host_v1[i] = random<NumericT>() / NumericT(4);
1901 
// Expands, inside the enclosing test function, to the full test sequence for one
// unary elementwise function FUNCNAME (e.g. cosh, sqrt):
//   v1  = F(v2),  v1  = F(v1 + v2),
//   v1 += F(v1),  v1 += F(v1 + v2),
//   v1 -= F(v2),  v1 -= F(v1 + v2)
// Each step is computed on the host with std::FUNCNAME and on the device with
// viennacl::linalg::element_FUNCNAME, then compared via check().
// The expansion relies on host_v1, host_v2, vcl_v1, vcl_v2, epsilon and NumericT
// being visible at the point of use, and executes 'return EXIT_FAILURE;' in the
// enclosing function as soon as one comparison fails.
// host_v2 is re-derived from host_v1 at the start of every expansion, so the
// current contents of host_v1 determine the inputs of the whole sequence.
#define GENERATE_UNARY_OP_TEST(FUNCNAME) \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v2[i] = NumericT(3.1415) * host_v1[i]; \
  proxy_copy(host_v1, vcl_v1); \
  proxy_copy(host_v2, vcl_v2); \
  \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] = std::FUNCNAME(host_v2[i]); \
  vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 = " << #FUNCNAME << "(v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] = std::FUNCNAME(host_v1[i] + host_v2[i]); \
  vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 = " << #FUNCNAME << "(v1 + v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] += std::FUNCNAME(host_v1[i]); \
  vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    /* the operand of this step is v1 itself, so report it as such */ \
    std::cout << "Failure at v1 += " << #FUNCNAME << "(v1)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] += std::FUNCNAME(host_v1[i] + host_v2[i]); \
  vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 += " << #FUNCNAME << "(v1 + v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] -= std::FUNCNAME(host_v2[i]); \
  vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 -= " << #FUNCNAME << "(v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] -= std::FUNCNAME(host_v1[i] + host_v2[i]); \
  vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 -= " << #FUNCNAME << "(v1 + v2)" << std::endl; \
    return EXIT_FAILURE; \
  }
1969  GENERATE_UNARY_OP_TEST(cosh);
1970  for (std::size_t i=0; i < host_v1.size(); ++i)
1971  host_v1[i] = random<NumericT>() / NumericT(4);
1973  GENERATE_UNARY_OP_TEST(floor);
1974  GENERATE_UNARY_OP_TEST(fabs);
1976  GENERATE_UNARY_OP_TEST(log10);
1978  GENERATE_UNARY_OP_TEST(sinh);
1979  GENERATE_UNARY_OP_TEST(fabs);
1980  //GENERATE_UNARY_OP_TEST(abs); //OpenCL allows abs on integers only
1981  GENERATE_UNARY_OP_TEST(sqrt);
1983  GENERATE_UNARY_OP_TEST(tanh);
1984 
1985  // --------------------------------------------------------------------------
1986  for (std::size_t i=0; i<host_v1.size(); ++i)
1987  host_v2[i] = NumericT(3.1415) * host_v1[i];
1988  proxy_copy(host_v1, vcl_v1);
1989  proxy_copy(host_v2, vcl_v2);
1990 
1991  std::cout << "Testing another complicated vector expression with CPU scalars..." << std::endl;
1992  for (std::size_t i=0; i<host_v1.size(); ++i)
1993  host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
1994  vcl_v1 = vcl_v2 / alpha + beta * (vcl_v1 - alpha*vcl_v2);
1995 
1996  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1997  return EXIT_FAILURE;
1998 
1999  std::cout << "Testing another complicated vector expression with GPU scalars..." << std::endl;
2000  for (std::size_t i=0; i<host_v1.size(); ++i)
2001  host_v2[i] = NumericT(3.1415) * host_v1[i];
2002  proxy_copy(host_v1, vcl_v1);
2003  proxy_copy(host_v2, vcl_v2);
2004 
2005  for (std::size_t i=0; i<host_v1.size(); ++i)
2006  host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
2007  vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * (vcl_v1 - gpu_alpha*vcl_v2);
2008 
2009  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2010  return EXIT_FAILURE;
2011 
2012 
2013  std::cout << "Testing lenghty sum of scaled vectors..." << std::endl;
2014  for (std::size_t i=0; i<host_v1.size(); ++i)
2015  host_v2[i] = NumericT(3.1415) * host_v1[i];
2016  proxy_copy(host_v1, vcl_v1);
2017  proxy_copy(host_v2, vcl_v2);
2018 
2019  for (std::size_t i=0; i<host_v1.size(); ++i)
2020  host_v1[i] = host_v2[i] / alpha + beta * host_v1[i] - alpha * host_v2[i] + beta * host_v1[i] - alpha * host_v1[i];
2021  vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1;
2022 
2023  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2024  return EXIT_FAILURE;
2025 
2026  // --------------------------------------------------------------------------
2027  return retval;
2028 }
2029 
2030 
2031 template< typename NumericT, typename Epsilon >
2032 int test(Epsilon const& epsilon)
2033 {
2034  int retval = EXIT_SUCCESS;
2035  std::size_t size = 24656;
2036 
2037  std::cout << "Running tests for vector of size " << size << std::endl;
2038 
2039  //
2040  // Set up host objects
2041  //
2042  std::vector<NumericT> std_full_vec(size);
2043  std::vector<NumericT> std_full_vec2(std_full_vec.size());
2044 
2045  for (std::size_t i=0; i<std_full_vec.size(); ++i)
2046  {
2047  std_full_vec[i] = NumericT(1.0) + random<NumericT>();
2048  std_full_vec2[i] = NumericT(1.0) + random<NumericT>();
2049  }
2050 
2051  std::size_t r1_start = std_full_vec.size() / 4;
2052  std::size_t r1_stop = 2 * std_full_vec.size() / 4;
2053  std::size_t r2_start = 2 * std_full_vec2.size() / 4;
2054  std::size_t r2_stop = 3 * std_full_vec2.size() / 4;
2055  vector_proxy<NumericT> host_range_vec (&std_full_vec[0], r1_start, 1, r1_stop - r1_start);
2056  vector_proxy<NumericT> host_range_vec2(&std_full_vec2[0], r2_start, 1, r2_stop - r2_start);
2057 
2058  std::size_t s1_start = std_full_vec.size() / 4;
2059  std::size_t s1_inc = 3;
2060  std::size_t s1_size = std_full_vec.size() / 4;
2061  std::size_t s2_start = 2 * std_full_vec2.size() / 4;
2062  std::size_t s2_inc = 2;
2063  std::size_t s2_size = std_full_vec2.size() / 4;
2064  vector_proxy<NumericT> host_slice_vec (&std_full_vec[0], s1_start, s1_inc, s1_size);
2065  vector_proxy<NumericT> host_slice_vec2(&std_full_vec2[0], s2_start, s2_inc, s2_size);
2066 
2067  //
2068  // Set up ViennaCL objects
2069  //
2070  viennacl::vector<NumericT> vcl_full_vec(std_full_vec.size());
2071  viennacl::vector<NumericT> vcl_full_vec2(std_full_vec2.size());
2072 
2073  viennacl::fast_copy(std_full_vec.begin(), std_full_vec.end(), vcl_full_vec.begin());
2074  viennacl::copy(std_full_vec2.begin(), std_full_vec2.end(), vcl_full_vec2.begin());
2075 
2076  viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4);
2077  viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4);
2078  viennacl::vector_range< viennacl::vector<NumericT> > vcl_range_vec(vcl_full_vec, vcl_r1);
2079  viennacl::vector_range< viennacl::vector<NumericT> > vcl_range_vec2(vcl_full_vec2, vcl_r2);
2080 
2081  {
2082  viennacl::vector<NumericT> vcl_short_vec(vcl_range_vec);
2083  viennacl::vector<NumericT> vcl_short_vec2 = vcl_range_vec2;
2084 
2085  std::vector<NumericT> std_short_vec(host_range_vec.size());
2086  for (std::size_t i=0; i<std_short_vec.size(); ++i)
2087  std_short_vec[i] = host_range_vec[i];
2088  vector_proxy<NumericT> host_short_vec(&std_short_vec[0], 0, 1, std_short_vec.size());
2089 
2090  std::vector<NumericT> std_short_vec2(host_range_vec2.size());
2091  for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2092  std_short_vec2[i] = host_range_vec2[i];
2093  vector_proxy<NumericT> host_short_vec2(&std_short_vec2[0], 0, 1, std_short_vec.size());
2094 
2095  std::cout << "Testing creation of vectors from range..." << std::endl;
2096  if (check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2097  return EXIT_FAILURE;
2098  if (check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2099  return EXIT_FAILURE;
2100  }
2101 
2102  viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4);
2103  viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4);
2104  viennacl::vector_slice< viennacl::vector<NumericT> > vcl_slice_vec(vcl_full_vec, vcl_s1);
2105  viennacl::vector_slice< viennacl::vector<NumericT> > vcl_slice_vec2(vcl_full_vec2, vcl_s2);
2106 
2107  viennacl::vector<NumericT> vcl_short_vec(vcl_slice_vec);
2108  viennacl::vector<NumericT> vcl_short_vec2 = vcl_slice_vec2;
2109 
2110  std::vector<NumericT> std_short_vec(host_slice_vec.size());
2111  for (std::size_t i=0; i<std_short_vec.size(); ++i)
2112  std_short_vec[i] = host_slice_vec[i];
2113  vector_proxy<NumericT> host_short_vec(&std_short_vec[0], 0, 1, std_short_vec.size());
2114 
2115  std::vector<NumericT> std_short_vec2(host_slice_vec2.size());
2116  for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2117  std_short_vec2[i] = host_slice_vec2[i];
2118  vector_proxy<NumericT> host_short_vec2(&std_short_vec2[0], 0, 1, std_short_vec.size());
2119 
2120  std::cout << "Testing creation of vectors from slice..." << std::endl;
2121  if (check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2122  return EXIT_FAILURE;
2123  if (check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2124  return EXIT_FAILURE;
2125 
2126 
2127  //
2128  // Now start running tests for vectors, ranges and slices:
2129  //
2130 
2131  std::cout << " ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl;
2132  retval = test<NumericT>(epsilon,
2133  host_short_vec, host_short_vec2,
2134  vcl_short_vec, vcl_short_vec2);
2135  if (retval != EXIT_SUCCESS)
2136  return EXIT_FAILURE;
2137 
2138  std::cout << " ** vcl_v1 = vector, vcl_v2 = range **" << std::endl;
2139  retval = test<NumericT>(epsilon,
2140  host_short_vec, host_short_vec2,
2141  vcl_short_vec, vcl_range_vec2);
2142  if (retval != EXIT_SUCCESS)
2143  return EXIT_FAILURE;
2144 
2145  std::cout << " ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl;
2146  retval = test<NumericT>(epsilon,
2147  host_short_vec, host_short_vec2,
2148  vcl_short_vec, vcl_slice_vec2);
2149  if (retval != EXIT_SUCCESS)
2150  return EXIT_FAILURE;
2151 
2153 
2154  std::cout << " ** vcl_v1 = range, vcl_v2 = vector **" << std::endl;
2155  retval = test<NumericT>(epsilon,
2156  host_short_vec, host_short_vec2,
2157  vcl_range_vec, vcl_short_vec2);
2158  if (retval != EXIT_SUCCESS)
2159  return EXIT_FAILURE;
2160 
2161  std::cout << " ** vcl_v1 = range, vcl_v2 = range **" << std::endl;
2162  retval = test<NumericT>(epsilon,
2163  host_short_vec, host_short_vec2,
2164  vcl_range_vec, vcl_range_vec2);
2165  if (retval != EXIT_SUCCESS)
2166  return EXIT_FAILURE;
2167 
2168  std::cout << " ** vcl_v1 = range, vcl_v2 = slice **" << std::endl;
2169  retval = test<NumericT>(epsilon,
2170  host_short_vec, host_short_vec2,
2171  vcl_range_vec, vcl_slice_vec2);
2172  if (retval != EXIT_SUCCESS)
2173  return EXIT_FAILURE;
2174 
2176 
2177  std::cout << " ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl;
2178  retval = test<NumericT>(epsilon,
2179  host_short_vec, host_short_vec2,
2180  vcl_slice_vec, vcl_short_vec2);
2181  if (retval != EXIT_SUCCESS)
2182  return EXIT_FAILURE;
2183 
2184  std::cout << " ** vcl_v1 = slice, vcl_v2 = range **" << std::endl;
2185  retval = test<NumericT>(epsilon,
2186  host_short_vec, host_short_vec2,
2187  vcl_slice_vec, vcl_range_vec2);
2188  if (retval != EXIT_SUCCESS)
2189  return EXIT_FAILURE;
2190 
2191  std::cout << " ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl;
2192  retval = test<NumericT>(epsilon,
2193  host_short_vec, host_short_vec2,
2194  vcl_slice_vec, vcl_slice_vec2);
2195  if (retval != EXIT_SUCCESS)
2196  return EXIT_FAILURE;
2197 
2198  return EXIT_SUCCESS;
2199 }
2200 
2201 
2202 //
2203 // -------------------------------------------------------------
2204 //
2205 int main()
2206 {
2207  std::cout << std::endl;
2208  std::cout << "----------------------------------------------" << std::endl;
2209  std::cout << "----------------------------------------------" << std::endl;
2210  std::cout << "## Test :: Vector" << std::endl;
2211  std::cout << "----------------------------------------------" << std::endl;
2212  std::cout << "----------------------------------------------" << std::endl;
2213  std::cout << std::endl;
2214 
2215  int retval = EXIT_SUCCESS;
2216 
2217  std::cout << std::endl;
2218  std::cout << "----------------------------------------------" << std::endl;
2219  std::cout << std::endl;
2220  {
2221  typedef float NumericT;
2222  NumericT epsilon = static_cast<NumericT>(1.0E-2);
2223  std::cout << "# Testing setup:" << std::endl;
2224  std::cout << " eps: " << epsilon << std::endl;
2225  std::cout << " numeric: float" << std::endl;
2226  retval = test<NumericT>(epsilon);
2227  if ( retval == EXIT_SUCCESS )
2228  std::cout << "# Test passed" << std::endl;
2229  else
2230  return retval;
2231  }
2232  std::cout << std::endl;
2233  std::cout << "----------------------------------------------" << std::endl;
2234  std::cout << std::endl;
2235  #ifdef VIENNACL_WITH_OPENCL
2237  #endif
2238  {
2239  {
2240  typedef double NumericT;
2241  NumericT epsilon = 1.0E-10;
2242  std::cout << "# Testing setup:" << std::endl;
2243  std::cout << " eps: " << epsilon << std::endl;
2244  std::cout << " numeric: double" << std::endl;
2245  retval = test<NumericT>(epsilon);
2246  if ( retval == EXIT_SUCCESS )
2247  std::cout << "# Test passed" << std::endl;
2248  else
2249  return retval;
2250  }
2251  std::cout << std::endl;
2252  std::cout << "----------------------------------------------" << std::endl;
2253  std::cout << std::endl;
2254  }
2255 
2256  std::cout << std::endl;
2257  std::cout << "------- Test completed --------" << std::endl;
2258  std::cout << std::endl;
2259 
2260 
2261  return retval;
2262 }
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:86
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:226
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
vector_proxy(NumericT *p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:89
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
NumericT & operator[](std::size_t index)
int test(Epsilon const &epsilon, HostVectorType &host_v1, HostVectorType &host_v2, ViennaCLVectorType1 &vcl_v1, ViennaCLVectorType2 &vcl_v2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:433
std::size_t size() const
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:827
int check(T1 const &t1, T2 const &t2, double epsilon)
void proxy_copy(vector_proxy< NumericT > const &host_vec, viennacl::vector_base< NumericT > &vcl_vec)
Class for representing strided subvectors of a bigger vector x.
Definition: forwards.h:436
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
NumericT const & operator[](std::size_t index) const
Proxy classes for vectors.
int main()
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied.
Represents a vector consisting of 1 at a given index and zeros otherwise.
Definition: vector_def.hpp:76
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
NumericT max(std::vector< NumericT > const &v1)
Definition: maxmin.hpp:47
T norm_inf(std::vector< T, A > const &v1)
Definition: norm_inf.hpp:60
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
T norm_1(std::vector< T, A > const &v1)
Definition: norm_1.hpp:61
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:423
float ScalarType
Definition: fft_1d.cpp:42
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:834
A slice class that refers to a strided set of indices, described by a start index, a stride (increment), and the number of entries.
Definition: forwards.h:428
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:232
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations...
NumericT min(std::vector< NumericT > const &v1)
Definition: maxmin.hpp:91
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)