// Strided view over a raw array: logical element i maps to
// values_[start_ + i * inc_], and size() reports the logical element count.
// NOTE(review): no allocation or release of values_ is visible in this
// excerpt, so the view presumably does not own the buffer -- confirm.
46 template<
typename NumericT>
// Stores the base pointer, first offset, stride and logical length as given.
50 vector_proxy(NumericT * p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
51 : values_(p_values), start_(start_idx), inc_(increment), size_(num_elements) {}
// Read-only element access; the index is not range-checked here.
53 NumericT
const &
operator[](std::size_t index)
const {
return values_[start_ + index * inc_]; }
// Mutable element access; same index mapping as the const overload.
54 NumericT &
operator[](std::size_t index) {
return values_[start_ + index * inc_]; }
// Returns the element count that was passed to the constructor.
56 std::size_t
size()
const {
return size_; }
// Fragment of a copy helper (its signature is not visible in this excerpt):
// stages the elements of host_vec in a temporary std::vector of equal length.
// NOTE(review): what is done with std_vec afterwards is outside this view.
65 template<
typename NumericT>
68 std::vector<NumericT> std_vec(host_vec.
size());
// Element-wise copy through host_vec's operator[].
70 for (std::size_t i=0; i<host_vec.
size(); ++i)
71 std_vec[i] = host_vec[i];
// Fragment of a copy helper in the opposite direction (signature not visible):
// a staging std::vector sized from vcl_vec is written into host_vec
// element by element.
// NOTE(review): how std_vec gets filled is not visible in this excerpt.
76 template<
typename NumericT>
79 std::vector<NumericT> std_vec(vcl_vec.
size());
// NOTE(review): the loop is bounded by host_vec.size() while std_vec was sized
// with vcl_vec.size(); std_vec[i] is read out of range if host_vec is the
// longer of the two -- confirm that callers always pass equal lengths.
83 for (std::size_t i=0; i<host_vec.
size(); ++i)
84 host_vec[i] = std_vec[i];
// Fragment of a scalar comparison helper (signature not visible; presumably
// one of the diff() overloads used by check() -- confirm).
// When s1 and s2 differ, yields the signed difference (s1 - s2) divided by the
// larger of |s1| and |s2|. The result for equal inputs is not visible here.
91 template<
typename ScalarType>
// The guard also keeps the division away from the s1 == s2 == 0 case.
95 if (std::fabs(s1 - s2) > 0 )
96 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
// Second scalar comparison fragment with the same visible logic as the one
// preceding it: signed (s1 - s2) scaled by max(|s1|, |s2|) when the two differ.
// NOTE(review): the parameter types that distinguish this overload are not
// visible in this excerpt.
102 template<
typename ScalarType>
// Only divide when the operands actually differ.
106 if (std::fabs(s1 - s2) > 0 )
107 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
// Third scalar comparison fragment; the visible statements match the two
// before it: signed (s1 - s2) scaled by max(|s1|, |s2|) when the two differ.
// NOTE(review): the signature, and any conversion of s1/s2 done before this
// point, lies outside the visible lines.
113 template<
typename ScalarType>
// Only divide when the operands actually differ.
117 if (std::fabs(s1 - s2) > 0 )
118 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
// Fragment of a vector comparison helper (signature not visible): computes a
// per-element relative difference between v1 and a staging copy v2_cpu, then
// reduces to the largest absolute entry in ret.
// NOTE(review): how v2_cpu is filled and how ret is initialised/returned is
// not visible in this excerpt.
124 template<
typename ScalarType,
typename ViennaCLVectorType>
127 std::vector<ScalarType> v2_cpu(vcl_vec.size());
// NOTE(review): the index is unsigned int while size() is compared against it,
// and the bound is v1.size() although v2_cpu was sized from vcl_vec.size();
// confirm that both vectors always have the same length.
131 for (
unsigned int i=0;i<v1.
size(); ++i)
// Relative difference is only formed when at least one operand is non-zero,
// which avoids dividing by zero; v2_cpu[i] is overwritten with the result.
133 if (
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
134 v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) /
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
// Max-reduction over the per-element results.
140 for (std::size_t i=0; i<v2_cpu.size(); ++i)
141 ret =
std::max(ret, std::fabs(v2_cpu[i]));
// Compares t1 and t2 via diff() and reports a failure status.
// Parameters: t1, t2 are the values to compare; epsilon is the tolerance.
// NOTE(review): the condition that uses epsilon and the final return are not
// visible in this excerpt; presumably the error branch is taken when the
// relative difference exceeds epsilon -- confirm.
146 template<
typename T1,
typename T2>
147 int check(T1
const & t1, T2
const & t2,
double epsilon)
// Status defaults to success and is only changed in the error branch.
149 int retval = EXIT_SUCCESS;
// The sign returned by diff() is dropped; only the magnitude is used.
151 double temp = std::fabs(
diff(t1, t2));
154 std::cout <<
"# Error! Relative difference: " << temp << std::endl;
155 retval = EXIT_FAILURE;
164 template<
typename NumericT,
typename Epsilon,
typename HostVectorType,
typename ViennaCLVectorType1,
typename ViennaCLVectorType2 >
165 int test(Epsilon
const& epsilon,
166 HostVectorType & host_v1, HostVectorType & host_v2,
167 ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
169 int retval = EXIT_SUCCESS;
171 NumericT cpu_result = 42.0;
177 std::cout <<
"Checking for zero_vector initializer..." << std::endl;
178 for (std::size_t i=0; i<host_v1.size(); ++i)
179 host_v1[i] = NumericT(0);
181 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
184 std::cout <<
"Checking for scalar_vector initializer..." << std::endl;
185 for (std::size_t i=0; i<host_v1.size(); ++i)
186 host_v1[i] = NumericT(cpu_result);
188 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
191 for (std::size_t i=0; i<host_v1.size(); ++i)
192 host_v1[i] = NumericT(gpu_result);
194 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
197 std::cout <<
"Checking for unit_vector initializer..." << std::endl;
198 for (std::size_t i=0; i<host_v1.size(); ++i)
199 host_v1[i] = NumericT(0);
200 host_v1[5] = NumericT(1);
202 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
206 for (std::size_t i=0; i<host_v1.size(); ++i)
208 host_v1[i] = NumericT(1.0) + random<NumericT>();
209 host_v2[i] = NumericT(1.0) + random<NumericT>();
215 std::cout <<
"Checking for successful copy..." << std::endl;
216 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
218 if (
check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
226 std::cout <<
"Testing inner_prod..." << std::endl;
228 for (std::size_t i=0; i<host_v1.size(); ++i)
229 cpu_result += host_v1[i] * host_v2[i];
233 if (
check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS)
235 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
239 for (std::size_t i=0; i<host_v1.size(); ++i)
240 cpu_result += (host_v1[i] + host_v2[i]) * (host_v2[i] - host_v1[i]);
244 if (
check(cpu_result, cpu_result3, epsilon) != EXIT_SUCCESS)
246 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
250 std::cout <<
"Testing norm_1..." << std::endl;
252 for (std::size_t i=0; i<host_v1.size(); ++i)
253 cpu_result += std::fabs(host_v1[i]);
256 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
259 gpu_result = 2 * cpu_result;
261 for (std::size_t i=0; i<host_v1.size(); ++i)
262 cpu_result += std::fabs(host_v1[i]);
263 gpu_result = cpu_result;
267 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
271 for (std::size_t i=0; i<host_v1.size(); ++i)
272 cpu_result += std::fabs(host_v1[i] + host_v2[i]);
273 gpu_result = cpu_result;
277 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
281 std::cout <<
"Testing norm_2..." << std::endl;
283 for (std::size_t i=0; i<host_v1.size(); ++i)
284 cpu_result += host_v1[i] * host_v1[i];
285 cpu_result = std::sqrt(cpu_result);
288 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
291 gpu_result = 2 * cpu_result;
293 for (std::size_t i=0; i<host_v1.size(); ++i)
294 cpu_result += host_v1[i] * host_v1[i];
295 gpu_result = std::sqrt(cpu_result);
298 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
302 for (std::size_t i=0; i<host_v1.size(); ++i)
303 cpu_result += (host_v1[i] + host_v2[i]) * (host_v1[i] + host_v2[i]);
304 gpu_result = std::sqrt(cpu_result);
307 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
311 std::cout <<
"Testing norm_inf..." << std::endl;
312 cpu_result = std::fabs(host_v1[0]);
313 for (std::size_t i=0; i<host_v1.size(); ++i)
314 cpu_result =
std::max(std::fabs(host_v1[i]), cpu_result);
317 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
320 gpu_result = 2 * cpu_result;
321 cpu_result = std::fabs(host_v1[0]);
322 for (std::size_t i=0; i<host_v1.size(); ++i)
323 cpu_result =
std::max(std::fabs(host_v1[i]), cpu_result);
324 gpu_result = cpu_result;
328 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
331 cpu_result = std::fabs(host_v1[0]);
332 for (std::size_t i=0; i<host_v1.size(); ++i)
333 cpu_result =
std::max(std::fabs(host_v1[i] + host_v2[i]), cpu_result);
334 gpu_result = cpu_result;
338 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
342 std::cout <<
"Testing index_norm_inf..." << std::endl;
343 std::size_t cpu_index = 0;
344 cpu_result = std::fabs(host_v1[0]);
345 for (std::size_t i=0; i<host_v1.size(); ++i)
347 if (std::fabs(host_v1[i]) > cpu_result)
349 cpu_result = std::fabs(host_v1[i]);
355 if (
check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index), epsilon) != EXIT_SUCCESS)
358 cpu_result = host_v1[cpu_index];
361 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
364 cpu_result = std::fabs(host_v1[0] + host_v2[0]);
365 for (std::size_t i=0; i<host_v1.size(); ++i)
367 if (std::fabs(host_v1[i] + host_v2[i]) > cpu_result)
369 cpu_result = std::fabs(host_v1[i] + host_v2[i]);
373 cpu_result = host_v1[cpu_index];
376 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
381 std::cout <<
"Testing max..." << std::endl;
382 cpu_result = host_v1[0];
383 for (std::size_t i=0; i<host_v1.size(); ++i)
384 cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
387 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
390 cpu_result = host_v1[0];
391 for (std::size_t i=0; i<host_v1.size(); ++i)
392 cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
393 gpu_result = cpu_result;
397 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
400 cpu_result = host_v1[0] + host_v2[0];
401 for (std::size_t i=0; i<host_v1.size(); ++i)
402 cpu_result = std::max<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
403 gpu_result = cpu_result;
407 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
412 std::cout <<
"Testing min..." << std::endl;
413 cpu_result = host_v1[0];
414 for (std::size_t i=0; i<host_v1.size(); ++i)
415 cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
418 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
421 cpu_result = host_v1[0];
422 for (std::size_t i=0; i<host_v1.size(); ++i)
423 cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
424 gpu_result = cpu_result;
428 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
431 cpu_result = host_v1[0] + host_v2[0];
432 for (std::size_t i=0; i<host_v1.size(); ++i)
433 cpu_result = std::min<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
434 gpu_result = cpu_result;
438 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
449 for (std::size_t i=0; i<host_v1.size(); ++i)
451 NumericT temp = NumericT(1.1) * host_v1[i] + NumericT(2.3) * host_v2[i];
452 host_v2[i] = - NumericT(2.3) * host_v1[i] + NumericT(1.1) * host_v2[i];
457 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
459 if (
check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
464 std::cout <<
"Testing assignments..." << std::endl;
465 NumericT val =
static_cast<NumericT
>(1e-1);
466 for (
size_t i=0; i < host_v1.size(); ++i)
469 for (
size_t i=0; i < vcl_v1.size(); ++i)
472 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
475 std::cout <<
"Testing assignments via iterators..." << std::endl;
477 host_v1[2] =
static_cast<NumericT
>(1.9);
478 vcl_v1[2] =
static_cast<NumericT
>(1.9);
480 host_v1[2] =
static_cast<NumericT
>(1.5);
481 typename ViennaCLVectorType1::iterator vcl_v1_it = vcl_v1.begin();
484 *vcl_v1_it =
static_cast<NumericT
>(1.5);
486 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
492 for (std::size_t i=0; i < host_v1.size(); ++i)
494 host_v1[i] = NumericT(1.0) + random<NumericT>();
495 host_v2[i] = NumericT(3.1415) * host_v1[i];
500 std::cout <<
"Testing scaling with CPU scalar..." << std::endl;
501 NumericT alpha =
static_cast<NumericT
>(1.7182);
504 for (std::size_t i=0; i < host_v1.size(); ++i)
505 host_v1[i] *=
long(alpha);
506 vcl_v1 *= long(alpha);
508 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
511 for (std::size_t i=0; i < host_v1.size(); ++i)
512 host_v1[i] *=
float(alpha);
513 vcl_v1 *= float(alpha);
515 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
518 for (std::size_t i=0; i < host_v1.size(); ++i)
519 host_v1[i] *=
double(alpha);
520 vcl_v1 *= double(alpha);
522 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
526 std::cout <<
"Testing scaling with GPU scalar..." << std::endl;
527 for (std::size_t i=0; i < host_v1.size(); ++i)
531 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
534 std::cout <<
"Testing scaling with scalar expression..." << std::endl;
536 for (std::size_t i=0; i < host_v1.size(); ++i)
537 cpu_result += host_v1[i] * host_v2[i];
538 for (std::size_t i=0; i < host_v1.size(); ++i)
539 host_v1[i] *= cpu_result;
542 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
545 NumericT beta =
static_cast<NumericT
>(1.4153);
548 std::cout <<
"Testing shrinking with CPU scalar..." << std::endl;
549 for (std::size_t i=0; i < host_v1.size(); ++i)
550 host_v1[i] /=
long(beta);
551 vcl_v1 /= long(beta);
553 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
556 for (std::size_t i=0; i < host_v1.size(); ++i)
557 host_v1[i] /=
float(beta);
558 vcl_v1 /= float(beta);
560 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
563 for (std::size_t i=0; i < host_v1.size(); ++i)
564 host_v1[i] /=
double(beta);
565 vcl_v1 /= double(beta);
567 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
571 std::cout <<
"Testing shrinking with GPU scalar..." << std::endl;
572 for (std::size_t i=0; i < host_v1.size(); ++i)
576 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
584 for (
size_t i=0; i < host_v1.size(); ++i)
586 host_v1[i] = NumericT(1.0) + random<NumericT>();
587 host_v2[i] = NumericT(3.1415) * host_v1[i];
592 std::cout <<
"Testing add on vector..." << std::endl;
594 std::cout <<
"Checking for successful copy..." << std::endl;
595 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
597 if (
check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
600 for (
size_t i=0; i < host_v1.size(); ++i)
601 host_v1[i] = host_v1[i] + host_v2[i];
602 vcl_v1 = vcl_v1 + vcl_v2;
604 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
607 std::cout <<
"Testing add on vector with flipsign..." << std::endl;
608 for (
size_t i=0; i < host_v1.size(); ++i)
609 host_v1[i] = - host_v1[i] + host_v2[i];
610 vcl_v1 = - vcl_v1 + vcl_v2;
612 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
615 std::cout <<
"Testing inplace-add on vector..." << std::endl;
616 for (
size_t i=0; i < host_v1.size(); ++i)
617 host_v1[i] += host_v2[i];
620 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
623 std::cout <<
"Testing assignment to vector with vector multiplied by scalar expression..." << std::endl;
625 for (std::size_t i=0; i < host_v1.size(); ++i)
626 cpu_result += host_v1[i] * host_v2[i];
627 for (std::size_t i=0; i < host_v1.size(); ++i)
628 host_v1[i] = cpu_result * host_v2[i];
635 std::cout <<
"Testing sub on vector..." << std::endl;
636 for (std::size_t i=0; i < host_v1.size(); ++i)
637 host_v2[i] = NumericT(3.1415) * host_v1[i];
641 for (std::size_t i=0; i < host_v1.size(); ++i)
642 host_v1[i] = host_v1[i] - host_v2[i];
643 vcl_v1 = vcl_v1 - vcl_v2;
645 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
648 std::cout <<
"Testing inplace-sub on vector..." << std::endl;
649 for (std::size_t i=0; i < host_v1.size(); ++i)
650 host_v1[i] -= host_v2[i];
653 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
661 std::cout <<
"Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
662 for (
size_t i=0; i < host_v1.size(); ++i)
664 host_v1[i] = NumericT(1.0) + random<NumericT>();
665 host_v2[i] = NumericT(3.1415) * host_v1[i];
670 for (std::size_t i=0; i < host_v1.size(); ++i)
671 host_v1[i] = host_v1[i] + host_v2[i] *
float(alpha);
672 vcl_v1 = vcl_v1 + vcl_v2 * float(alpha);
674 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
677 for (std::size_t i=0; i < host_v1.size(); ++i)
678 host_v1[i] = host_v1[i] + host_v2[i] *
double(alpha);
679 vcl_v1 = vcl_v1 + vcl_v2 * double(alpha);
681 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
685 std::cout <<
"Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
686 for (std::size_t i=0; i < host_v1.size(); ++i)
687 host_v2[i] = NumericT(3.1415) * host_v1[i];
691 for (std::size_t i=0; i < host_v1.size(); ++i)
692 host_v1[i] =
long(alpha) * host_v1[i] + host_v2[i];
693 vcl_v1 = long(alpha) * vcl_v1 + vcl_v2;
695 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
698 for (std::size_t i=0; i < host_v1.size(); ++i)
699 host_v1[i] =
float(alpha) * host_v1[i] + host_v2[i];
700 vcl_v1 = float(alpha) * vcl_v1 + vcl_v2;
702 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
705 for (std::size_t i=0; i < host_v1.size(); ++i)
706 host_v1[i] =
double(alpha) * host_v1[i] + host_v2[i];
707 vcl_v1 = double(alpha) * vcl_v1 + vcl_v2;
709 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
713 std::cout <<
"Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
714 for (std::size_t i=0; i < host_v1.size(); ++i)
715 host_v2[i] = NumericT(3.1415) * host_v1[i];
719 for (std::size_t i=0; i < host_v1.size(); ++i)
720 host_v1[i] =
long(alpha) * host_v1[i] + long(beta) * host_v2[i];
721 vcl_v1 = long(alpha) * vcl_v1 + long(beta) * vcl_v2;
723 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
726 for (std::size_t i=0; i < host_v1.size(); ++i)
727 host_v1[i] =
float(alpha) * host_v1[i] + float(beta) * host_v2[i];
728 vcl_v1 = float(alpha) * vcl_v1 + float(beta) * vcl_v2;
730 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
733 for (std::size_t i=0; i < host_v1.size(); ++i)
734 host_v1[i] =
double(alpha) * host_v1[i] + double(beta) * host_v2[i];
735 vcl_v1 = double(alpha) * vcl_v1 + double(beta) * vcl_v2;
737 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
741 std::cout <<
"Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
742 for (std::size_t i=0; i < host_v1.size(); ++i)
743 host_v2[i] = NumericT(3.1415) * host_v1[i];
747 for (std::size_t i=0; i < host_v1.size(); ++i)
748 host_v1[i] += host_v2[i] *
long(alpha);
749 vcl_v1 += vcl_v2 * long(alpha);
751 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
754 for (std::size_t i=0; i < host_v1.size(); ++i)
755 host_v1[i] += host_v2[i] *
float(alpha);
756 vcl_v1 += vcl_v2 * float(alpha);
758 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
761 for (std::size_t i=0; i < host_v1.size(); ++i)
762 host_v1[i] +=
double(alpha) * host_v2[i];
763 vcl_v1 += double(alpha) * vcl_v2;
765 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
769 std::cout <<
"Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
770 for (std::size_t i=0; i < host_v1.size(); ++i)
771 host_v2[i] = NumericT(3.1415) * host_v1[i];
775 for (std::size_t i=0; i < host_v1.size(); ++i)
776 host_v1[i] = host_v1[i] + alpha * host_v2[i];
777 vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
779 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
782 std::cout <<
"Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
783 for (std::size_t i=0; i < host_v1.size(); ++i)
784 host_v2[i] = NumericT(3.1415) * host_v1[i];
788 for (std::size_t i=0; i < host_v1.size(); ++i)
789 host_v1[i] = host_v1[i] + alpha * host_v2[i];
790 vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
792 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
795 std::cout <<
"Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
796 for (std::size_t i=0; i < host_v1.size(); ++i)
797 host_v2[i] = NumericT(3.1415) * host_v1[i];
801 for (std::size_t i=0; i < host_v1.size(); ++i)
802 host_v1[i] = alpha * host_v1[i] + beta * host_v2[i];
803 vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
805 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
809 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
810 for (std::size_t i=0; i < host_v1.size(); ++i)
811 host_v2[i] = NumericT(3.1415) * host_v1[i];
815 for (std::size_t i=0; i < host_v1.size(); ++i)
816 host_v1[i] += alpha * host_v1[i] + beta * host_v2[i];
817 vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
819 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
822 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
823 for (std::size_t i=0; i < host_v1.size(); ++i)
824 host_v2[i] = NumericT(3.1415) * host_v1[i];
828 for (std::size_t i=0; i < host_v1.size(); ++i)
829 host_v1[i] += alpha * host_v1[i] - beta * host_v2[i];
830 vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
832 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
837 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
838 for (std::size_t i=0; i < host_v1.size(); ++i)
839 host_v2[i] = NumericT(3.1415) * host_v1[i];
843 for (std::size_t i=0; i < host_v1.size(); ++i)
844 host_v1[i] += alpha * host_v2[i];
845 vcl_v1 += gpu_alpha * vcl_v2;
847 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
854 std::cout <<
"Testing division-add on vector with CPU scalar (right)..." << std::endl;
855 for (
size_t i=0; i < host_v1.size(); ++i)
857 host_v1[i] = NumericT(1.0) + random<NumericT>();
858 host_v2[i] = NumericT(3.1415) * host_v1[i];
863 for (std::size_t i=0; i < host_v1.size(); ++i)
864 host_v1[i] = host_v1[i] + host_v2[i] /
long(alpha);
865 vcl_v1 = vcl_v1 + vcl_v2 / long(alpha);
867 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
870 for (std::size_t i=0; i < host_v1.size(); ++i)
871 host_v1[i] = host_v1[i] + host_v2[i] /
float(alpha);
872 vcl_v1 = vcl_v1 + vcl_v2 / float(alpha);
874 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
877 for (std::size_t i=0; i < host_v1.size(); ++i)
878 host_v1[i] = host_v1[i] + host_v2[i] /
double(alpha);
879 vcl_v1 = vcl_v1 + vcl_v2 / double(alpha);
881 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
885 std::cout <<
"Testing division-add on vector with CPU scalar (left)..." << std::endl;
886 for (std::size_t i=0; i < host_v1.size(); ++i)
887 host_v2[i] = NumericT(3.1415) * host_v1[i];
891 for (std::size_t i=0; i < host_v1.size(); ++i)
892 host_v1[i] = host_v1[i] /
float(alpha) + host_v2[i];
893 vcl_v1 = vcl_v1 / float(alpha) + vcl_v2;
895 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
898 for (std::size_t i=0; i < host_v1.size(); ++i)
899 host_v1[i] = host_v1[i] /
double(alpha) + host_v2[i];
900 vcl_v1 = vcl_v1 / double(alpha) + vcl_v2;
902 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
906 std::cout <<
"Testing division-add on vector with CPU scalar (both)..." << std::endl;
907 for (std::size_t i=0; i < host_v1.size(); ++i)
908 host_v2[i] = NumericT(3.1415) * host_v1[i];
912 for (std::size_t i=0; i < host_v1.size(); ++i)
913 host_v1[i] = host_v1[i] /
float(alpha) + host_v2[i] / float(beta);
914 vcl_v1 = vcl_v1 / float(alpha) + vcl_v2 / float(beta);
916 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
919 for (std::size_t i=0; i < host_v1.size(); ++i)
920 host_v1[i] = host_v1[i] /
double(alpha) + host_v2[i] / double(beta);
921 vcl_v1 = vcl_v1 / double(alpha) + vcl_v2 / double(beta);
923 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
926 std::cout <<
"Testing division-multiply-add on vector with CPU scalar..." << std::endl;
927 for (std::size_t i=0; i < host_v1.size(); ++i)
928 host_v2[i] = NumericT(3.1415) * host_v1[i];
932 for (std::size_t i=0; i < host_v1.size(); ++i)
933 host_v1[i] = host_v1[i] / alpha + host_v2[i] * beta;
934 vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;
936 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
940 std::cout <<
"Testing multiply-division-add on vector with CPU scalar..." << std::endl;
941 for (std::size_t i=0; i < host_v1.size(); ++i)
942 host_v2[i] = NumericT(3.1415) * host_v1[i];
946 for (std::size_t i=0; i < host_v1.size(); ++i)
947 host_v1[i] = host_v1[i] * alpha + host_v2[i] / beta;
948 vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;
950 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
955 std::cout <<
"Testing inplace division-add on vector with CPU scalar..." << std::endl;
956 for (std::size_t i=0; i < host_v1.size(); ++i)
957 host_v2[i] = NumericT(3.1415) * host_v1[i];
961 for (std::size_t i=0; i < host_v1.size(); ++i)
962 host_v1[i] += host_v2[i] / alpha;
963 vcl_v1 += vcl_v2 / alpha;
965 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
969 std::cout <<
"Testing division-add on vector with GPU scalar (right)..." << std::endl;
970 for (std::size_t i=0; i < host_v1.size(); ++i)
971 host_v2[i] = NumericT(3.1415) * host_v1[i];
975 for (std::size_t i=0; i < host_v1.size(); ++i)
976 host_v1[i] = host_v1[i] + host_v2[i] / alpha;
977 vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
979 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
982 std::cout <<
"Testing division-add on vector with GPU scalar (left)..." << std::endl;
983 for (std::size_t i=0; i < host_v1.size(); ++i)
984 host_v2[i] = NumericT(3.1415) * host_v1[i];
988 for (std::size_t i=0; i < host_v1.size(); ++i)
989 host_v1[i] = host_v1[i] + host_v2[i] / alpha;
990 vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
992 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
995 std::cout <<
"Testing division-add on vector with GPU scalar (both)..." << std::endl;
996 for (std::size_t i=0; i < host_v1.size(); ++i)
997 host_v2[i] = NumericT(3.1415) * host_v1[i];
1001 for (std::size_t i=0; i < host_v1.size(); ++i)
1002 host_v1[i] = host_v1[i] / alpha + host_v2[i] / beta;
1003 vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1005 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1006 return EXIT_FAILURE;
1009 std::cout <<
"Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
1010 for (std::size_t i=0; i < host_v1.size(); ++i)
1011 host_v2[i] = NumericT(3.1415) * host_v1[i];
1015 for (std::size_t i=0; i < host_v1.size(); ++i)
1016 host_v1[i] += host_v1[i] / alpha + host_v2[i] / beta;
1017 vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1019 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1020 return EXIT_FAILURE;
1022 std::cout <<
"Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl;
1023 for (std::size_t i=0; i < host_v1.size(); ++i)
1024 host_v2[i] = NumericT(3.1415) * host_v1[i];
1028 for (std::size_t i=0; i < host_v1.size(); ++i)
1029 host_v1[i] += host_v1[i] / alpha - host_v2[i] / beta;
1030 vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1032 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1033 return EXIT_FAILURE;
1035 std::cout <<
"Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
1036 for (std::size_t i=0; i < host_v1.size(); ++i)
1037 host_v2[i] = NumericT(3.1415) * host_v1[i];
1041 for (std::size_t i=0; i < host_v1.size(); ++i)
1042 host_v1[i] += host_v1[i] / alpha + host_v2[i] * beta;
1043 vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1045 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1046 return EXIT_FAILURE;
1048 std::cout <<
"Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl;
1049 for (std::size_t i=0; i < host_v1.size(); ++i)
1050 host_v2[i] = NumericT(3.1415) * host_v1[i];
1054 for (std::size_t i=0; i < host_v1.size(); ++i)
1055 host_v1[i] += host_v1[i] * alpha - host_v2[i] / beta;
1056 vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1058 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1059 return EXIT_FAILURE;
1063 std::cout <<
"Testing inplace division-add on vector with GPU scalar..." << std::endl;
1064 for (std::size_t i=0; i < host_v1.size(); ++i)
1065 host_v2[i] = NumericT(3.1415) * host_v1[i];
1069 for (std::size_t i=0; i < host_v1.size(); ++i)
1070 host_v1[i] += host_v2[i] * alpha;
1071 vcl_v1 += vcl_v2 * gpu_alpha;
1073 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1074 return EXIT_FAILURE;
1081 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl;
1082 for (
size_t i=0; i < host_v1.size(); ++i)
1084 host_v1[i] = NumericT(1.0) + random<NumericT>();
1085 host_v2[i] = NumericT(3.1415) * host_v1[i];
1090 for (std::size_t i=0; i < host_v1.size(); ++i)
1091 host_v1[i] = host_v1[i] - alpha * host_v2[i];
1092 vcl_v1 = vcl_v1 - alpha * vcl_v2;
1094 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1095 return EXIT_FAILURE;
1098 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
1099 for (std::size_t i=0; i < host_v1.size(); ++i)
1100 host_v2[i] = NumericT(3.1415) * host_v1[i];
1104 for (std::size_t i=0; i < host_v1.size(); ++i)
1105 host_v1[i] = alpha * host_v1[i] - host_v2[i];
1106 vcl_v1 = alpha * vcl_v1 - vcl_v2;
1108 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1109 return EXIT_FAILURE;
1111 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
1112 for (std::size_t i=0; i < host_v1.size(); ++i)
1113 host_v2[i] = NumericT(3.1415) * host_v1[i];
1117 for (std::size_t i=0; i < host_v1.size(); ++i)
1118 host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1119 vcl_v1 = alpha * vcl_v1 - beta * vcl_v2;
1121 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1122 return EXIT_FAILURE;
1125 std::cout <<
"Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
1126 for (std::size_t i=0; i < host_v1.size(); ++i)
1127 host_v2[i] = NumericT(3.1415) * host_v1[i];
1131 for (std::size_t i=0; i < host_v1.size(); ++i)
1132 host_v1[i] -= alpha * host_v2[i];
1133 vcl_v1 -= alpha * vcl_v2;
1135 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1136 return EXIT_FAILURE;
1139 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
1140 for (std::size_t i=0; i < host_v1.size(); ++i)
1141 host_v2[i] = NumericT(3.1415) * host_v1[i];
1145 for (std::size_t i=0; i < host_v1.size(); ++i)
1146 host_v1[i] = host_v1[i] - alpha * host_v2[i];
1147 vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1149 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1150 return EXIT_FAILURE;
1152 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl;
1153 for (std::size_t i=0; i < host_v1.size(); ++i)
1154 host_v2[i] = NumericT(3.1415) * host_v1[i];
1158 for (std::size_t i=0; i < host_v1.size(); ++i)
1159 host_v1[i] = host_v1[i] - alpha * host_v2[i];
1160 vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1162 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1163 return EXIT_FAILURE;
1165 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
1166 for (std::size_t i=0; i < host_v1.size(); ++i)
1167 host_v2[i] = NumericT(3.1415) * host_v1[i];
1171 for (std::size_t i=0; i < host_v1.size(); ++i)
1172 host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1173 vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1175 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1176 return EXIT_FAILURE;
1178 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1179 for (std::size_t i=0; i < host_v1.size(); ++i)
1180 host_v2[i] = NumericT(3.1415) * host_v1[i];
1184 for (std::size_t i=0; i < host_v1.size(); ++i)
1185 host_v1[i] -= alpha * host_v1[i] + beta * host_v2[i];
1186 vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
1188 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1189 return EXIT_FAILURE;
1191 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1192 for (std::size_t i=0; i < host_v1.size(); ++i)
1193 host_v2[i] = NumericT(3.1415) * host_v1[i];
1197 for (std::size_t i=0; i < host_v1.size(); ++i)
1198 host_v1[i] -= alpha * host_v1[i] - beta * host_v2[i];
1199 vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1201 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1202 return EXIT_FAILURE;
1205 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
1206 for (std::size_t i=0; i < host_v1.size(); ++i)
1207 host_v2[i] = NumericT(3.1415) * host_v1[i];
1211 for (std::size_t i=0; i < host_v1.size(); ++i)
1212 host_v1[i] -= alpha * host_v2[i];
1213 vcl_v1 -= gpu_alpha * vcl_v2;
1215 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1216 return EXIT_FAILURE;
1223 std::cout <<
"Testing division-subtract on vector with CPU scalar (right)..." << std::endl;
1224 for (
size_t i=0; i < host_v1.size(); ++i)
1226 host_v1[i] = NumericT(1.0) + random<NumericT>();
1227 host_v2[i] = NumericT(3.1415) * host_v1[i];
1232 for (std::size_t i=0; i < host_v1.size(); ++i)
1233 host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1234 vcl_v1 = vcl_v1 - vcl_v2 / alpha;
1236 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1237 return EXIT_FAILURE;
1240 std::cout <<
"Testing division-subtract on vector with CPU scalar (left)..." << std::endl;
1241 for (std::size_t i=0; i < host_v1.size(); ++i)
1242 host_v2[i] = NumericT(3.1415) * host_v1[i];
1246 for (std::size_t i=0; i < host_v1.size(); ++i)
1247 host_v1[i] = host_v1[i] / alpha - host_v2[i];
1248 vcl_v1 = vcl_v1 / alpha - vcl_v2;
1250 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1251 return EXIT_FAILURE;
1253 std::cout <<
"Testing division-subtract on vector with CPU scalar (both)..." << std::endl;
1254 for (std::size_t i=0; i < host_v1.size(); ++i)
1255 host_v2[i] = NumericT(3.1415) * host_v1[i];
1259 for (std::size_t i=0; i < host_v1.size(); ++i)
1260 host_v1[i] = host_v1[i] / alpha - host_v2[i] / alpha;
1261 vcl_v1 = vcl_v1 / alpha - vcl_v2 / alpha;
1263 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1264 return EXIT_FAILURE;
1267 std::cout <<
"Testing inplace division-subtract on vector with CPU scalar..." << std::endl;
1268 for (std::size_t i=0; i < host_v1.size(); ++i)
1269 host_v2[i] = NumericT(3.1415) * host_v1[i];
1273 for (std::size_t i=0; i < host_v1.size(); ++i)
1274 host_v1[i] -= host_v2[i] / alpha;
1275 vcl_v1 -= vcl_v2 / alpha;
1277 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1278 return EXIT_FAILURE;
1280 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1281 for (std::size_t i=0; i < host_v1.size(); ++i)
1282 host_v2[i] = NumericT(3.1415) * host_v1[i];
1286 for (std::size_t i=0; i < host_v1.size(); ++i)
1287 host_v1[i] -= host_v2[i] / alpha;
1288 vcl_v1 -= vcl_v2 / gpu_alpha;
1290 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1291 return EXIT_FAILURE;
1294 std::cout <<
"Testing division-subtract on vector with GPU scalar (right)..." << std::endl;
1295 for (std::size_t i=0; i < host_v1.size(); ++i)
1296 host_v2[i] = NumericT(3.1415) * host_v1[i];
1300 for (std::size_t i=0; i < host_v1.size(); ++i)
1301 host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1302 vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1304 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1305 return EXIT_FAILURE;
1307 std::cout <<
"Testing division-subtract on vector with GPU scalar (left)..." << std::endl;
1308 for (std::size_t i=0; i < host_v1.size(); ++i)
1309 host_v2[i] = NumericT(3.1415) * host_v1[i];
1313 for (std::size_t i=0; i < host_v1.size(); ++i)
1314 host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1315 vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1317 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1318 return EXIT_FAILURE;
1320 std::cout <<
"Testing division-subtract on vector with GPU scalar (both)..." << std::endl;
1321 for (std::size_t i=0; i < host_v1.size(); ++i)
1322 host_v2[i] = NumericT(3.1415) * host_v1[i];
1326 for (std::size_t i=0; i < host_v1.size(); ++i)
1327 host_v1[i] = host_v1[i] / alpha - host_v2[i] / beta;
1328 vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1330 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1331 return EXIT_FAILURE;
1333 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1334 for (std::size_t i=0; i < host_v1.size(); ++i)
1335 host_v2[i] = NumericT(3.1415) * host_v1[i];
1339 for (std::size_t i=0; i < host_v1.size(); ++i)
1340 host_v1[i] -= host_v1[i] / alpha + host_v2[i] / beta;
1341 vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1343 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1344 return EXIT_FAILURE;
1346 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1347 for (std::size_t i=0; i < host_v1.size(); ++i)
1348 host_v2[i] = NumericT(3.1415) * host_v1[i];
1352 for (std::size_t i=0; i < host_v1.size(); ++i)
1353 host_v1[i] -= host_v1[i] / alpha - host_v2[i] / beta;
1354 vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1356 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1357 return EXIT_FAILURE;
1359 std::cout <<
"Testing multiply-division-subtract on vector with GPU scalar..." << std::endl;
1360 for (std::size_t i=0; i < host_v1.size(); ++i)
1361 host_v2[i] = NumericT(3.1415) * host_v1[i];
1365 for (std::size_t i=0; i < host_v1.size(); ++i)
1366 host_v1[i] = host_v1[i] * alpha - host_v2[i] / beta;
1367 vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1369 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1370 return EXIT_FAILURE;
1372 std::cout <<
"Testing division-multiply-subtract on vector with GPU scalar..." << std::endl;
1373 for (std::size_t i=0; i < host_v1.size(); ++i)
1374 host_v2[i] = NumericT(3.1415) * host_v1[i];
1378 for (std::size_t i=0; i < host_v1.size(); ++i)
1379 host_v1[i] = host_v1[i] / alpha - host_v2[i] * beta;
1380 vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1382 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1383 return EXIT_FAILURE;
1385 std::cout <<
"Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl;
1386 for (std::size_t i=0; i < host_v1.size(); ++i)
1387 host_v2[i] = NumericT(3.1415) * host_v1[i];
1391 for (std::size_t i=0; i < host_v1.size(); ++i)
1392 host_v1[i] -= host_v1[i] * alpha + host_v2[i] / beta;
1393 vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta;
1395 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1396 return EXIT_FAILURE;
1398 std::cout <<
"Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." << std::endl;
1399 for (std::size_t i=0; i < host_v1.size(); ++i)
1400 host_v2[i] = NumericT(3.1415) * host_v1[i];
1404 for (std::size_t i=0; i < host_v1.size(); ++i)
1405 host_v1[i] -= host_v1[i] / alpha + host_v2[i] * beta;
1406 vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1408 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1409 return EXIT_FAILURE;
1411 std::cout <<
"Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1412 for (std::size_t i=0; i < host_v1.size(); ++i)
1413 host_v2[i] = NumericT(3.1415) * host_v1[i];
1417 for (std::size_t i=0; i < host_v1.size(); ++i)
1418 host_v1[i] -= host_v1[i] * alpha - host_v2[i] / beta;
1419 vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1421 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1422 return EXIT_FAILURE;
1424 std::cout <<
"Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1425 for (std::size_t i=0; i < host_v1.size(); ++i)
1426 host_v2[i] = NumericT(3.1415) * host_v1[i];
1430 for (std::size_t i=0; i < host_v1.size(); ++i)
1431 host_v1[i] -= host_v1[i] / alpha - host_v2[i] * beta;
1432 vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1434 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1435 return EXIT_FAILURE;
1438 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1439 for (std::size_t i=0; i < host_v1.size(); ++i)
1440 host_v2[i] = NumericT(3.1415) * host_v1[i];
1444 for (std::size_t i=0; i < host_v1.size(); ++i)
1445 host_v1[i] -= alpha * host_v2[i];
1446 vcl_v1 -= gpu_alpha * vcl_v2;
1448 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1449 return EXIT_FAILURE;
1456 for (std::size_t i=0; i < host_v1.size(); ++i)
1458 host_v1[i] = NumericT(1.0) + random<NumericT>();
1459 host_v2[i] = NumericT(3.1415) * host_v1[i];
1464 std::cout <<
"Testing three vector additions..." << std::endl;
1465 for (std::size_t i=0; i < host_v1.size(); ++i)
1466 host_v1[i] = host_v2[i] + host_v1[i] + host_v2[i];
1467 vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;
1469 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1470 return EXIT_FAILURE;
1473 for (std::size_t i=0; i < host_v1.size(); ++i)
1474 host_v2[i] = NumericT(3.1415) * host_v1[i];
1478 std::cout <<
"Testing complicated vector expression with CPU scalar..." << std::endl;
1479 for (std::size_t i=0; i < host_v1.size(); ++i)
1480 host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1481 vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2);
1483 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1484 return EXIT_FAILURE;
1486 std::cout <<
"Testing complicated vector expression with GPU scalar..." << std::endl;
1487 for (std::size_t i=0; i < host_v1.size(); ++i)
1488 host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1489 vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2);
1491 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1492 return EXIT_FAILURE;
1495 for (std::size_t i=0; i < host_v1.size(); ++i)
1496 host_v2[i] = NumericT(3.1415) * host_v1[i];
1500 std::cout <<
"Testing swap..." << std::endl;
1501 for (std::size_t i=0; i < host_v1.size(); ++i)
1503 NumericT temp = host_v1[i];
1504 host_v1[i] = host_v2[i];
1507 swap(vcl_v1, vcl_v2);
1509 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1510 return EXIT_FAILURE;
1513 for (std::size_t i=0; i<host_v1.size(); ++i)
1515 host_v1[i] = NumericT(1.0) + random<NumericT>();
1516 host_v2[i] = NumericT(5.0) + random<NumericT>();
1522 std::cout <<
"Testing unary operator-..." << std::endl;
1523 for (std::size_t i=0; i < host_v1.size(); ++i)
1524 host_v1[i] = - host_v2[i];
1527 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1528 return EXIT_FAILURE;
1531 std::cout <<
"Testing elementwise multiplication..." << std::endl;
1532 std::cout <<
" v1 = element_prod(v1, v2);" << std::endl;
1533 for (std::size_t i=0; i < host_v1.size(); ++i)
1534 host_v1[i] = host_v1[i] * host_v2[i];
1537 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1538 return EXIT_FAILURE;
1540 std::cout <<
" v1 += element_prod(v1, v2);" << std::endl;
1541 for (std::size_t i=0; i < host_v1.size(); ++i)
1542 host_v1[i] += host_v1[i] * host_v2[i];
1545 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1546 return EXIT_FAILURE;
1548 std::cout <<
" v1 -= element_prod(v1, v2);" << std::endl;
1549 for (std::size_t i=0; i < host_v1.size(); ++i)
1550 host_v1[i] -= host_v1[i] * host_v2[i];
1553 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1554 return EXIT_FAILURE;
1557 std::cout <<
" v1 = element_prod(v1 + v2, v2);" << std::endl;
1558 for (std::size_t i=0; i < host_v1.size(); ++i)
1559 host_v1[i] = (host_v1[i] + host_v2[i]) * host_v2[i];
1562 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1563 return EXIT_FAILURE;
1565 std::cout <<
" v1 += element_prod(v1 + v2, v2);" << std::endl;
1566 for (std::size_t i=0; i < host_v1.size(); ++i)
1567 host_v1[i] += (host_v1[i] + host_v2[i]) * host_v2[i];
1570 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1571 return EXIT_FAILURE;
1573 std::cout <<
" v1 -= element_prod(v1 + v2, v2);" << std::endl;
1574 for (std::size_t i=0; i < host_v1.size(); ++i)
1575 host_v1[i] -= (host_v1[i] + host_v2[i]) * host_v2[i];
1578 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1579 return EXIT_FAILURE;
1582 std::cout <<
" v1 = element_prod(v1, v2 + v1);" << std::endl;
1583 for (std::size_t i=0; i < host_v1.size(); ++i)
1584 host_v1[i] = host_v1[i] * (host_v2[i] + host_v1[i]);
1587 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1588 return EXIT_FAILURE;
1590 std::cout <<
" v1 += element_prod(v1, v2 + v1);" << std::endl;
1591 for (std::size_t i=0; i < host_v1.size(); ++i)
1592 host_v1[i] += host_v1[i] * (host_v2[i] + host_v1[i]);
1595 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1596 return EXIT_FAILURE;
1598 std::cout <<
" v1 -= element_prod(v1, v2 + v1);" << std::endl;
1599 for (std::size_t i=0; i < host_v1.size(); ++i)
1600 host_v1[i] -= host_v1[i] * (host_v2[i] + host_v1[i]);
1603 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1604 return EXIT_FAILURE;
1607 std::cout <<
" v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
1608 for (std::size_t i=0; i < host_v1.size(); ++i)
1609 host_v1[i] = (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1612 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1613 return EXIT_FAILURE;
1615 std::cout <<
" v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
1616 for (std::size_t i=0; i < host_v1.size(); ++i)
1617 host_v1[i] += (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1620 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1621 return EXIT_FAILURE;
1623 std::cout <<
" v1 -= element_prod(v1 + v2, v2 + v1);" << std::endl;
1624 for (std::size_t i=0; i < host_v1.size(); ++i)
1625 host_v1[i] -= (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1628 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1629 return EXIT_FAILURE;
1632 std::cout <<
"Testing elementwise division..." << std::endl;
1633 for (std::size_t i=0; i<host_v1.size(); ++i)
1635 host_v1[i] = NumericT(1.0) + random<NumericT>();
1636 host_v2[i] = NumericT(5.0) + random<NumericT>();
1642 for (std::size_t i=0; i < host_v1.size(); ++i)
1643 host_v1[i] = host_v1[i] / host_v2[i];
1646 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1647 return EXIT_FAILURE;
1649 for (std::size_t i=0; i < host_v1.size(); ++i)
1650 host_v1[i] += host_v1[i] / host_v2[i];
1653 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1654 return EXIT_FAILURE;
1656 for (std::size_t i=0; i < host_v1.size(); ++i)
1657 host_v1[i] -= host_v1[i] / host_v2[i];
1660 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1661 return EXIT_FAILURE;
1664 for (std::size_t i=0; i < host_v1.size(); ++i)
1665 host_v1[i] = (host_v1[i] + host_v2[i]) / host_v2[i];
1668 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1669 return EXIT_FAILURE;
1671 for (std::size_t i=0; i < host_v1.size(); ++i)
1672 host_v1[i] += (host_v1[i] + host_v2[i]) / host_v2[i];
1675 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1676 return EXIT_FAILURE;
1678 for (std::size_t i=0; i < host_v1.size(); ++i)
1679 host_v1[i] -= (host_v1[i] + host_v2[i]) / host_v2[i];
1682 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1683 return EXIT_FAILURE;
1686 for (std::size_t i=0; i < host_v1.size(); ++i)
1687 host_v1[i] = host_v1[i] / (host_v2[i] + host_v1[i]);
1690 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1691 return EXIT_FAILURE;
1693 for (std::size_t i=0; i < host_v1.size(); ++i)
1694 host_v1[i] += host_v1[i] / (host_v2[i] + host_v1[i]);
1697 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1698 return EXIT_FAILURE;
1700 for (std::size_t i=0; i < host_v1.size(); ++i)
1701 host_v1[i] -= host_v1[i] / (host_v2[i] + host_v1[i]);
1704 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1705 return EXIT_FAILURE;
1708 for (std::size_t i=0; i < host_v1.size(); ++i)
1709 host_v1[i] = (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1712 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1713 return EXIT_FAILURE;
1715 for (std::size_t i=0; i < host_v1.size(); ++i)
1716 host_v1[i] += (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1719 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1720 return EXIT_FAILURE;
1722 for (std::size_t i=0; i < host_v1.size(); ++i)
1723 host_v1[i] -= (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1726 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1727 return EXIT_FAILURE;
1730 std::cout <<
"Testing elementwise power function..." << std::endl;
1731 for (std::size_t i=0; i<host_v1.size(); ++i)
1733 host_v1[i] = NumericT(1.1) + NumericT(0.5) * random<NumericT>();
1734 host_v2[i] = NumericT(1.1) + NumericT(0.5) * random<NumericT>();
1736 std::vector<NumericT> std_v3(host_v1.size());
1742 for (std::size_t i=0; i<host_v3.size(); ++i)
1743 host_v3[i] = std::pow(host_v1[i], host_v2[i]);
1744 vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1746 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1748 std::cerr <<
"** Failure in v1 = pow(v1, v2);" << std::endl;
1749 return EXIT_FAILURE;
1753 for (std::size_t i=0; i<host_v3.size(); ++i)
1754 host_v3[i] = host_v1[i];
1755 for (std::size_t i=0; i<host_v3.size(); ++i)
1756 host_v3[i] += std::pow(host_v1[i], host_v2[i]);
1757 vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1759 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1761 std::cerr <<
"** Failure in v1 += pow(v1, v2);" << std::endl;
1762 return EXIT_FAILURE;
1766 for (std::size_t i=0; i<host_v3.size(); ++i)
1767 host_v3[i] = host_v1[i];
1768 for (std::size_t i=0; i<host_v3.size(); ++i)
1769 host_v3[i] -= std::pow(host_v1[i], host_v2[i]);
1770 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1772 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1774 std::cerr <<
"** Failure in v1 -= pow(v1, v2);" << std::endl;
1775 return EXIT_FAILURE;
1780 for (std::size_t i=0; i<host_v3.size(); ++i)
1781 host_v3[i] = host_v1[i];
1782 for (std::size_t i=0; i<host_v3.size(); ++i)
1783 host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1784 vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1786 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1788 std::cerr <<
"** Failure in v1 = pow(v1 + v2, v2);" << std::endl;
1789 return EXIT_FAILURE;
1793 for (std::size_t i=0; i<host_v3.size(); ++i)
1794 host_v3[i] = host_v1[i];
1795 for (std::size_t i=0; i<host_v3.size(); ++i)
1796 host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1797 vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1799 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1801 std::cerr <<
"** Failure in v1 += pow(v1 + v2, v2);" << std::endl;
1802 return EXIT_FAILURE;
1806 for (std::size_t i=0; i<host_v3.size(); ++i)
1807 host_v3[i] = host_v1[i];
1808 for (std::size_t i=0; i<host_v3.size(); ++i)
1809 host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1810 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1812 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1814 std::cerr <<
"** Failure in v1 -= pow(v1 + v2, v2);" << std::endl;
1815 return EXIT_FAILURE;
1820 for (std::size_t i=0; i<host_v3.size(); ++i)
1821 host_v3[i] = host_v1[i];
1822 for (std::size_t i=0; i<host_v3.size(); ++i)
1823 host_v3[i] = std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1824 vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1826 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1828 std::cerr <<
"** Failure in v1 = pow(v1, v2 + v1);" << std::endl;
1829 return EXIT_FAILURE;
1833 for (std::size_t i=0; i<host_v3.size(); ++i)
1834 host_v3[i] = host_v1[i];
1835 for (std::size_t i=0; i<host_v3.size(); ++i)
1836 host_v3[i] += std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1837 vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1839 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1841 std::cerr <<
"** Failure in v1 += pow(v1, v2 + v1);" << std::endl;
1842 return EXIT_FAILURE;
1846 for (std::size_t i=0; i<host_v3.size(); ++i)
1847 host_v3[i] = host_v1[i];
1848 for (std::size_t i=0; i<host_v3.size(); ++i)
1849 host_v3[i] -= std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1850 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1852 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1854 std::cerr <<
"** Failure in v1 -= pow(v1, v2 + v1);" << std::endl;
1855 return EXIT_FAILURE;
1860 for (std::size_t i=0; i<host_v3.size(); ++i)
1861 host_v3[i] = host_v1[i];
1862 for (std::size_t i=0; i<host_v3.size(); ++i)
1863 host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1864 vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1866 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1868 std::cerr <<
"** Failure in v1 = pow(v1 + v2, v2 + v1);" << std::endl;
1869 return EXIT_FAILURE;
1873 for (std::size_t i=0; i<host_v3.size(); ++i)
1874 host_v3[i] = host_v1[i];
1875 for (std::size_t i=0; i<host_v3.size(); ++i)
1876 host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1877 vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1879 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1881 std::cerr <<
"** Failure in v1 += pow(v1 + v2, v2 + v1);" << std::endl;
1882 return EXIT_FAILURE;
1886 for (std::size_t i=0; i<host_v3.size(); ++i)
1887 host_v3[i] = host_v1[i];
1888 for (std::size_t i=0; i<host_v3.size(); ++i)
1889 host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1890 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1892 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1894 std::cerr <<
"** Failure in v1 -= pow(v1 + v2, v2 + v1);" << std::endl;
1895 return EXIT_FAILURE;
1898 std::cout <<
"Testing unary elementwise operations..." << std::endl;
1899 for (
size_t i=0; i < host_v1.size(); ++i)
1900 host_v1[i] = random<NumericT>() / NumericT(4);
/**
 * Expands to six consistency checks for one unary elementwise function.
 *
 * FUNCNAME is pasted twice: as std::FUNCNAME for the host reference value and as
 * viennacl::linalg::element_FUNCNAME for the ViennaCL expression under test.
 *
 * The expansion refers to host_v1, host_v2, vcl_v1, vcl_v2, epsilon, NumericT, check() and
 * proxy_copy() by name, so all of them must be visible at the point of use.  It also contains
 * `return EXIT_FAILURE;`, so it can only be used inside a function returning int.
 *
 * Checked forms, in order:
 *   v1  = f(v2),  v1  = f(v1 + v2),
 *   v1 += f(v1),  v1 += f(v1 + v2),
 *   v1 -= f(v2),  v1 -= f(v1 + v2)
 *
 * Only block comments are used inside the definition: a line comment would swallow the
 * continuation backslash and with it the rest of the macro.
 */
#define GENERATE_UNARY_OP_TEST(FUNCNAME) \
  /* Derive v2 from the current v1 on the host, then copy both operands into vcl_v1 / vcl_v2. */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v2[i] = NumericT(3.1415) * host_v1[i]; \
  proxy_copy(host_v1, vcl_v1); \
  proxy_copy(host_v2, vcl_v2); \
  \
  /* v1 = f(v2) */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] = std::FUNCNAME(host_v2[i]); \
  vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 = " << #FUNCNAME << "(v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  /* v1 = f(v1 + v2) */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] = std::FUNCNAME(host_v1[i] + host_v2[i]); \
  vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 = " << #FUNCNAME << "(v1 + v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  /* v1 += f(v1): the operand is v1 itself, so the message names v1 (it used to say v2). */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] += std::FUNCNAME(host_v1[i]); \
  vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 += " << #FUNCNAME << "(v1)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  /* v1 += f(v1 + v2) */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] += std::FUNCNAME(host_v1[i] + host_v2[i]); \
  vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 += " << #FUNCNAME << "(v1 + v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  /* v1 -= f(v2) */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] -= std::FUNCNAME(host_v2[i]); \
  vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 -= " << #FUNCNAME << "(v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  /* v1 -= f(v1 + v2) */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] -= std::FUNCNAME(host_v1[i] + host_v2[i]); \
  vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 -= " << #FUNCNAME << "(v1 + v2)" << std::endl; \
    return EXIT_FAILURE; \
  }
1970 for (std::size_t i=0; i < host_v1.size(); ++i)
1971 host_v1[i] = random<NumericT>() / NumericT(4);
1986 for (std::size_t i=0; i<host_v1.size(); ++i)
1987 host_v2[i] = NumericT(3.1415) * host_v1[i];
1991 std::cout <<
"Testing another complicated vector expression with CPU scalars..." << std::endl;
1992 for (std::size_t i=0; i<host_v1.size(); ++i)
1993 host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
1994 vcl_v1 = vcl_v2 / alpha + beta * (vcl_v1 - alpha*vcl_v2);
1996 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1997 return EXIT_FAILURE;
1999 std::cout <<
"Testing another complicated vector expression with GPU scalars..." << std::endl;
2000 for (std::size_t i=0; i<host_v1.size(); ++i)
2001 host_v2[i] = NumericT(3.1415) * host_v1[i];
2005 for (std::size_t i=0; i<host_v1.size(); ++i)
2006 host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
2007 vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * (vcl_v1 - gpu_alpha*vcl_v2);
2009 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2010 return EXIT_FAILURE;
2013 std::cout <<
"Testing lenghty sum of scaled vectors..." << std::endl;
2014 for (std::size_t i=0; i<host_v1.size(); ++i)
2015 host_v2[i] = NumericT(3.1415) * host_v1[i];
2019 for (std::size_t i=0; i<host_v1.size(); ++i)
2020 host_v1[i] = host_v2[i] / alpha + beta * host_v1[i] - alpha * host_v2[i] + beta * host_v1[i] - alpha * host_v1[i];
2021 vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1;
2023 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2024 return EXIT_FAILURE;
// Driver for one numeric type (called as test<NumericT>(epsilon)): fills two host vectors with
// reference data, sets up ViennaCL ranges and slices over the device copies, verifies that
// vectors created from a range / slice match the host data, and then runs the five-argument
// test() for every combination of {vector, range, slice} as first and second operand.
// Returns EXIT_FAILURE as soon as one check or sub-test fails, EXIT_SUCCESS otherwise.
// NOTE(review): this listing omits lines of the original file (the function signature, braces
// and the declarations of the host_* / vcl_* objects used below), so only the visible
// statements are described here.
2031 template<
typename NumericT,
typename Epsilon >
2034 int retval = EXIT_SUCCESS;
2035 std::size_t
size = 24656;
2037 std::cout <<
"Running tests for vector of size " << size << std::endl;
// Host reference data: two vectors of `size` entries, each entry 1.0 + random<NumericT>().
2042 std::vector<NumericT> std_full_vec(size);
2043 std::vector<NumericT> std_full_vec2(std_full_vec.size());
2045 for (std::size_t i=0; i<std_full_vec.size(); ++i)
2047 std_full_vec[i] = NumericT(1.0) + random<NumericT>();
2048 std_full_vec2[i] = NumericT(1.0) + random<NumericT>();
// Range bounds: second quarter of the first vector, third quarter of the second vector.
// NOTE(review): the statements consuming r1_* / r2_* are not visible in this listing —
// presumably the host-side proxies; confirm against the full file.
2051 std::size_t r1_start = std_full_vec.size() / 4;
2052 std::size_t r1_stop = 2 * std_full_vec.size() / 4;
2053 std::size_t r2_start = 2 * std_full_vec2.size() / 4;
2054 std::size_t r2_stop = 3 * std_full_vec2.size() / 4;
// Slice parameters (start, increment, number of entries): increment 3 on the first vector,
// increment 2 on the second, a quarter of the full length each.
2058 std::size_t s1_start = std_full_vec.size() / 4;
2059 std::size_t s1_inc = 3;
2060 std::size_t s1_size = std_full_vec.size() / 4;
2061 std::size_t s2_start = 2 * std_full_vec2.size() / 4;
2062 std::size_t s2_inc = 2;
2063 std::size_t s2_size = std_full_vec2.size() / 4;
// Copy the second host vector into its ViennaCL counterpart.
2074 viennacl::copy(std_full_vec2.begin(), std_full_vec2.end(), vcl_full_vec2.begin());
// ViennaCL ranges built from the same expressions as r1_* / r2_*, evaluated on the sizes of
// the ViennaCL vectors.
2076 viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4);
2077 viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4);
// Element-wise copies of the host range views into plain std::vector objects.
// NOTE(review): std_short_vec / std_short_vec2 are declared again further down; presumably
// each pair lives in its own brace scope in the full file — confirm.
2085 std::vector<NumericT> std_short_vec(host_range_vec.
size());
2086 for (std::size_t i=0; i<std_short_vec.size(); ++i)
2087 std_short_vec[i] = host_range_vec[i];
2090 std::vector<NumericT> std_short_vec2(host_range_vec2.
size());
2091 for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2092 std_short_vec2[i] = host_range_vec2[i];
2095 std::cout <<
"Testing creation of vectors from range..." << std::endl;
2096 if (
check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2097 return EXIT_FAILURE;
2098 if (
check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2099 return EXIT_FAILURE;
// ViennaCL slices built from the same start / increment / size expressions as s1_* / s2_*.
2102 viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4);
2103 viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4);
// Same element-wise copy as above, this time from the host slice views.
2110 std::vector<NumericT> std_short_vec(host_slice_vec.
size());
2111 for (std::size_t i=0; i<std_short_vec.size(); ++i)
2112 std_short_vec[i] = host_slice_vec[i];
2115 std::vector<NumericT> std_short_vec2(host_slice_vec2.
size());
2116 for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2117 std_short_vec2[i] = host_slice_vec2[i];
2120 std::cout <<
"Testing creation of vectors from slice..." << std::endl;
2121 if (
check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2122 return EXIT_FAILURE;
2123 if (
check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2124 return EXIT_FAILURE;
// Run the five-argument test() for all nine operand combinations.  The host operands are
// always host_short_vec / host_short_vec2; only the ViennaCL operand types vary.
// First operand: plain vector.
2131 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl;
2132 retval = test<NumericT>(epsilon,
2133 host_short_vec, host_short_vec2,
2134 vcl_short_vec, vcl_short_vec2);
2135 if (retval != EXIT_SUCCESS)
2136 return EXIT_FAILURE;
2138 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = range **" << std::endl;
2139 retval = test<NumericT>(epsilon,
2140 host_short_vec, host_short_vec2,
2141 vcl_short_vec, vcl_range_vec2);
2142 if (retval != EXIT_SUCCESS)
2143 return EXIT_FAILURE;
2145 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl;
2146 retval = test<NumericT>(epsilon,
2147 host_short_vec, host_short_vec2,
2148 vcl_short_vec, vcl_slice_vec2);
2149 if (retval != EXIT_SUCCESS)
2150 return EXIT_FAILURE;
// First operand: range.
2154 std::cout <<
" ** vcl_v1 = range, vcl_v2 = vector **" << std::endl;
2155 retval = test<NumericT>(epsilon,
2156 host_short_vec, host_short_vec2,
2157 vcl_range_vec, vcl_short_vec2);
2158 if (retval != EXIT_SUCCESS)
2159 return EXIT_FAILURE;
2161 std::cout <<
" ** vcl_v1 = range, vcl_v2 = range **" << std::endl;
2162 retval = test<NumericT>(epsilon,
2163 host_short_vec, host_short_vec2,
2164 vcl_range_vec, vcl_range_vec2);
2165 if (retval != EXIT_SUCCESS)
2166 return EXIT_FAILURE;
2168 std::cout <<
" ** vcl_v1 = range, vcl_v2 = slice **" << std::endl;
2169 retval = test<NumericT>(epsilon,
2170 host_short_vec, host_short_vec2,
2171 vcl_range_vec, vcl_slice_vec2);
2172 if (retval != EXIT_SUCCESS)
2173 return EXIT_FAILURE;
// First operand: slice.
2177 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl;
2178 retval = test<NumericT>(epsilon,
2179 host_short_vec, host_short_vec2,
2180 vcl_slice_vec, vcl_short_vec2);
2181 if (retval != EXIT_SUCCESS)
2182 return EXIT_FAILURE;
2184 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = range **" << std::endl;
2185 retval = test<NumericT>(epsilon,
2186 host_short_vec, host_short_vec2,
2187 vcl_slice_vec, vcl_range_vec2);
2188 if (retval != EXIT_SUCCESS)
2189 return EXIT_FAILURE;
2191 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl;
2192 retval = test<NumericT>(epsilon,
2193 host_short_vec, host_short_vec2,
2194 vcl_slice_vec, vcl_slice_vec2);
2195 if (retval != EXIT_SUCCESS)
2196 return EXIT_FAILURE;
2198 return EXIT_SUCCESS;
// Body of the test program's entry point: prints a banner, runs the vector tests for float
// and for double via test<NumericT>(epsilon), and prints a completion message.
// NOTE(review): the `int main()` line, the braces, the branches taken when a test fails and
// the final return statement are not part of this listing.
2207 std::cout << std::endl;
2208 std::cout <<
"----------------------------------------------" << std::endl;
2209 std::cout <<
"----------------------------------------------" << std::endl;
2210 std::cout <<
"## Test :: Vector" << std::endl;
2211 std::cout <<
"----------------------------------------------" << std::endl;
2212 std::cout <<
"----------------------------------------------" << std::endl;
2213 std::cout << std::endl;
2215 int retval = EXIT_SUCCESS;
2217 std::cout << std::endl;
2218 std::cout <<
"----------------------------------------------" << std::endl;
2219 std::cout << std::endl;
// Single-precision run with a relative tolerance of 1e-2.
2221 typedef float NumericT;
2222 NumericT epsilon =
static_cast<NumericT
>(1.0E-2);
2223 std::cout <<
"# Testing setup:" << std::endl;
2224 std::cout <<
" eps: " << epsilon << std::endl;
2225 std::cout <<
" numeric: float" << std::endl;
2226 retval = test<NumericT>(epsilon);
2227 if ( retval == EXIT_SUCCESS )
2228 std::cout <<
"# Test passed" << std::endl;
2232 std::cout << std::endl;
2233 std::cout <<
"----------------------------------------------" << std::endl;
2234 std::cout << std::endl;
// NOTE(review): the statement guarded by this #ifdef and the matching #endif are not visible
// in this listing — possibly a check that the device supports double precision; confirm
// against the full file.
2235 #ifdef VIENNACL_WITH_OPENCL
// Double-precision run with a relative tolerance of 1e-10.
// NOTE(review): NumericT / epsilon are declared a second time here; presumably each run sits
// in its own brace scope in the full file.
2240 typedef double NumericT;
2241 NumericT epsilon = 1.0E-10;
2242 std::cout <<
"# Testing setup:" << std::endl;
2243 std::cout <<
" eps: " << epsilon << std::endl;
2244 std::cout <<
" numeric: double" << std::endl;
2245 retval = test<NumericT>(epsilon);
2246 if ( retval == EXIT_SUCCESS )
2247 std::cout <<
"# Test passed" << std::endl;
2251 std::cout << std::endl;
2252 std::cout <<
"----------------------------------------------" << std::endl;
2253 std::cout << std::endl;
2256 std::cout << std::endl;
2257 std::cout <<
"------- Test completed --------" << std::endl;
2258 std::cout << std::endl;
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
T norm_2(std::vector< T, A > const &v1)
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
vector_proxy(NumericT *p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
NumericT & operator[](std::size_t index)
int test(Epsilon const &epsilon, HostVectorType &host_v1, HostVectorType &host_v2, ViennaCLVectorType1 &vcl_v1, ViennaCLVectorType2 &vcl_v2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Class for representing non-strided subvectors of a bigger vector x.
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
int check(T1 const &t1, T2 const &t2, double epsilon)
void proxy_copy(vector_proxy< NumericT > const &host_vec, viennacl::vector_base< NumericT > &vcl_vec)
Class for representing strided subvectors of a bigger vector x.
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
NumericT const & operator[](std::size_t index) const
Proxy classes for vectors.
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied.
Represents a vector consisting of 1 at a given index and zeros otherwise.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
NumericT max(std::vector< NumericT > const &v1)
T norm_inf(std::vector< T, A > const &v1)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
T norm_1(std::vector< T, A > const &v1)
size_type size() const
Returns the length of the vector (cf. std::vector)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
A slice class that refers to a strided selection of entries, described by a start index, a stride (increment), and the number of entries.
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations...
NumericT min(std::vector< NumericT > const &v1)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)