// common.h
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <memory>
#include <random>
#include <type_traits>
// Defines two generic lambdas in the current scope, `ScalarFn` and `VectorFn`,
// each taking two pointers `A` and `B` and an unsigned trip count `TC`.
//
// Both lambda bodies consist of `Init` followed by `Loop`; the only difference
// is the `clang loop` pragma emitted directly in front of `Loop`:
//   - ScalarFn: vectorize(disable) interleave_count(1)
//   - VectorFn: vectorize(enable)
//
// Init: statement(s) placed before the pragma (e.g. local declarations).
// Loop: the code placed right after the pragma; it is expected to start with
//       the loop the pragma should apply to.
//
// The lambdas have an empty capture list, and the names `A`, `B` and `TC` are
// what `Init`/`Loop` must use to reach the arguments. Because the macro
// declares fixed variable names, it can be expanded only once per scope.
// Presumably callers run both lambdas on the same input and compare the
// results -- confirm against the call sites.
#define DEFINE_SCALAR_AND_VECTOR_FN2(Init, Loop) \
auto ScalarFn = [](auto *A, auto *B, unsigned TC) { \
Init _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
}; \
auto VectorFn = [](auto *A, auto *B, unsigned TC) { \
Init _Pragma("clang loop vectorize(enable)") Loop \
};
// Defines two generic lambdas in the current scope, `ScalarFn` and `VectorFn`,
// each taking three pointers `A`, `B`, `C` and an unsigned trip count `TC`.
//
// Both lambda bodies are just `Loop`, preceded by a `clang loop` pragma:
//   - ScalarFn: vectorize(disable) interleave_count(1)
//   - VectorFn: vectorize(enable)
//
// Loop: the code placed right after the pragma; it is expected to start with
//       the loop the pragma should apply to. There is no separate `Init`
//       argument in this variant.
//
// The lambdas have an empty capture list, and `A`, `B`, `C` and `TC` are the
// names `Loop` must use to reach the arguments. Because the macro declares
// fixed variable names, it can be expanded only once per scope.
#define DEFINE_SCALAR_AND_VECTOR_FN3(Loop) \
auto ScalarFn = [](auto *A, auto *B, auto *C, unsigned TC) { \
_Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
}; \
auto VectorFn = [](auto *A, auto *B, auto *C, unsigned TC) { \
_Pragma("clang loop vectorize(enable)") Loop \
};
// Defines two generic lambdas in the current scope, `ScalarFn` and `VectorFn`,
// each taking four pointers (`cond0`, `cond1`, `data0`, `data1`), an unsigned
// element count `N` and an int `x`.
//
// Both lambda bodies consist of `Init` followed by `Loop`; the only difference
// is the `clang loop` pragma emitted directly in front of `Loop`:
//   - ScalarFn: vectorize(disable)
//   - VectorFn: vectorize(enable)
//
// Init: statement(s) placed before the pragma (e.g. local declarations).
// Loop: the code placed right after the pragma; it is expected to start with
//       the loop the pragma should apply to.
//
// NOTE(review): unlike the other DEFINE_*_FN macros in this file, the scalar
// pragma here does not also request interleave_count(1) -- confirm whether
// that difference is intentional.
//
// The lambdas have an empty capture list. Because the macro declares fixed
// variable names, it can be expanded only once per scope.
#define DEFINE_SCALAR_AND_VECTOR_FN4(Init, Loop) \
auto ScalarFn = [](auto *cond0, auto *cond1, auto *data0, auto *data1, \
unsigned N, int x) { \
Init _Pragma("clang loop vectorize(disable)") Loop \
}; \
auto VectorFn = [](auto *cond0, auto *cond1, auto *data0, auto *data1, \
unsigned N, int x) { \
Init _Pragma("clang loop vectorize(enable)") Loop \
};
// Defines two generic lambdas in the current scope, `ScalarFn` and `VectorFn`,
// each taking two pointers `A` and `B` plus the unsigned trip counts `OuterTC`
// and `InnerTC`, and each running a two-level loop nest:
//
//   for i in [0, OuterTC)        -- ascending, `unsigned long i`
//     for j in [0, InnerTC)      -- ascending, `unsigned long j`
//       InnerLoopCode
//
// The `clang loop` pragma sits directly in front of the inner `j` loop:
//   - ScalarFn: vectorize(disable) interleave_count(1)
//   - VectorFn: vectorize(enable)
//
// InnerLoopCode: statement(s) forming the body of the inner loop; it can refer
//                to `A`, `B`, `i`, `j`, `OuterTC` and `InnerTC`.
//
// The lambdas have an empty capture list. Because the macro declares fixed
// variable names, it can be expanded only once per scope.
#define DEFINE_NESTED_SCALAR_AND_VECTOR_FN4(InnerLoopCode) \
auto ScalarFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) { \
for (unsigned long i = 0; i < OuterTC; i++) { \
_Pragma("clang loop vectorize(disable) interleave_count(1)") \
for (unsigned long j = 0; j < InnerTC; j++) { \
InnerLoopCode \
} \
} \
}; \
auto VectorFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) { \
for (unsigned long i = 0; i < OuterTC; i++) { \
_Pragma("clang loop vectorize(enable)") \
for (unsigned long j = 0; j < InnerTC; j++) { \
InnerLoopCode \
} \
} \
};
// Defines two generic lambdas in the current scope, `ScalarFn` and `VectorFn`,
// each taking two pointers `A` and `B` plus the unsigned trip counts `OuterTC`
// and `InnerTC`, and each running a two-level loop nest whose outer loop
// counts DOWN:
//
//   for i in OuterTC-1, ..., 0   -- descending, `long i`
//     for j in [0, InnerTC)      -- ascending, `unsigned long j`
//       InnerLoopCode
//
// The `clang loop` pragma sits directly in front of the inner `j` loop:
//   - ScalarFn: vectorize(disable) interleave_count(1)
//   - VectorFn: vectorize(enable)
//
// InnerLoopCode: statement(s) forming the body of the inner loop; it can refer
//                to `A`, `B`, `i`, `j`, `OuterTC` and `InnerTC`.
//
// The outer start value is computed as `static_cast<long>(OuterTC) - 1`, not
// `OuterTC - 1`: the latter is evaluated in `unsigned`, so OuterTC == 0 would
// wrap to UINT_MAX and (where `long` is wider than `unsigned`) the loop would
// run ~4 billion times instead of not at all.
//
// The lambdas have an empty capture list. Because the macro declares fixed
// variable names, it can be expanded only once per scope.
#define DEFINE_NESTED_SCALAR_AND_VECTOR_FN5(InnerLoopCode)                     \
  auto ScalarFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) {   \
    for (long i = static_cast<long>(OuterTC) - 1; i >= 0; i--) {               \
      _Pragma("clang loop vectorize(disable) interleave_count(1)")             \
      for (unsigned long j = 0; j < InnerTC; j++) {                            \
        InnerLoopCode                                                          \
      }                                                                        \
    }                                                                          \
  };                                                                           \
  auto VectorFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) {   \
    for (long i = static_cast<long>(OuterTC) - 1; i >= 0; i--) {               \
      _Pragma("clang loop vectorize(enable)")                                  \
      for (unsigned long j = 0; j < InnerTC; j++) {                            \
        InnerLoopCode                                                          \
      }                                                                        \
    }                                                                          \
  };
// Pseudo-random generator shared by the init_* helpers in this file. It is
// default-constructed, i.e. it starts from the engine's default seed, so the
// generated sequence is the same on every run.
static std::mt19937 rng;

// Initialize the first N elements of A with random integers drawn uniformly
// from the full value range of Int.
//
// The distribution is instantiated on a 64-bit type with the same signedness
// as Int. Using uint64_t unconditionally would convert a negative
// numeric_limits<Int>::min() to a huge unsigned lower bound that exceeds the
// upper bound, which violates uniform_int_distribution's `a <= b`
// precondition. Going through a wide type also keeps narrow types (char,
// bool, int8_t, ...) usable, since the distribution itself may not be
// instantiated on them.
template <typename Int,
          std::enable_if_t<std::is_integral<Int>::value, bool> = true>
static void init_data(const std::unique_ptr<Int[]> &A, unsigned N) {
  using Wide =
      std::conditional_t<std::is_signed<Int>::value, int64_t, uint64_t>;
  std::uniform_int_distribution<Wide> distrib(
      std::numeric_limits<Int>::min(), std::numeric_limits<Int>::max());
  for (unsigned i = 0; i < N; i++)
    A[i] = static_cast<Int>(distrib(rng)); // always in range for Int
}
// Initialize the first N elements of A with random floating-point values
// drawn uniformly from [numeric_limits<Float>::min(),
// numeric_limits<Float>::max()). For floating-point types min() is the
// smallest positive normal value, so every generated value is positive.
//
// The distribution is instantiated on Float itself rather than on `float`:
// with a hard-coded `float` distribution the upper bound for double or
// long double does not fit into the distribution's parameter type.
template <typename Float,
          std::enable_if_t<std::is_floating_point<Float>::value, bool> = true>
static void init_data(const std::unique_ptr<Float[]> &A, unsigned N) {
  std::uniform_real_distribution<Float> distrib(
      std::numeric_limits<Float>::min(), std::numeric_limits<Float>::max());
  for (unsigned i = 0; i < N; i++)
    A[i] = distrib(rng);
}
// Initialize the first N elements of an array of pointers: every slot is set
// to nullptr (no random data is involved for pointer element types).
template <typename Ptr,
          std::enable_if_t<std::is_pointer<Ptr>::value, bool> = true>
static void init_data(const std::unique_ptr<Ptr[]> &A, unsigned N) {
  Ptr *Slot = A.get();
  for (unsigned Remaining = N; Remaining != 0; --Remaining)
    *Slot++ = nullptr;
}
// Fill the first N elements of A with random booleans. Each value is obtained
// by drawing 0 or 1 from the file-level generator `rng` and storing whether
// the draw was non-zero.
static void init_cond(const std::unique_ptr<bool[]> &A, unsigned N) {
  std::uniform_int_distribution<uint64_t> CoinFlip(0, 1);
  bool *Out = A.get();
  for (unsigned Left = N; Left > 0; --Left) {
    const uint64_t Draw = CoinFlip(rng);
    *Out++ = (Draw != 0);
  }
}
// Compare the first NumElements entries of two arrays element by element.
// Returns normally when they all compare equal; otherwise prints "Miscompare"
// to std::cerr and terminates the program with exit status 1.
template <typename Ty>
static void check(const std::unique_ptr<Ty[]> &Reference,
                  const std::unique_ptr<Ty[]> &Tmp, unsigned NumElements) {
  Ty *const Expected = Reference.get();
  Ty *const Actual = Tmp.get();
  const bool Matches = std::equal(Expected, Expected + NumElements, Actual);
  if (Matches)
    return;
  std::cerr << "Miscompare\n";
  exit(1);
}
// Compare two single values. Returns normally when they are equal; otherwise
// prints "Miscompare" to std::cerr and terminates the program with exit
// status 1. NumElements is accepted so the signature parallels the array
// overload, but it is not used.
template <typename Ty>
static void check(const Ty Reference, const Ty ToCheck, unsigned NumElements) {
  (void)NumElements;
  const bool Differs = Reference != ToCheck;
  if (!Differs)
    return;
  std::cerr << "Miscompare\n";
  exit(1);
}