|
constexpr int | size () const |
| Get the size of the Vec . Provided mostly to satisfy named requirements. More...
|
|
constexpr bool | empty () const |
|
constexpr | Vec ()=default |
| Vec of default-constructed elements of this type and length. More...
|
|
constexpr | Vec (const Vec< T, S > &)=default |
|
constexpr Vec< T, S > & | operator= (const Vec< T, S > &)=default |
|
template<typename E , typename... Es> |
constexpr | Vec (E head, Es... tail) |
| Construct a Vec containing the given variadic list of elements. More...
|
|
template<int InSize> |
constexpr | Vec (const T(&arr)[InSize]) |
| Construct a Vec from a constant array of elements. More...
|
|
constexpr iterator | begin () |
| Returns an iterator to the first element of the container. More...
|
|
constexpr const_iterator | begin () const |
| Returns a const iterator to the first element of the container. More...
|
|
constexpr iterator | end () |
| Returns an iterator to the position just past the last element of the container. More...
|
|
constexpr const_iterator | end () const |
| Returns a const iterator to the position just past the last element of the container. More...
|
|
__host__ std::vector< T > | toVector () |
| Convert a Vec to a std::vector . More...
|
|
constexpr auto | toTuple () |
| Convert the Vec to a std::tuple of elements of type T . More...
|
|
constexpr T & | operator[] (int i) |
| Access operator. More...
|
|
constexpr const T & | operator[] (int i) const |
| Access operator. More...
|
|
constexpr BareValueType | read (int i) const |
| Returns a copy of the value at index i. More...
|
|
template<int I> |
constexpr T & | get () |
| Read the element at index I. More...
|
|
template<int I> |
constexpr const T & | get () const |
| Read the element at index I. More...
|
|
template<typename Q , int OtherSize> |
constexpr bool | operator== (const Vec< Q, OtherSize > &other) const |
| Equality operator. More...
|
|
template<typename Q , int OtherSize> |
constexpr bool | operator!= (const Vec< Q, OtherSize > &other) const |
|
template<typename Op > |
constexpr auto | map (const Op &op=Op{}) const |
| Maps the Vec with the given callable, providing a Vec result of the same length. More...
|
|
constexpr Vec< T, Size > | operator- () const |
| Unary negation operator. More...
|
|
template<typename Q > |
constexpr Vec< T, Size > & | operator= (const Q &value) |
| Unary scalar assignment. More...
|
|
constexpr void | replace (T from, T to) |
| In-place replacement of all instances of `from` with `to`. More...
|
|
template<typename Op > |
constexpr T | reduce (Op op=Op{}) const |
| Reduce the vector elements using a binary operator. More...
|
|
template<int NewSize> |
constexpr Vec< T, NewSize > | slice (int start) const |
| Get a copy of a contiguous slice of this Vec More...
|
|
template<int InSize, int ToWrite = InSize> |
constexpr void | splice (int I, const Vec< T, InSize > &in) |
| Overwrite a segment of this Vec with ToWrite values from another, starting at position I . More...
|
|
template<int I, int Len> |
constexpr sp::Vec< T, Size+Len > | insert (const sp::Vec< T, Len > &newValues) const |
| Insert multiple values. More...
|
|
template<int I> |
constexpr auto | insert (const T &newValue) const |
| Insert a single value. More...
|
|
constexpr sp::Vec< T, Size > | reverse () const |
| Get the reverse of this Vec More...
|
|
template<int I, int N = 1> |
constexpr auto | erase () const |
| Erase one or more values. More...
|
|
constexpr auto | eraseLast () const |
| Erase the last value. More...
|
|
constexpr auto | eraseFirst () const |
| Erase the first value. More...
|
|
template<bool Foo = false, typename std::enable_if_t< Foo||Size==1 > * = nullptr> |
| operator T () const |
| Allow 1-vectors to convert to the corresponding scalar. More...
|
|
template<typename T, int S>
class sp::Vec< T, S >
A vector of `S` elements of type `T`.
Like CUDA's `float4` and friends, it facilitates the use of vectorised memory instructions by making an alignment promise. `Vec` also provides a number of convenience features:
- Arithmetic operators allowing you to write math involving `Vec`s in the obvious way. This lets you very directly write vectorisable loops.
- Better metaprogramming, since the vector size and type are exposed as template parameters.
- Full `constexpr` support, allowing its use in highly elaborate metaprograms.
- Functional-programming-style operators like `map()` and `reduce()` for transforming `Vec`s.
- List-style manipulations like `insert()` and `slice()`.
- Conversion to/from `std::vector` (CPU-only).
To facilitate interoperability, `Vec` transparently converts to/from CUDA vector types.
A `Vec` in memory is always aligned suitably for use with vector memory instructions.
Examples
The following example kernels:
- Load two 4-vectors in each thread
- Add them together
- Write the result back to memory.
- Assume the input length is a multiple of four elements, to make the examples shorter.
Each kernel has an example calling function showing how the data is initialised and moved around.
Example Using Vanilla CUDA
// Vanilla-CUDA example kernel: each thread adds one float4 chunk of Y into the
// matching float4 chunk of X, in place.
//
// X: device pointer to N floats; receives the element-wise sum X + Y.
// Y: device pointer to N floats; read only.
// N: number of float elements in each array. Assumed to be a multiple of four;
//    any tail of fewer than four elements is left untouched.
//
// NOTE(review): the float4 accesses assume X and Y are suitably aligned for
// float4 — presumably guaranteed by the allocation in the caller; confirm.
__global__ void sumKernelVanilla(float* X, float* Y, int N) {
    // `offset` indexes float4 chunks, whereas N counts individual floats, so the
    // bound is the number of whole chunks rather than N itself.
    int offset = blockIdx.x * blockDim.x + threadIdx.x;
    if (offset >= N / 4) {
        return;
    }

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-align"
    float4* xVec = (float4*) X;
    float4* yVec = (float4*) Y;

    float4 xRead = xVec[offset];
    float4 yRead = yVec[offset];

    xRead.w += yRead.w;
    xRead.x += yRead.x;
    xRead.y += yRead.y;
    xRead.z += yRead.z;

    // Store back to the chunk this thread loaded, not unconditionally to chunk 0.
    xVec[offset] = xRead;
#pragma clang diagnostic pop
}
float *a, *b = nullptr;
fprintf(stderr,
"GPU memory allocation failed");
}
fprintf(stderr,
"Copy from host to GPU failed");
}
sumKernelVanilla<<<1, 1, 0, stream>>>(a, b, 4);
return x;
}
Stream createStream(const std::string &name) const
static Device & getActive()
__host__ __device__ cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind)
__host__ __device__ cudaError_t cudaMalloc(void **devPtr, size_t size)
__host__ __device__ cudaError_t cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0))
Example Using Speclib
int offset = 4 * (blockIdx.x * blockDim.x + threadIdx.x);
if (offset >= X.
dim(0)) {
return;
}
X.vectorWrite(offset, outValue);
}
int dim(int d) const
Behaviour common to all TensorLikes.
Definition: TensorLike.hpp:233
Represents a Tensor: a multidimensional array that can represent a multilinear map.
Definition: Tensor.hpp:32
A vector of S elements of type T.
Definition: Vec.hpp:71
// Host-side driver for the Speclib example: fills two inputs, launches
// sumKernelSpeclib on them, and returns the tensor that held X.
// NOTE(review): the declarations of `stream`, `nomadicX` and `nomadicY` are not
// visible in this listing — restore them from the original example.
auto speclibTest() {
auto hostX = nomadicX.mutableHostTensor(stream);
auto hostY = nomadicY.mutableHostTensor(stream);
// Fill the first four elements of both inputs with 1, 2, 3, 4.
for (int i = 0; i < 4; i++) {
hostX[i] = i + 1;
hostY[i] = i + 1;
}
auto deviceX = nomadicX.mutableDeviceTensor(stream);
auto deviceY = nomadicY.mutableDeviceTensor(stream);
// Launch a single block containing a single thread on `stream`.
sumKernelSpeclib<<<1, 1, 0, stream>>>(deviceX, deviceY);
// Presumably makes the kernel's result available on the host before returning —
// TODO confirm the meaning of the `true` argument.
nomadicX.hostTensor(stream, true);
return nomadicX;
}
Represents a TensorLike that can be synchronised between the CPU and various GPUs.
Definition: NomadicTensor.hpp:36
On top of being shorter, this example:
- Bounds-checks (and alignment-checks) the memory accesses when compiled in debug mode, producing a descriptive error message if something goes wrong. In release mode, both programs compile to the same thing.
- Adapts seamlessly to any type by replacing `float` with a template parameter.