Program Listing for File fill_buffer_rand.h
↰ Return to documentation for file (include/embers/rand/fill_buffer_rand.h)
/* Copyright © 2020 Advanced Micro Devices, Inc. All rights reserved */
#ifndef EMBERS_FILL_BUFFER_RAND_H
#define EMBERS_FILL_BUFFER_RAND_H
#include <cstdint>
#include <limits>
#include <hip/hip_runtime.h>
#include "embers/rand/xorshift.cuh"
namespace embers
{
namespace rand
{
// @brief Fill a buffer with random floating point numbers
//
// Callable from host and device code. In a device compilation every calling
// thread computes a flat 1D index `tid` from threadIdx.x / blockIdx.x /
// blockDim.x and writes elements tid, tid + stride, tid + 2 * stride, ...
// where stride = gridDim.x * blockDim.x. In a host compilation tid = 0 and
// stride = 1, so the single caller writes every element.
//
// Each caller works on a private copy of `*state`: word x[1] of the copy is
// overwritten with a value derived from `tid`, then PREROUNDS generator
// outputs are discarded before the first element is produced.
//
// Each element is (val / max) * (b - a) + a, where `val` is one generator
// output and `max` is the largest value of the generator's return type.
// `b - a` is evaluated in T, so it must be finite for the results to be
// meaningful.
//
// @tparam T Floating point element type
// @tparam PREROUNDS Number of generator outputs discarded after seeding
// @param ptr Pointer to the buffer
// @param num_elems Number of elements in the buffer
// @param state Pointer to the random number generator state. It is only read
//        here; just the local copy is passed to xorshift128p.
// @param a Lower bound of the random numbers. NOTE: the default,
//        std::numeric_limits<T>::min(), is the smallest positive normal value
//        for floating point types, not the most negative one.
// @param b Upper bound of the random numbers
template <typename T, size_t PREROUNDS = 13>
__host__ __device__ inline typename std::enable_if<std::is_floating_point<T>::value>::type
FillBufferRandom(T *ptr, size_t num_elems, xorshift128p_state *state,
                 T a = std::numeric_limits<T>::min(), T b = std::numeric_limits<T>::max())
{
#if defined(__HIP_DEVICE_COMPILE__) && __HIP_DEVICE_COMPILE__ == 1
  // Widen every operand to size_t BEFORE multiplying: the built-in
  // coordinates are 32-bit, so the products could otherwise wrap around
  // on very large launches and silently corrupt the index and the stride.
  const size_t tid = static_cast<size_t>(blockIdx.x) * static_cast<size_t>(blockDim.x) +
                     static_cast<size_t>(threadIdx.x);
  const size_t stride = static_cast<size_t>(gridDim.x) * static_cast<size_t>(blockDim.x);
#else
  const size_t tid = 0;
  const size_t stride = 1;
#endif

  // Private generator state: start from the caller's state, then replace
  // x[1] with a per-caller value (tid in the high 32 bits, the bitwise
  // complement of tid's low 32 bits in the low 32 bits).
  auto my_state = *state;
  my_state.x[1] = (static_cast<uint64_t>(tid) << 32) | ~static_cast<uint32_t>(tid);

  // Discard the first PREROUNDS outputs after re-seeding.
  for (size_t prerounds = 0; prerounds < PREROUNDS; ++prerounds) {
    xorshift128p(&my_state);
  }

  // Loop-invariant width of the requested interval.
  const T span = b - a;

  for (size_t i = tid; i < num_elems; i += stride) {
    auto val = xorshift128p(&my_state);
    // Scale the raw output by the maximum of its own type, then stretch and
    // shift the result onto the requested interval.
    ptr[i] = (static_cast<T>(val) / static_cast<T>(std::numeric_limits<decltype(val)>::max())) *
                 span +
             a;
  }
}
// @brief Fill a buffer with random integer numbers
//
// Callable from host and device code. In a device compilation every calling
// thread computes a flat 1D index `tid` from threadIdx.x / blockIdx.x /
// blockDim.x and writes elements tid, tid + stride, tid + 2 * stride, ...
// where stride = gridDim.x * blockDim.x. In a host compilation tid = 0 and
// stride = 1, so the single caller writes every element.
//
// Each caller works on a private copy of `*state`: word x[1] of the copy is
// overwritten with a value derived from `tid`, then PREROUNDS generator
// outputs are discarded before the first element is produced.
//
// Each element is a + (val % (b - a)), with the arithmetic carried out in
// uint64_t. The generated range is therefore half-open, [a, b): `b` itself is
// never produced. When a == b every element is set to `a`. The span is
// computed with wrapping unsigned arithmetic, so b < a is not handled
// specially and does not yield values between b and a.
//
// @tparam T Integral element type
// @tparam PREROUNDS Number of generator outputs discarded after seeding
// @param ptr Pointer to the buffer
// @param num_elems Number of elements in the buffer
// @param state Pointer to the random number generator state. It is only read
//        here; just the local copy is passed to xorshift128p.
// @param a Lower bound of the random numbers (inclusive)
// @param b Upper bound of the random numbers (exclusive, unless a == b)
template <typename T, size_t PREROUNDS = 13>
__host__ __device__ inline typename std::enable_if<std::is_integral<T>::value>::type
FillBufferRandom(T *ptr, size_t num_elems, xorshift128p_state *state,
                 T a = std::numeric_limits<T>::min(), T b = std::numeric_limits<T>::max())
{
#if defined(__HIP_DEVICE_COMPILE__) && __HIP_DEVICE_COMPILE__ == 1
  // Widen every operand to size_t BEFORE multiplying: the built-in
  // coordinates are 32-bit, so the products could otherwise wrap around
  // on very large launches and silently corrupt the index and the stride.
  const size_t tid = static_cast<size_t>(blockIdx.x) * static_cast<size_t>(blockDim.x) +
                     static_cast<size_t>(threadIdx.x);
  const size_t stride = static_cast<size_t>(gridDim.x) * static_cast<size_t>(blockDim.x);
#else
  const size_t tid = 0;
  const size_t stride = 1;
#endif

  // Private generator state: start from the caller's state, then replace
  // x[1] with a per-caller value (tid in the high 32 bits, the bitwise
  // complement of tid's low 32 bits in the low 32 bits).
  auto my_state = *state;
  my_state.x[1] = (static_cast<uint64_t>(tid) << 32) | ~static_cast<uint32_t>(tid);

  // Discard the first PREROUNDS outputs after re-seeding.
  for (size_t prerounds = 0; prerounds < PREROUNDS; ++prerounds) {
    xorshift128p(&my_state);
  }

  // Loop-invariant span and base, both in uint64_t so that the subtraction
  // is well defined (modulo 2^64) for signed T as well.
  const uint64_t base = static_cast<uint64_t>(a);
  const uint64_t tmp = static_cast<uint64_t>(b) - base;

  for (size_t i = tid; i < num_elems; i += stride) {
    const uint64_t val = xorshift128p(&my_state);
    // Usually: `ptr[i] = a + val % tmp`
    // However, when a == b, `tmp` is zero and must not be used as a divisor.
    ptr[i] = tmp ? static_cast<T>(base + val % tmp) : a;
  }
}
} // namespace rand
} // namespace embers
#endif // EMBERS_FILL_BUFFER_RAND_H