cpp : more robust code and added more documentation
@@ -180,7 +180,7 @@ static __global__ void __apply_feature_kernel__(int32_t* d_X_feat, const np::Arr
 np::Array<int32_t> apply_features_gpu(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
 	const np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
-	int32_t* d_X_feat;
+	int32_t* d_X_feat = nullptr;
 
 	_print_cuda_error_("malloc d_X_feat", cudaMalloc(&d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t)));
 	np::Array<uint32_t> d_X_ii = copyToDevice<uint32_t>("X_ii", X_ii);
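
Note on the change above: initialising d_X_feat to nullptr is what makes the allocation robust. If cudaMalloc fails, the pointer stays null instead of holding an indeterminate value, and cudaFree(nullptr) is a documented no-op, so cleanup paths can never free garbage. A minimal standalone sketch of the same pattern using only the raw CUDA runtime API (the repo's _print_cuda_error_ and copyToDevice helpers presumably wrap these calls; checked_malloc below is a hypothetical name):

    #include <cstdio>
    #include <cstdint>
    #include <cuda_runtime.h>

    // Hypothetical standalone equivalent of the checked allocation above.
    static bool checked_malloc(const char* tag, void** ptr, const size_t n_bytes) {
        const cudaError_t err = cudaMalloc(ptr, n_bytes);
        if (err != cudaSuccess)
            fprintf(stderr, "CUDA error (%s): %s\n", tag, cudaGetErrorString(err));
        return err == cudaSuccess;
    }

    int main() {
        int32_t* d_X_feat = nullptr; // stays null if the allocation below fails
        if (checked_malloc("malloc d_X_feat", reinterpret_cast<void**>(&d_X_feat), 1024 * sizeof(int32_t))) {
            // ... launch kernels that write to d_X_feat ...
        }
        cudaFree(d_X_feat); // safe even on the failure path: cudaFree(nullptr) is a no-op
        return 0;
    }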
@@ -268,7 +268,7 @@ np::Array<float64_t> train_weak_clf_gpu(const np::Array<int32_t>& X_feat, const
 }
 
 template<typename T>
-__device__ inline static int32_t as_partition_gpu(const T* a, uint16_t* indices, const size_t l, const size_t h) noexcept {
+__device__ inline static int32_t as_partition_gpu(const T* a, uint16_t* const indices, const size_t l, const size_t h) noexcept {
 	int32_t i = l - 1;
 	for (int32_t j = l; j <= h; ++j)
 		if (a[indices[j]] < a[indices[h]])
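
Note on the signature change above: the added const binds to the pointer, not to the elements. Inside as_partition_gpu the parameter indices can no longer be reseated, while indices[i] stays writable, which the partition needs in order to swap entries. A two-line illustration (demo is a hypothetical name):

    __device__ void demo(uint16_t* const p) {
        p[0] = 42;      // allowed: the pointed-to elements remain mutable
        // p = nullptr; // compile error: the pointer itself is read-only
    }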
@@ -278,7 +278,7 @@ __device__ inline static int32_t as_partition_gpu(const T* a, uint16_t* indices,
 }
 
 template<typename T>
-__device__ void argsort_gpu(const T* a, uint16_t* indices, const size_t l, const size_t h) noexcept {
+__device__ void argsort_gpu(const T* a, uint16_t* const indices, const size_t l, const size_t h) noexcept {
 	const size_t total = h - l + 1;
 
 	//int32_t* stack = new int32_t[total]{l, h};
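
The commented-out stack allocation hints that argsort_gpu is an iterative quicksort over an index table: recursion is replaced by an explicit range stack, the usual way to avoid deep recursion in device code. A host-side sketch of that scheme, reconstructed from the visible fragments (the partition matches the Lomuto loop shown above; the fixed stack size and float element type are assumptions):

    #include <cstdint>
    #include <cstdio>
    #include <utility>

    // Host-side sketch; the GPU version differs mainly in the __device__ qualifiers.
    static int32_t as_partition(const float* a, uint16_t* const indices, const int32_t l, const int32_t h) {
        int32_t i = l - 1;
        for (int32_t j = l; j <= h; ++j)
            if (a[indices[j]] < a[indices[h]])        // pivot is a[indices[h]]
                std::swap(indices[++i], indices[j]);
        std::swap(indices[++i], indices[h]);          // place the pivot
        return i;
    }

    static void argsort(const float* a, uint16_t* const indices, int32_t l, int32_t h) {
        int32_t stack[64]; // explicit range stack instead of recursion; sized for this toy input
        int32_t top = -1;
        stack[++top] = l;
        stack[++top] = h;
        while (top >= 0) {
            h = stack[top--];
            l = stack[top--];
            const int32_t p = as_partition(a, indices, l, h);
            if (p - 1 > l) { stack[++top] = l;     stack[++top] = p - 1; } // left range
            if (p + 1 < h) { stack[++top] = p + 1; stack[++top] = h;     } // right range
        }
    }

    int main() {
        const float a[] = { 3.f, 1.f, 2.f };
        uint16_t idx[] = { 0, 1, 2 };
        argsort(a, idx, 0, 2);
        printf("%u %u %u\n", idx[0], idx[1], idx[2]); // prints "1 2 0": indices in ascending order of a
        return 0;
    }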
@@ -312,7 +312,7 @@ __device__ void argsort_gpu(const T* a, uint16_t* indices, const size_t l, const
 }
 
 template<typename T>
-__global__ void argsort_bounded_gpu(const np::Array<T> a, uint16_t* indices){
+__global__ void argsort_bounded_gpu(const np::Array<T> a, uint16_t* const indices){
 	const size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
 	if (idx >= a.shape[0])
 		return;
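
The early return above suggests a one-thread-per-row launch: the grid is rounded up to a whole number of blocks and threads past shape[0] simply exit. The usual launch arithmetic for that pattern looks like the fragment below (the block size of 256 and the launch line are assumptions; the actual launch code is outside this hunk):

    const size_t n_rows = n;                             // rows to sort, one per thread
    const size_t block  = 256;                           // assumed block size
    const size_t grid   = (n_rows + block - 1) / block;  // round up; the guard trims the excess
    // argsort_bounded_gpu<int32_t><<<grid, block>>>(d_a, d_indices); // assumed launch form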
@@ -324,7 +324,7 @@ __global__ void argsort_bounded_gpu(const np::Array<T> a, uint16_t* indices){
 np::Array<uint16_t> argsort_2d_gpu(const np::Array<int32_t>& X_feat) noexcept {
 	const np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);
 
-	uint16_t* d_indices;
+	uint16_t* d_indices = nullptr;
 	const size_t indices_size = np::prod(indices.shape) * sizeof(uint16_t);
 
 	np::Array<int32_t> d_X_feat = copyToDevice<int32_t>("X_feat", X_feat);
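
For context, a wrapper like argsort_2d_gpu typically completes the device round trip after this point: copy inputs up, launch, copy the index table back, free. A minimal raw-API sketch of that lifecycle (names are illustrative, not the repo's; the actual remainder of the function is outside this hunk):

    #include <cstdint>
    #include <cuda_runtime.h>

    // Illustrative round trip only; argsort_2d_gpu's exact body is not shown above.
    void device_round_trip(const int32_t* h_in, uint16_t* h_out, const size_t n) {
        int32_t* d_in = nullptr;
        uint16_t* d_out = nullptr;
        cudaMalloc(&d_in, n * sizeof(int32_t));
        cudaMalloc(&d_out, n * sizeof(uint16_t));
        cudaMemcpy(d_in, h_in, n * sizeof(int32_t), cudaMemcpyHostToDevice);
        // ... kernel launch goes here ...
        cudaMemcpy(h_out, d_out, n * sizeof(uint16_t), cudaMemcpyDeviceToHost);
        cudaFree(d_out);
        cudaFree(d_in);
    }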