cpp : Removed redundant keywords and replaced macros with constant expressions
This commit is contained in:
@@ -2,47 +2,47 @@
|
||||
#include "data.hpp"
|
||||
#include "ViolaJones_device.hpp"
|
||||
|
||||
constexpr static inline void add_empty_feature(const np::Array<uint8_t>& feats, size_t& n) noexcept {
|
||||
constexpr static void add_empty_feature(const np::Array<uint8_t>& feats, size_t& n) noexcept {
|
||||
memset(&feats[n], 0, 4 * sizeof(uint8_t));
|
||||
n += 4;
|
||||
}
|
||||
|
||||
constexpr static inline void add_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
constexpr static void add_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
feats[n++] = i + w;
|
||||
feats[n++] = j;
|
||||
feats[n++] = w;
|
||||
feats[n++] = h;
|
||||
}
|
||||
|
||||
constexpr static inline void add_immediate_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
constexpr static void add_immediate_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
feats[n++] = i;
|
||||
feats[n++] = j;
|
||||
feats[n++] = w;
|
||||
feats[n++] = h;
|
||||
}
|
||||
|
||||
constexpr static inline void add_bottom_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
constexpr static void add_bottom_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
feats[n++] = i;
|
||||
feats[n++] = j + h;
|
||||
feats[n++] = w;
|
||||
feats[n++] = h;
|
||||
}
|
||||
|
||||
constexpr static inline void add_right2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
constexpr static void add_right2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
feats[n++] = i + 2 * w;
|
||||
feats[n++] = j;
|
||||
feats[n++] = w;
|
||||
feats[n++] = h;
|
||||
}
|
||||
|
||||
constexpr static inline void add_bottom2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
constexpr static void add_bottom2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
feats[n++] = i;
|
||||
feats[n++] = j + 2 * h;
|
||||
feats[n++] = w;
|
||||
feats[n++] = h;
|
||||
}
|
||||
|
||||
constexpr static inline void add_bottom_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
constexpr static void add_bottom_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||
feats[n++] = i + w;
|
||||
feats[n++] = j + h;
|
||||
feats[n++] = w;
|
||||
@@ -228,7 +228,7 @@ np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>& alphas, cons
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Select the best classifer given their predictions.
|
||||
* @brief Select the best classifier given their predictions.
|
||||
*
|
||||
* @param classifiers The weak classifiers
|
||||
* @param weights Trained weights of each classifier
|
||||
@@ -271,7 +271,7 @@ std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np:
|
||||
weights /= np::sum(weights);
|
||||
const np::Array<float64_t> classifiers = train_weak_clf(X_feat, X_feat_argsort, y, weights);
|
||||
const auto [ clf, error, accuracy ] = select_best(classifiers, weights, X_feat, y);
|
||||
float64_t beta = error / (1.0 - error);
|
||||
const float64_t beta = error / (1.0 - error);
|
||||
weights *= np::pow(beta, (1.0 - accuracy));
|
||||
alphas[t] = std::log(1.0 / beta);
|
||||
final_classifier[t * 3] = clf;
|
||||
|
@@ -86,7 +86,7 @@ T benchmark_function(const char* const step_name, const int32_t& column_width, c
|
||||
#endif
|
||||
const std::chrono::system_clock::time_point start = perf_counter_ns();
|
||||
const T res = fnc(std::forward<Args>(args)...);
|
||||
const long long time_spent = duration_ns(perf_counter_ns() - start);
|
||||
const int64_t time_spent = duration_ns(perf_counter_ns() - start);
|
||||
formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
||||
return res;
|
||||
}
|
||||
@@ -109,7 +109,7 @@ void benchmark_function_void(const char* const step_name, const int32_t& column_
|
||||
#endif
|
||||
const std::chrono::system_clock::time_point start = perf_counter_ns();
|
||||
fnc(std::forward<Args>(args)...);
|
||||
const long long time_spent = duration_ns(perf_counter_ns() - start);
|
||||
const int64_t time_spent = duration_ns(perf_counter_ns() - start);
|
||||
formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
||||
}
|
||||
|
||||
|
@@ -114,7 +114,7 @@ static np::Array<uint32_t> __scanGPU_3d__(const np::Array<uint32_t>& X) noexcept
|
||||
np::Array<uint32_t> d_inter = copyToDevice<uint32_t>("inter", inter);
|
||||
|
||||
const dim3 dimGrid(n_block_x, n_block_y, k);
|
||||
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
|
||||
constexpr dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
|
||||
__kernel_scan_3d__<<<dimGrid, dimBlock>>>(n, height, d_inter, d_X);
|
||||
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
||||
|
||||
@@ -182,7 +182,7 @@ static np::Array<uint32_t> __transpose_3d__(const np::Array<uint32_t>& X) noexce
|
||||
const size_t n_block_x = static_cast<size_t>(std::ceil(static_cast<float64_t>(X.shape[1]) / static_cast<float64_t>(NB_THREADS_2D_X)));
|
||||
const size_t n_block_y = static_cast<size_t>(std::ceil(static_cast<float64_t>(X.shape[2]) / static_cast<float64_t>(NB_THREADS_2D_Y)));
|
||||
const dim3 dimGrid(n_block_x, n_block_y, X.shape[0]);
|
||||
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
|
||||
constexpr dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
|
||||
__transpose_kernel__<<<dimGrid, dimBlock>>>(d_X, d_Xt);
|
||||
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
||||
_print_cuda_error_("memcpy d_Xt", cudaMemcpy(Xt.data, d_Xt.data, np::prod(Xt.shape) * sizeof(uint32_t), cudaMemcpyDeviceToHost));
|
||||
@@ -274,7 +274,7 @@ np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::
|
||||
np::Array<uint8_t> d_y = copyToDevice<uint8_t>("y", y);
|
||||
|
||||
const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS_3D_X * NB_THREADS_3D_Y * NB_THREADS_3D_Z)));
|
||||
constexpr const dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z);
|
||||
constexpr dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z);
|
||||
__train_weak_clf_kernel__<<<n_blocks, dimBlock>>>(d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg);
|
||||
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
||||
|
||||
@@ -347,7 +347,7 @@ np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Arr
|
||||
const size_t dimX = static_cast<size_t>(std::ceil(static_cast<float64_t>(feats.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_X)));
|
||||
const size_t dimY = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_ii.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_Y)));
|
||||
const dim3 dimGrid(dimX, dimY);
|
||||
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
|
||||
constexpr dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
|
||||
__apply_feature_kernel__<<<dimGrid, dimBlock>>>(d_X_feat, d_feats, d_X_ii);
|
||||
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
||||
|
||||
|
@@ -6,22 +6,22 @@
|
||||
#define MODEL_DIR "./models"
|
||||
|
||||
#ifdef __CUDACC__
|
||||
#define NB_THREADS 1024
|
||||
#define NB_THREADS_2D_X 32
|
||||
#define NB_THREADS_2D_Y 32
|
||||
#define NB_THREADS_3D_X 16
|
||||
#define NB_THREADS_3D_Y 16
|
||||
#define NB_THREADS_3D_Z 4
|
||||
constexpr size_t NB_THREADS = 1024;
|
||||
constexpr size_t NB_THREADS_2D_X = 32;
|
||||
constexpr size_t NB_THREADS_2D_Y = 32;
|
||||
constexpr size_t NB_THREADS_3D_X = 16;
|
||||
constexpr size_t NB_THREADS_3D_Y = 16;
|
||||
constexpr size_t NB_THREADS_3D_Z = 4;
|
||||
#define M static_cast<size_t>(log2f(NB_THREADS_2D_Y))
|
||||
#endif
|
||||
|
||||
// Save state to avoid recalculation on restart
|
||||
#define SAVE_STATE true
|
||||
constexpr bool SAVE_STATE = true;
|
||||
// Redo the state even if it's already saved
|
||||
#define FORCE_REDO false
|
||||
constexpr bool FORCE_REDO = false;
|
||||
|
||||
// Use GPU to greatly accelerate runtime
|
||||
#define GPU_BOOSTED true
|
||||
constexpr bool GPU_BOOSTED = true;
|
||||
// Depending on what you set, the output label will be as follows:
|
||||
// ┌─────────────┬───────┐
|
||||
// │ GPU_BOOSTED │ LABEL │
|
||||
@@ -31,16 +31,16 @@
|
||||
// └─────────────┴───────┘
|
||||
|
||||
// Number of weak classifiers
|
||||
// [[maybe_unused]] constexpr const std::array TS{ 1 };
|
||||
// [[maybe_unused]] constexpr const std::array TS{ 1, 5, 10 };
|
||||
[[maybe_unused]] constexpr const std::array TS{ 1, 5, 10, 25, 50 };
|
||||
// [[maybe_unused]] constexpr const std::array TS{ 1, 5, 10, 25, 50, 100, 200, 300 };
|
||||
// [[maybe_unused]] constexpr const std::array TS{ 1, 5, 10, 25, 50, 100, 200, 300, 400, 500, 1000 };
|
||||
// constexpr std::array TS{ 1 };
|
||||
// constexpr std::array TS{ 1, 5, 10 };
|
||||
constexpr std::array TS{ 1, 5, 10, 25, 50 };
|
||||
// constexpr std::array TS{ 1, 5, 10, 25, 50, 100, 200, 300 };
|
||||
// constexpr std::array TS{ 1, 5, 10, 25, 50, 100, 200, 300, 400, 500, 1000 };
|
||||
|
||||
// Enable verbose output (for debugging purposes)
|
||||
#define __DEBUG false
|
||||
constexpr bool __DEBUG = false;
|
||||
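// Debugging options
// NOTE(review): `#if __DEBUG` is evaluated by the preprocessor, which cannot see a constexpr variable; without a `__DEBUG` macro the identifier is replaced by 0 and this block is never compiled.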
|
||||
#if __DEBUG
|
||||
#define IDX_INSPECT 4548
|
||||
#define IDX_INSPECT_OFFSET 100
|
||||
constexpr size_t IDX_INSPECT = 4548;
|
||||
constexpr size_t IDX_INSPECT_OFFSET = 100;
|
||||
#endif
|
||||
|
77
cpp/data.hpp
77
cpp/data.hpp
@@ -4,11 +4,11 @@
|
||||
#include <cmath>
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
#include "config.hpp"
|
||||
|
||||
#define BUFFER_SIZE 256
|
||||
#define STRING_INT_SIZE 8 // Length of a number in log10 (including '-')
|
||||
constexpr size_t BUFFER_SIZE = 256;
|
||||
constexpr size_t STRING_INT_SIZE = 8; // Length of a number in log10 (including '-')
|
||||
|
||||
#ifndef __CUDACC__
|
||||
#define __host__
|
||||
@@ -20,7 +20,7 @@ typedef double float64_t;
|
||||
typedef long double float128_t;
|
||||
|
||||
namespace np {
|
||||
constexpr const float64_t inf = std::numeric_limits<float64_t>::infinity();
|
||||
constexpr float64_t inf = std::numeric_limits<float64_t>::infinity();
|
||||
|
||||
typedef struct Slice {
|
||||
size_t x = 0, y = 0, z = 0;
|
||||
@@ -35,14 +35,14 @@ namespace np {
|
||||
#endif
|
||||
|
||||
__host__ __device__
|
||||
Shape(void) noexcept {
|
||||
constexpr Shape(void) noexcept {
|
||||
#if __DEBUG
|
||||
printf("Shape created (default)\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Shape(const size_t& length, size_t* const data) noexcept : length(length), data(data), refcount(new size_t(1)) {
|
||||
inline Shape(const size_t& length, size_t* const data) noexcept : length(length), data(data), refcount(new size_t(1)) {
|
||||
#if __DEBUG
|
||||
printf("Shape created (raw)\n");
|
||||
for(size_t i = 0; i < length; ++i)
|
||||
@@ -51,7 +51,7 @@ namespace np {
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
|
||||
inline Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
|
||||
#if __DEBUG
|
||||
printf("Shape created (initializer)\n");
|
||||
#endif
|
||||
@@ -65,7 +65,7 @@ namespace np {
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Shape(const Shape& shape) noexcept {
|
||||
constexpr Shape(const Shape& shape) noexcept {
|
||||
#if __DEBUG
|
||||
printf("Shape created (copy)\n");
|
||||
#endif
|
||||
@@ -97,7 +97,7 @@ namespace np {
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Shape(Shape&& shape) noexcept {
|
||||
constexpr Shape(Shape&& shape) noexcept {
|
||||
#if __DEBUG
|
||||
printf("Shape created (move)\n");
|
||||
#endif
|
||||
@@ -127,7 +127,7 @@ namespace np {
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
~Shape(void) noexcept {
|
||||
inline ~Shape(void) noexcept {
|
||||
if(refcount == nullptr){
|
||||
#if __DEBUG
|
||||
printf("Shape refcount freed more than once\n");
|
||||
@@ -159,7 +159,7 @@ namespace np {
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Shape& operator=(const Shape& shape) noexcept {
|
||||
constexpr Shape& operator=(const Shape& shape) noexcept {
|
||||
#if __DEBUG
|
||||
printf("Shape created (assign copy)\n");
|
||||
#endif
|
||||
@@ -190,7 +190,7 @@ namespace np {
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Shape& operator=(Shape&& shape) noexcept {
|
||||
constexpr Shape& operator=(Shape&& shape) noexcept {
|
||||
#if __DEBUG
|
||||
printf("Shape created (assign move)\n");
|
||||
#endif
|
||||
@@ -221,7 +221,6 @@ namespace np {
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
__host__ __device__
|
||||
constexpr size_t& operator[](const size_t& i) const {
|
||||
#if __DEBUG
|
||||
@@ -263,42 +262,42 @@ namespace np {
|
||||
size_t* refcount = nullptr;
|
||||
|
||||
__host__ __device__
|
||||
Array(void) noexcept {
|
||||
constexpr Array(void) noexcept {
|
||||
#if __DEBUG
|
||||
printf("Array created (default)\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(const Shape& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
||||
constexpr Array(const Shape& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
||||
#if __DEBUG
|
||||
printf("Array created (raw, copy shape)\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
||||
constexpr Array(const Shape& _shape) noexcept : shape(_shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
||||
#if __DEBUG
|
||||
printf("Array created (raw empty, copy shape)\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(Shape&& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
||||
constexpr Array(Shape&& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
||||
#if __DEBUG
|
||||
printf("Array created (raw, move shape)\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(Shape&& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
||||
constexpr Array(Shape&& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
||||
#if __DEBUG
|
||||
printf("Array created (raw empty, move shape)\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(const Array& array) noexcept : shape(array.shape) {
|
||||
constexpr Array(const Array& array) noexcept : shape(array.shape) {
|
||||
#if __DEBUG
|
||||
printf("Array created (copy)\n");
|
||||
#endif
|
||||
@@ -326,7 +325,7 @@ namespace np {
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(Array&& array) noexcept : shape(std::move(array.shape)) {
|
||||
constexpr Array(Array&& array) noexcept : shape(std::move(array.shape)) {
|
||||
#if __DEBUG
|
||||
printf("Array created (move)\n");
|
||||
#endif
|
||||
@@ -350,7 +349,7 @@ namespace np {
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
~Array(void) noexcept {
|
||||
inline ~Array(void) noexcept {
|
||||
if(refcount == nullptr){
|
||||
#if __DEBUG
|
||||
printf("Array refcount freed more than once\n");
|
||||
@@ -379,7 +378,7 @@ namespace np {
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array& operator=(const Array& array) noexcept {
|
||||
constexpr Array& operator=(const Array& array) noexcept {
|
||||
#if __DEBUG
|
||||
printf("Array created (assign copy)\n");
|
||||
#endif
|
||||
@@ -409,7 +408,7 @@ namespace np {
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array& operator=(Array&& array) noexcept {
|
||||
constexpr Array& operator=(Array&& array) noexcept {
|
||||
#if __DEBUG
|
||||
printf("Array created (assign move)\n");
|
||||
#endif
|
||||
@@ -456,36 +455,36 @@ namespace np {
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
inline Array<T> empty(Shape&& shape) noexcept {
|
||||
constexpr Array<T> empty(Shape&& shape) noexcept {
|
||||
return Array<T>(shape);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline Array<T> empty(const Shape& shape) noexcept {
|
||||
constexpr Array<T> empty(const Shape& shape) noexcept {
|
||||
return Array<T>(shape);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline Array<T> empty(const std::initializer_list<size_t>& dims) noexcept {
|
||||
constexpr Array<T> empty(const std::initializer_list<size_t>& dims) noexcept {
|
||||
return Array<T>(dims);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Array<T> zeros(Shape&& shape) noexcept {
|
||||
constexpr Array<T> zeros(Shape&& shape) noexcept {
|
||||
Array<T> res(shape);
|
||||
memset(res.data, 0, sizeof(T) * np::prod(res.shape));
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Array<T> zeros(const Shape& shape) noexcept {
|
||||
constexpr Array<T> zeros(const Shape& shape) noexcept {
|
||||
Array<T> res(shape);
|
||||
memset(res.data, 0, sizeof(T) * np::prod(res.shape));
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Array<T> zeros(const std::initializer_list<size_t>& dims) noexcept {
|
||||
constexpr Array<T> zeros(const std::initializer_list<size_t>& dims) noexcept {
|
||||
Array<T> res(dims);
|
||||
memset(res.data, 0, sizeof(T) * np::prod(res.shape));
|
||||
return res;
|
||||
@@ -599,7 +598,7 @@ namespace np {
|
||||
|
||||
template<typename T>
|
||||
template<typename F>
|
||||
Array<T> Array<T>::operator-(const F& other) const {
|
||||
constexpr Array<T> Array<T>::operator-(const F& other) const noexcept {
|
||||
np::Array<T> res = np::empty<T>(shape);
|
||||
const size_t total = prod(shape);
|
||||
for(size_t i = 0; i < total; ++i)
|
||||
@@ -609,7 +608,7 @@ namespace np {
|
||||
|
||||
template<typename T>
|
||||
template<typename F>
|
||||
Array<T> Array<T>::operator-(const np::Array<F>& other) const {
|
||||
constexpr Array<T> Array<T>::operator-(const np::Array<F>& other) const {
|
||||
#if __DEBUG
|
||||
if (shape != other.shape){
|
||||
printf("Incompatible shapes\n");
|
||||
@@ -665,7 +664,7 @@ namespace np {
|
||||
}
|
||||
|
||||
template<typename T, typename F>
|
||||
np::Array<T> pow(const F& k, const Array<T>& array) noexcept {
|
||||
constexpr np::Array<T> pow(const F& k, const Array<T>& array) noexcept {
|
||||
np::Array<T> result = np::empty<T>(array.shape);
|
||||
const size_t total = prod(array.shape);
|
||||
for(size_t i = 0; i < total; ++i)
|
||||
@@ -692,7 +691,7 @@ namespace np {
|
||||
//}
|
||||
|
||||
template<typename T, typename F>
|
||||
Array<T> astype(const Array<F>& array) noexcept {
|
||||
constexpr Array<T> astype(const Array<F>& array) noexcept {
|
||||
Array<T> res = empty<T>(array.shape);
|
||||
const size_t total = prod(array.shape);
|
||||
for(size_t i = 0; i < total; ++i)
|
||||
@@ -701,7 +700,7 @@ namespace np {
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Array<T> operator-(const T& k, const Array<T>& other) noexcept {
|
||||
constexpr Array<T> operator-(const T& k, const Array<T>& other) noexcept {
|
||||
np::Array<T> res = empty<T>(other.shape);
|
||||
const size_t total = prod(other.shape);
|
||||
for(size_t i = 0; i < total; ++i)
|
||||
@@ -748,7 +747,7 @@ constexpr np::Array<T>& map(np::Array<T>& a, const std::function<T(const size_t&
|
||||
|
||||
template<typename T>
|
||||
__host__ __device__
|
||||
constexpr inline static void swap(T* const a, T* const b) noexcept {
|
||||
constexpr void swap(T* const a, T* const b) noexcept {
|
||||
if (a == b) return;
|
||||
const T temp = *a;
|
||||
*a = *b;
|
||||
@@ -793,7 +792,7 @@ std::array<np::Array<uint8_t>, 4> load_datasets(void);
|
||||
void print_error_file(const char* const) noexcept;
|
||||
|
||||
template<typename T>
|
||||
void save(const np::Array<T>& d, const char* const filename) {
|
||||
constexpr void save(const np::Array<T>& d, const char* const filename) {
|
||||
FILE* const output = fopen(filename, "wb");
|
||||
if (output == NULL) {
|
||||
print_error_file(filename);
|
||||
@@ -808,7 +807,7 @@ void save(const np::Array<T>& d, const char* const filename) {
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
np::Array<T> load(const char* const filename) {
|
||||
constexpr np::Array<T> load(const char* const filename) {
|
||||
FILE* const input = fopen(filename, "rb");
|
||||
if (input == NULL) {
|
||||
print_error_file(filename);
|
||||
@@ -838,7 +837,7 @@ np::Array<T> load(const char* const filename) {
|
||||
|
||||
#ifdef __CUDACC__
|
||||
template<typename T>
|
||||
np::Array<T> copyToDevice(const char* const name, const np::Array<T>& array) noexcept {
|
||||
constexpr np::Array<T> copyToDevice(const char* const name, const np::Array<T>& array) noexcept {
|
||||
const size_t array_size = np::prod(array.shape) * sizeof(T);
|
||||
const size_t shape_size = array.shape.length * sizeof(size_t);
|
||||
np::Array<T> d_array;
|
||||
@@ -869,7 +868,7 @@ constexpr void cudaFree(const char* const name, np::Array<T>& array) noexcept {
|
||||
array.shape.data = nullptr;
|
||||
}
|
||||
|
||||
constexpr inline void _print_cuda_error_(const char* const name, const cudaError_t& err) noexcept {
|
||||
constexpr void _print_cuda_error_(const char* const name, const cudaError_t& err) noexcept {
|
||||
if (err != cudaSuccess) fprintf(stderr, "Error: %s = %d : %s\n", name, err, cudaGetErrorString(err));
|
||||
}
|
||||
#endif
|
||||
|
@@ -31,7 +31,7 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
||||
for (const char* const folder_name : { "models", "out" })
|
||||
std::filesystem::create_directory(folder_name);
|
||||
|
||||
const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns();
|
||||
const std::chrono::time_point<std::chrono::high_resolution_clock> preproc_timestamp = perf_counter_ns();
|
||||
const std::array<int32_t, 3> preproc_gaps = { 49, -18, 29 };
|
||||
header(preproc_gaps, { "Preprocessing", "Time spent (ns)", "Formatted time spent" });
|
||||
|
||||
@@ -113,7 +113,7 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
||||
print(X_test_feat_argsort.shape);
|
||||
print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
#endif
|
||||
const long long time_spent = duration_ns(perf_counter_ns() - preproc_timestamp);
|
||||
const int64_t time_spent = duration_ns(perf_counter_ns() - preproc_timestamp);
|
||||
formatted_line(preproc_gaps, "├", "┼", "─", "┤");
|
||||
formatted_row(preproc_gaps, { "Preprocessing summary", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
||||
footer(preproc_gaps);
|
||||
@@ -154,7 +154,7 @@ std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array
|
||||
#endif
|
||||
models[i++] = { alphas, final_classifiers };
|
||||
}
|
||||
const long long time_spent = duration_ns(perf_counter_ns() - training_timestamp);
|
||||
const int64_t time_spent = duration_ns(perf_counter_ns() - training_timestamp);
|
||||
formatted_line(training_gaps, "├", "┼", "─", "┤");
|
||||
formatted_row(training_gaps, { "Training summary", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
||||
footer(training_gaps);
|
||||
@@ -177,15 +177,15 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
|
||||
std::array<std::array<float64_t, 8>, TS.size()> results;
|
||||
|
||||
size_t i = 0;
|
||||
long long total_train_timestamp = 0;
|
||||
long long total_test_timestamp = 0;
|
||||
int64_t total_train_timestamp = 0;
|
||||
int64_t total_test_timestamp = 0;
|
||||
for (const auto& [ alphas, final_classifiers ] : models) {
|
||||
char title[BUFFER_SIZE] = { 0 };
|
||||
snprintf(title, BUFFER_SIZE, "ViolaJones T = %-4i (%s)", TS[i], LABEL);
|
||||
|
||||
std::chrono::system_clock::time_point start = perf_counter_ns();
|
||||
const np::Array<uint8_t> y_pred_train = classify_viola_jones(alphas, final_classifiers, X_train_feat);
|
||||
const long long t_pred_train = duration_ns(perf_counter_ns() - start);
|
||||
const int64_t t_pred_train = duration_ns(perf_counter_ns() - start);
|
||||
total_train_timestamp += t_pred_train;
|
||||
const float64_t e_acc = accuracy_score(y_train, y_pred_train);
|
||||
const float64_t e_f1 = f1_score(y_train, y_pred_train);
|
||||
@@ -194,7 +194,7 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
|
||||
|
||||
start = perf_counter_ns();
|
||||
const np::Array<uint8_t> y_pred_test = classify_viola_jones(alphas, final_classifiers, X_test_feat);
|
||||
const long long t_pred_test = duration_ns(perf_counter_ns() - start);
|
||||
const int64_t t_pred_test = duration_ns(perf_counter_ns() - start);
|
||||
total_test_timestamp += t_pred_test;
|
||||
const float64_t t_acc = accuracy_score(y_test, y_pred_test);
|
||||
const float64_t t_f1 = f1_score(y_test, y_pred_test);
|
||||
@@ -242,7 +242,7 @@ void unit_test(void) {
|
||||
++n_total;
|
||||
const std::chrono::system_clock::time_point start = perf_counter_ns();
|
||||
const bool state = fnc();
|
||||
const long long time_spent = duration_ns(perf_counter_ns() - start);
|
||||
const int64_t time_spent = duration_ns(perf_counter_ns() - start);
|
||||
if(state){
|
||||
formatted_row(unit_gaps, { title, "Passed", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
||||
++n_success;
|
||||
@@ -330,7 +330,7 @@ void unit_test(void) {
|
||||
}
|
||||
}
|
||||
|
||||
const long long time_spent = duration_ns(perf_counter_ns() - unit_timestamp);
|
||||
const int64_t time_spent = duration_ns(perf_counter_ns() - unit_timestamp);
|
||||
|
||||
if (n_total == 0)
|
||||
formatted_row(unit_gaps, { "Unit testing summary", "No files", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
||||
|
@@ -1,10 +1,10 @@
|
||||
#include "toolbox.hpp"
|
||||
|
||||
inline static constexpr uint64_t u64(const double& n) noexcept { return static_cast<uint64_t>(n); }
|
||||
static constexpr uint64_t u64(const double& n) noexcept { return static_cast<uint64_t>(n); }
|
||||
|
||||
static const constexpr size_t N_TIMES = 11;
|
||||
static const constexpr std::array<const char*, N_TIMES> time_formats = { "ns", "us", "ms", "s", "m", "h", "j", "w", "M", "y", "c" };
|
||||
static const constexpr std::array<uint64_t, N_TIMES> time_numbers = { 1, u64(1e3), u64(1e6), u64(1e9), u64(6e10), u64(36e11), u64(864e11),
|
||||
static constexpr size_t N_TIMES = 11;
|
||||
static constexpr std::array<const char*, N_TIMES> time_formats = { "ns", "us", "ms", "s", "m", "h", "j", "w", "M", "y", "c" };
|
||||
static constexpr std::array<uint64_t, N_TIMES> time_numbers = { 1, u64(1e3), u64(1e6), u64(1e9), u64(6e10), u64(36e11), u64(864e11),
|
||||
u64(6048e11), u64(26784e11), u64(31536e12), u64(31536e14) };
|
||||
|
||||
/**
|
||||
@@ -60,9 +60,9 @@ std::string format_time_ns(uint64_t time) noexcept {
|
||||
return s;
|
||||
}
|
||||
|
||||
static const constexpr size_t N_BYTES = 7;
|
||||
static const constexpr std::array<const char*, N_BYTES> bytes_formats = { "", "K", "M", "G", "T", "P", "E", }; //"Z", "Y" };
|
||||
static const constexpr uint64_t total_bytes = u64(1)<<(10 * (N_BYTES - 1));
|
||||
static constexpr size_t N_BYTES = 7;
|
||||
static constexpr std::array<const char*, N_BYTES> bytes_formats = { "", "K", "M", "G", "T", "P", "E", }; //"Z", "Y" };
|
||||
static constexpr uint64_t total_bytes = u64(1)<<(10 * (N_BYTES - 1));
|
||||
|
||||
/**
|
||||
* @brief Convert the number of bytes to JEDEC standard form.
|
||||
|
@@ -60,12 +60,15 @@ constexpr void header(const std::array<int32_t, N>& gaps, const std::array<const
|
||||
* @param gaps List of size gaps
|
||||
*/
|
||||
template<size_t N>
|
||||
constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept {
|
||||
constexpr void footer(const std::array<int32_t, N>& gaps) noexcept {
|
||||
formatted_line(gaps, "└", "┴", "─", "┘");
|
||||
}
|
||||
|
||||
#define duration_ns(a) std::chrono::duration_cast<std::chrono::nanoseconds>(a).count()
|
||||
#define perf_counter_ns() std::chrono::high_resolution_clock::now()
|
||||
template<typename T>
|
||||
constexpr int64_t duration_ns(const T& a) noexcept {
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>(a).count();
|
||||
}
|
||||
constexpr auto perf_counter_ns = std::chrono::high_resolution_clock::now;
|
||||
|
||||
/**
|
||||
* @brief Format the time in seconds in a human-readable format.
|
||||
|
Reference in New Issue
Block a user