Compare commits

..

3 Commits

Author SHA1 Message Date
saundersp
a7d4ca9198 cpp : Typo fixes 2025-08-21 01:40:18 +02:00
saundersp
014326b1e3 cpp : Removed redundant keywords and replaced macros with constant expressions 2025-08-21 01:38:41 +02:00
saundersp
c55dd14a89 Updated Dockerfiles 2025-08-20 22:53:18 +02:00
11 changed files with 104 additions and 99 deletions

View File

@@ -1,11 +1,11 @@
FROM nvidia/cuda:12.6.2-devel-ubi9 AS builder FROM nvidia/cuda:13.0.0-devel-ubi9 AS builder
WORKDIR /home/ViolaJones/cpp WORKDIR /home/ViolaJones/cpp
COPY *.cu *.cpp *.hpp Makefile ./ COPY *.cu *.cpp *.hpp Makefile ./
RUN make -j "$(nproc)" && make -j "$(nproc)" ./bin/ViolaJonesTest RUN make -j "$(nproc)" && make -j "$(nproc)" ./bin/ViolaJonesTest
FROM nvidia/cuda:12.6.2-base-ubi9 FROM nvidia/cuda:13.0.0-base-ubi9
WORKDIR /home/ViolaJones/cpp WORKDIR /home/ViolaJones/cpp

View File

@@ -2,47 +2,47 @@
#include "data.hpp" #include "data.hpp"
#include "ViolaJones_device.hpp" #include "ViolaJones_device.hpp"
constexpr static inline void add_empty_feature(const np::Array<uint8_t>& feats, size_t& n) noexcept { constexpr static void add_empty_feature(const np::Array<uint8_t>& feats, size_t& n) noexcept {
memset(&feats[n], 0, 4 * sizeof(uint8_t)); memset(&feats[n], 0, 4 * sizeof(uint8_t));
n += 4; n += 4;
} }
constexpr static inline void add_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { constexpr static void add_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
feats[n++] = i + w; feats[n++] = i + w;
feats[n++] = j; feats[n++] = j;
feats[n++] = w; feats[n++] = w;
feats[n++] = h; feats[n++] = h;
} }
constexpr static inline void add_immediate_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { constexpr static void add_immediate_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
feats[n++] = i; feats[n++] = i;
feats[n++] = j; feats[n++] = j;
feats[n++] = w; feats[n++] = w;
feats[n++] = h; feats[n++] = h;
} }
constexpr static inline void add_bottom_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { constexpr static void add_bottom_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
feats[n++] = i; feats[n++] = i;
feats[n++] = j + h; feats[n++] = j + h;
feats[n++] = w; feats[n++] = w;
feats[n++] = h; feats[n++] = h;
} }
constexpr static inline void add_right2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { constexpr static void add_right2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
feats[n++] = i + 2 * w; feats[n++] = i + 2 * w;
feats[n++] = j; feats[n++] = j;
feats[n++] = w; feats[n++] = w;
feats[n++] = h; feats[n++] = h;
} }
constexpr static inline void add_bottom2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { constexpr static void add_bottom2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
feats[n++] = i; feats[n++] = i;
feats[n++] = j + 2 * h; feats[n++] = j + 2 * h;
feats[n++] = w; feats[n++] = w;
feats[n++] = h; feats[n++] = h;
} }
constexpr static inline void add_bottom_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { constexpr static void add_bottom_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
feats[n++] = i + w; feats[n++] = i + w;
feats[n++] = j + h; feats[n++] = j + h;
feats[n++] = w; feats[n++] = w;
@@ -228,7 +228,7 @@ np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>& alphas, cons
} }
/** /**
* @brief Select the best classifer given their predictions. * @brief Select the best classifier given their predictions.
* *
* @param classifiers The weak classifiers * @param classifiers The weak classifiers
* @param weights Trained weights of each classifiers * @param weights Trained weights of each classifiers
@@ -254,7 +254,7 @@ std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array
} }
/** /**
* @brief Train the weak calssifiers. * @brief Train the weak classifiers.
* *
* @param T Number of weak classifiers * @param T Number of weak classifiers
* @param X_feat Integrated features * @param X_feat Integrated features
@@ -271,7 +271,7 @@ std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np:
weights /= np::sum(weights); weights /= np::sum(weights);
const np::Array<float64_t> classifiers = train_weak_clf(X_feat, X_feat_argsort, y, weights); const np::Array<float64_t> classifiers = train_weak_clf(X_feat, X_feat_argsort, y, weights);
const auto [ clf, error, accuracy ] = select_best(classifiers, weights, X_feat, y); const auto [ clf, error, accuracy ] = select_best(classifiers, weights, X_feat, y);
float64_t beta = error / (1.0 - error); const float64_t beta = error / (1.0 - error);
weights *= np::pow(beta, (1.0 - accuracy)); weights *= np::pow(beta, (1.0 - accuracy));
alphas[t] = std::log(1.0 / beta); alphas[t] = std::log(1.0 / beta);
final_classifier[t * 3] = clf; final_classifier[t * 3] = clf;

View File

@@ -86,7 +86,7 @@ T benchmark_function(const char* const step_name, const int32_t& column_width, c
#endif #endif
const std::chrono::system_clock::time_point start = perf_counter_ns(); const std::chrono::system_clock::time_point start = perf_counter_ns();
const T res = fnc(std::forward<Args>(args)...); const T res = fnc(std::forward<Args>(args)...);
const long long time_spent = duration_ns(perf_counter_ns() - start); const int64_t time_spent = duration_ns(perf_counter_ns() - start);
formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
return res; return res;
} }
@@ -109,7 +109,7 @@ void benchmark_function_void(const char* const step_name, const int32_t& column_
#endif #endif
const std::chrono::system_clock::time_point start = perf_counter_ns(); const std::chrono::system_clock::time_point start = perf_counter_ns();
fnc(std::forward<Args>(args)...); fnc(std::forward<Args>(args)...);
const long long time_spent = duration_ns(perf_counter_ns() - start); const int64_t time_spent = duration_ns(perf_counter_ns() - start);
formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
} }

View File

@@ -114,7 +114,7 @@ static np::Array<uint32_t> __scanGPU_3d__(const np::Array<uint32_t>& X) noexcept
np::Array<uint32_t> d_inter = copyToDevice<uint32_t>("inter", inter); np::Array<uint32_t> d_inter = copyToDevice<uint32_t>("inter", inter);
const dim3 dimGrid(n_block_x, n_block_y, k); const dim3 dimGrid(n_block_x, n_block_y, k);
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y); constexpr dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
__kernel_scan_3d__<<<dimGrid, dimBlock>>>(n, height, d_inter, d_X); __kernel_scan_3d__<<<dimGrid, dimBlock>>>(n, height, d_inter, d_X);
_print_cuda_error_("synchronize", cudaDeviceSynchronize()); _print_cuda_error_("synchronize", cudaDeviceSynchronize());
@@ -182,7 +182,7 @@ static np::Array<uint32_t> __transpose_3d__(const np::Array<uint32_t>& X) noexce
const size_t n_block_x = static_cast<size_t>(std::ceil(static_cast<float64_t>(X.shape[1]) / static_cast<float64_t>(NB_THREADS_2D_X))); const size_t n_block_x = static_cast<size_t>(std::ceil(static_cast<float64_t>(X.shape[1]) / static_cast<float64_t>(NB_THREADS_2D_X)));
const size_t n_block_y = static_cast<size_t>(std::ceil(static_cast<float64_t>(X.shape[2]) / static_cast<float64_t>(NB_THREADS_2D_Y))); const size_t n_block_y = static_cast<size_t>(std::ceil(static_cast<float64_t>(X.shape[2]) / static_cast<float64_t>(NB_THREADS_2D_Y)));
const dim3 dimGrid(n_block_x, n_block_y, X.shape[0]); const dim3 dimGrid(n_block_x, n_block_y, X.shape[0]);
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y); constexpr dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
__transpose_kernel__<<<dimGrid, dimBlock>>>(d_X, d_Xt); __transpose_kernel__<<<dimGrid, dimBlock>>>(d_X, d_Xt);
_print_cuda_error_("synchronize", cudaDeviceSynchronize()); _print_cuda_error_("synchronize", cudaDeviceSynchronize());
_print_cuda_error_("memcpy d_Xt", cudaMemcpy(Xt.data, d_Xt.data, np::prod(Xt.shape) * sizeof(uint32_t), cudaMemcpyDeviceToHost)); _print_cuda_error_("memcpy d_Xt", cudaMemcpy(Xt.data, d_Xt.data, np::prod(Xt.shape) * sizeof(uint32_t), cudaMemcpyDeviceToHost));
@@ -274,7 +274,7 @@ np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::
np::Array<uint8_t> d_y = copyToDevice<uint8_t>("y", y); np::Array<uint8_t> d_y = copyToDevice<uint8_t>("y", y);
const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS_3D_X * NB_THREADS_3D_Y * NB_THREADS_3D_Z))); const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS_3D_X * NB_THREADS_3D_Y * NB_THREADS_3D_Z)));
constexpr const dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z); constexpr dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z);
__train_weak_clf_kernel__<<<n_blocks, dimBlock>>>(d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg); __train_weak_clf_kernel__<<<n_blocks, dimBlock>>>(d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg);
_print_cuda_error_("synchronize", cudaDeviceSynchronize()); _print_cuda_error_("synchronize", cudaDeviceSynchronize());
@@ -347,7 +347,7 @@ np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Arr
const size_t dimX = static_cast<size_t>(std::ceil(static_cast<float64_t>(feats.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_X))); const size_t dimX = static_cast<size_t>(std::ceil(static_cast<float64_t>(feats.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_X)));
const size_t dimY = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_ii.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_Y))); const size_t dimY = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_ii.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_Y)));
const dim3 dimGrid(dimX, dimY); const dim3 dimGrid(dimX, dimY);
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y); constexpr dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
__apply_feature_kernel__<<<dimGrid, dimBlock>>>(d_X_feat, d_feats, d_X_ii); __apply_feature_kernel__<<<dimGrid, dimBlock>>>(d_X_feat, d_feats, d_X_ii);
_print_cuda_error_("synchronize", cudaDeviceSynchronize()); _print_cuda_error_("synchronize", cudaDeviceSynchronize());

View File

@@ -6,22 +6,22 @@
#define MODEL_DIR "./models" #define MODEL_DIR "./models"
#ifdef __CUDACC__ #ifdef __CUDACC__
#define NB_THREADS 1024 constexpr size_t NB_THREADS = 1024;
#define NB_THREADS_2D_X 32 constexpr size_t NB_THREADS_2D_X = 32;
#define NB_THREADS_2D_Y 32 constexpr size_t NB_THREADS_2D_Y = 32;
#define NB_THREADS_3D_X 16 constexpr size_t NB_THREADS_3D_X = 16;
#define NB_THREADS_3D_Y 16 constexpr size_t NB_THREADS_3D_Y = 16;
#define NB_THREADS_3D_Z 4 constexpr size_t NB_THREADS_3D_Z = 4;
#define M static_cast<size_t>(log2f(NB_THREADS_2D_Y)) #define M static_cast<size_t>(log2f(NB_THREADS_2D_Y))
#endif #endif
// Save state to avoid recalculation on restart // Save state to avoid recalculation on restart
#define SAVE_STATE true constexpr bool SAVE_STATE = true;
// Redo the state even if it's already saved // Redo the state even if it's already saved
#define FORCE_REDO false constexpr bool FORCE_REDO = false;
// Use GPU to greatly accelerate runtime // Use GPU to greatly accelerate runtime
#define GPU_BOOSTED true constexpr bool GPU_BOOSTED = true;
// Depending on what you set, the output label will be as follow : // Depending on what you set, the output label will be as follow :
// ┌─────────────┬───────┐ // ┌─────────────┬───────┐
// │ GPU_BOOSTED │ LABEL │ // │ GPU_BOOSTED │ LABEL │
@@ -31,16 +31,16 @@
// └─────────────┴───────┘ // └─────────────┴───────┘
// Number of weak classifiers // Number of weak classifiers
// [[maybe_unused]] constexpr const std::array TS{ 1 }; // constexpr std::array TS{ 1 };
// [[maybe_unused]] constexpr const std::array TS{ 1, 5, 10 }; // constexpr std::array TS{ 1, 5, 10 };
[[maybe_unused]] constexpr const std::array TS{ 1, 5, 10, 25, 50 }; constexpr std::array TS{ 1, 5, 10, 25, 50 };
// [[maybe_unused]] constexpr const std::array TS{ 1, 5, 10, 25, 50, 100, 200, 300 }; // constexpr std::array TS{ 1, 5, 10, 25, 50, 100, 200, 300 };
// [[maybe_unused]] constexpr const std::array TS{ 1, 5, 10, 25, 50, 100, 200, 300, 400, 500, 1000 }; // constexpr std::array TS{ 1, 5, 10, 25, 50, 100, 200, 300, 400, 500, 1000 };
// Enable verbose output (for debugging purposes) // Enable verbose output (for debugging purposes)
#define __DEBUG false constexpr bool __DEBUG = false;
// Debugging options // Debugging options
#if __DEBUG #if __DEBUG
#define IDX_INSPECT 4548 constexpr size_t IDX_INSPECT = 4548;
#define IDX_INSPECT_OFFSET 100 constexpr size_t IDX_INSPECT_OFFSET = 100;
#endif #endif

View File

@@ -4,11 +4,11 @@
#include <cmath> #include <cmath>
#include <cassert> #include <cassert>
#include <functional> #include <functional>
#include <stdint.h> #include <cstdint>
#include "config.hpp" #include "config.hpp"
#define BUFFER_SIZE 256 constexpr size_t BUFFER_SIZE = 256;
#define STRING_INT_SIZE 8 // Length of a number in log10 (including '-') constexpr size_t STRING_INT_SIZE = 8; // Length of a number in log10 (including '-')
#ifndef __CUDACC__ #ifndef __CUDACC__
#define __host__ #define __host__
@@ -20,7 +20,7 @@ typedef double float64_t;
typedef long double float128_t; typedef long double float128_t;
namespace np { namespace np {
constexpr const float64_t inf = std::numeric_limits<float64_t>::infinity(); constexpr float64_t inf = std::numeric_limits<float64_t>::infinity();
typedef struct Slice { typedef struct Slice {
size_t x = 0, y = 0, z = 0; size_t x = 0, y = 0, z = 0;
@@ -35,14 +35,14 @@ namespace np {
#endif #endif
__host__ __device__ __host__ __device__
Shape(void) noexcept { constexpr Shape(void) noexcept {
#if __DEBUG #if __DEBUG
printf("Shape created (default)\n"); printf("Shape created (default)\n");
#endif #endif
} }
__host__ __device__ __host__ __device__
Shape(const size_t& length, size_t* const data) noexcept : length(length), data(data), refcount(new size_t(1)) { inline Shape(const size_t& length, size_t* const data) noexcept : length(length), data(data), refcount(new size_t(1)) {
#if __DEBUG #if __DEBUG
printf("Shape created (raw)\n"); printf("Shape created (raw)\n");
for(size_t i = 0; i < length; ++i) for(size_t i = 0; i < length; ++i)
@@ -51,7 +51,7 @@ namespace np {
} }
__host__ __device__ __host__ __device__
Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) { inline Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
#if __DEBUG #if __DEBUG
printf("Shape created (initializer)\n"); printf("Shape created (initializer)\n");
#endif #endif
@@ -65,7 +65,7 @@ namespace np {
} }
__host__ __device__ __host__ __device__
Shape(const Shape& shape) noexcept { constexpr Shape(const Shape& shape) noexcept {
#if __DEBUG #if __DEBUG
printf("Shape created (copy)\n"); printf("Shape created (copy)\n");
#endif #endif
@@ -97,7 +97,7 @@ namespace np {
} }
__host__ __device__ __host__ __device__
Shape(Shape&& shape) noexcept { constexpr Shape(Shape&& shape) noexcept {
#if __DEBUG #if __DEBUG
printf("Shape created (move)\n"); printf("Shape created (move)\n");
#endif #endif
@@ -127,7 +127,7 @@ namespace np {
} }
__host__ __device__ __host__ __device__
~Shape(void) noexcept { inline ~Shape(void) noexcept {
if(refcount == nullptr){ if(refcount == nullptr){
#if __DEBUG #if __DEBUG
printf("Shape refcount freed more than once\n"); printf("Shape refcount freed more than once\n");
@@ -159,7 +159,7 @@ namespace np {
} }
__host__ __device__ __host__ __device__
Shape& operator=(const Shape& shape) noexcept { constexpr Shape& operator=(const Shape& shape) noexcept {
#if __DEBUG #if __DEBUG
printf("Shape created (assign copy)\n"); printf("Shape created (assign copy)\n");
#endif #endif
@@ -190,7 +190,7 @@ namespace np {
} }
__host__ __device__ __host__ __device__
Shape& operator=(Shape&& shape) noexcept { constexpr Shape& operator=(Shape&& shape) noexcept {
#if __DEBUG #if __DEBUG
printf("Shape created (assign move)\n"); printf("Shape created (assign move)\n");
#endif #endif
@@ -221,7 +221,6 @@ namespace np {
return *this; return *this;
} }
__host__ __device__ __host__ __device__
constexpr size_t& operator[](const size_t& i) const { constexpr size_t& operator[](const size_t& i) const {
#if __DEBUG #if __DEBUG
@@ -263,42 +262,42 @@ namespace np {
size_t* refcount = nullptr; size_t* refcount = nullptr;
__host__ __device__ __host__ __device__
Array(void) noexcept { constexpr Array(void) noexcept {
#if __DEBUG #if __DEBUG
printf("Array created (default)\n"); printf("Array created (default)\n");
#endif #endif
} }
__host__ __device__ __host__ __device__
Array(const Shape& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) { constexpr Array(const Shape& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
#if __DEBUG #if __DEBUG
printf("Array created (raw, copy shape)\n"); printf("Array created (raw, copy shape)\n");
#endif #endif
} }
__host__ __device__ __host__ __device__
Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) { constexpr Array(const Shape& _shape) noexcept : shape(_shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
#if __DEBUG #if __DEBUG
printf("Array created (raw empty, copy shape)\n"); printf("Array created (raw empty, copy shape)\n");
#endif #endif
} }
__host__ __device__ __host__ __device__
Array(Shape&& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) { constexpr Array(Shape&& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
#if __DEBUG #if __DEBUG
printf("Array created (raw, move shape)\n"); printf("Array created (raw, move shape)\n");
#endif #endif
} }
__host__ __device__ __host__ __device__
Array(Shape&& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) { constexpr Array(Shape&& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
#if __DEBUG #if __DEBUG
printf("Array created (raw empty, move shape)\n"); printf("Array created (raw empty, move shape)\n");
#endif #endif
} }
__host__ __device__ __host__ __device__
Array(const Array& array) noexcept : shape(array.shape) { constexpr Array(const Array& array) noexcept : shape(array.shape) {
#if __DEBUG #if __DEBUG
printf("Array created (copy)\n"); printf("Array created (copy)\n");
#endif #endif
@@ -326,7 +325,7 @@ namespace np {
} }
__host__ __device__ __host__ __device__
Array(Array&& array) noexcept : shape(std::move(array.shape)) { constexpr Array(Array&& array) noexcept : shape(std::move(array.shape)) {
#if __DEBUG #if __DEBUG
printf("Array created (move)\n"); printf("Array created (move)\n");
#endif #endif
@@ -350,7 +349,7 @@ namespace np {
} }
__host__ __device__ __host__ __device__
~Array(void) noexcept { inline ~Array(void) noexcept {
if(refcount == nullptr){ if(refcount == nullptr){
#if __DEBUG #if __DEBUG
printf("Array refcount freed more than once\n"); printf("Array refcount freed more than once\n");
@@ -379,7 +378,7 @@ namespace np {
} }
__host__ __device__ __host__ __device__
Array& operator=(const Array& array) noexcept { constexpr Array& operator=(const Array& array) noexcept {
#if __DEBUG #if __DEBUG
printf("Array created (assign copy)\n"); printf("Array created (assign copy)\n");
#endif #endif
@@ -409,7 +408,7 @@ namespace np {
} }
__host__ __device__ __host__ __device__
Array& operator=(Array&& array) noexcept { constexpr Array& operator=(Array&& array) noexcept {
#if __DEBUG #if __DEBUG
printf("Array created (assign move)\n"); printf("Array created (assign move)\n");
#endif #endif
@@ -456,36 +455,36 @@ namespace np {
}; };
template<typename T> template<typename T>
inline Array<T> empty(Shape&& shape) noexcept { constexpr Array<T> empty(Shape&& shape) noexcept {
return Array<T>(shape); return Array<T>(shape);
} }
template<typename T> template<typename T>
inline Array<T> empty(const Shape& shape) noexcept { constexpr Array<T> empty(const Shape& shape) noexcept {
return Array<T>(shape); return Array<T>(shape);
} }
template<typename T> template<typename T>
inline Array<T> empty(const std::initializer_list<size_t>& dims) noexcept { constexpr Array<T> empty(const std::initializer_list<size_t>& dims) noexcept {
return Array<T>(dims); return Array<T>(dims);
} }
template<typename T> template<typename T>
Array<T> zeros(Shape&& shape) noexcept { constexpr Array<T> zeros(Shape&& shape) noexcept {
Array<T> res(shape); Array<T> res(shape);
memset(res.data, 0, sizeof(T) * np::prod(res.shape)); memset(res.data, 0, sizeof(T) * np::prod(res.shape));
return res; return res;
} }
template<typename T> template<typename T>
Array<T> zeros(const Shape& shape) noexcept { constexpr Array<T> zeros(const Shape& shape) noexcept {
Array<T> res(shape); Array<T> res(shape);
memset(res.data, 0, sizeof(T) * np::prod(res.shape)); memset(res.data, 0, sizeof(T) * np::prod(res.shape));
return res; return res;
} }
template<typename T> template<typename T>
Array<T> zeros(const std::initializer_list<size_t>& dims) noexcept { constexpr Array<T> zeros(const std::initializer_list<size_t>& dims) noexcept {
Array<T> res(dims); Array<T> res(dims);
memset(res.data, 0, sizeof(T) * np::prod(res.shape)); memset(res.data, 0, sizeof(T) * np::prod(res.shape));
return res; return res;
@@ -599,7 +598,7 @@ namespace np {
template<typename T> template<typename T>
template<typename F> template<typename F>
Array<T> Array<T>::operator-(const F& other) const { constexpr Array<T> Array<T>::operator-(const F& other) const noexcept {
np::Array<T> res = np::empty<T>(shape); np::Array<T> res = np::empty<T>(shape);
const size_t total = prod(shape); const size_t total = prod(shape);
for(size_t i = 0; i < total; ++i) for(size_t i = 0; i < total; ++i)
@@ -609,7 +608,7 @@ namespace np {
template<typename T> template<typename T>
template<typename F> template<typename F>
Array<T> Array<T>::operator-(const np::Array<F>& other) const { constexpr Array<T> Array<T>::operator-(const np::Array<F>& other) const {
#if __DEBUG #if __DEBUG
if (shape != other.shape){ if (shape != other.shape){
printf("Incompatible shapes\n"); printf("Incompatible shapes\n");
@@ -665,7 +664,7 @@ namespace np {
} }
template<typename T, typename F> template<typename T, typename F>
np::Array<T> pow(const F& k, const Array<T>& array) noexcept { constexpr np::Array<T> pow(const F& k, const Array<T>& array) noexcept {
np::Array<T> result = np::empty<T>(array.shape); np::Array<T> result = np::empty<T>(array.shape);
const size_t total = prod(array.shape); const size_t total = prod(array.shape);
for(size_t i = 0; i < total; ++i) for(size_t i = 0; i < total; ++i)
@@ -692,7 +691,7 @@ namespace np {
//} //}
template<typename T, typename F> template<typename T, typename F>
Array<T> astype(const Array<F>& array) noexcept { constexpr Array<T> astype(const Array<F>& array) noexcept {
Array<T> res = empty<T>(array.shape); Array<T> res = empty<T>(array.shape);
const size_t total = prod(array.shape); const size_t total = prod(array.shape);
for(size_t i = 0; i < total; ++i) for(size_t i = 0; i < total; ++i)
@@ -701,7 +700,7 @@ namespace np {
} }
template<typename T> template<typename T>
Array<T> operator-(const T& k, const Array<T>& other) noexcept { constexpr Array<T> operator-(const T& k, const Array<T>& other) noexcept {
np::Array<T> res = empty<T>(other.shape); np::Array<T> res = empty<T>(other.shape);
const size_t total = prod(other.shape); const size_t total = prod(other.shape);
for(size_t i = 0; i < total; ++i) for(size_t i = 0; i < total; ++i)
@@ -748,7 +747,7 @@ constexpr np::Array<T>& map(np::Array<T>& a, const std::function<T(const size_t&
template<typename T> template<typename T>
__host__ __device__ __host__ __device__
constexpr inline static void swap(T* const a, T* const b) noexcept { constexpr void swap(T* const a, T* const b) noexcept {
if (a == b) return; if (a == b) return;
const T temp = *a; const T temp = *a;
*a = *b; *a = *b;
@@ -793,7 +792,7 @@ std::array<np::Array<uint8_t>, 4> load_datasets(void);
void print_error_file(const char* const) noexcept; void print_error_file(const char* const) noexcept;
template<typename T> template<typename T>
void save(const np::Array<T>& d, const char* const filename) { constexpr void save(const np::Array<T>& d, const char* const filename) {
FILE* const output = fopen(filename, "wb"); FILE* const output = fopen(filename, "wb");
if (output == NULL) { if (output == NULL) {
print_error_file(filename); print_error_file(filename);
@@ -808,7 +807,7 @@ void save(const np::Array<T>& d, const char* const filename) {
} }
template<typename T> template<typename T>
np::Array<T> load(const char* const filename) { constexpr np::Array<T> load(const char* const filename) {
FILE* const input = fopen(filename, "rb"); FILE* const input = fopen(filename, "rb");
if (input == NULL) { if (input == NULL) {
print_error_file(filename); print_error_file(filename);
@@ -838,7 +837,7 @@ np::Array<T> load(const char* const filename) {
#ifdef __CUDACC__ #ifdef __CUDACC__
template<typename T> template<typename T>
np::Array<T> copyToDevice(const char* const name, const np::Array<T>& array) noexcept { constexpr np::Array<T> copyToDevice(const char* const name, const np::Array<T>& array) noexcept {
const size_t array_size = np::prod(array.shape) * sizeof(T); const size_t array_size = np::prod(array.shape) * sizeof(T);
const size_t shape_size = array.shape.length * sizeof(size_t); const size_t shape_size = array.shape.length * sizeof(size_t);
np::Array<T> d_array; np::Array<T> d_array;
@@ -869,7 +868,7 @@ constexpr void cudaFree(const char* const name, np::Array<T>& array) noexcept {
array.shape.data = nullptr; array.shape.data = nullptr;
} }
constexpr inline void _print_cuda_error_(const char* const name, const cudaError_t& err) noexcept { constexpr void _print_cuda_error_(const char* const name, const cudaError_t& err) noexcept {
if (err != cudaSuccess) fprintf(stderr, "Error: %s = %d : %s\n", name, err, cudaGetErrorString(err)); if (err != cudaSuccess) fprintf(stderr, "Error: %s = %d : %s\n", name, err, cudaGetErrorString(err));
} }
#endif #endif

View File

@@ -31,7 +31,7 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
for (const char* const folder_name : { "models", "out" }) for (const char* const folder_name : { "models", "out" })
std::filesystem::create_directory(folder_name); std::filesystem::create_directory(folder_name);
const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns(); const std::chrono::time_point<std::chrono::high_resolution_clock> preproc_timestamp = perf_counter_ns();
const std::array<int32_t, 3> preproc_gaps = { 49, -18, 29 }; const std::array<int32_t, 3> preproc_gaps = { 49, -18, 29 };
header(preproc_gaps, { "Preprocessing", "Time spent (ns)", "Formatted time spent" }); header(preproc_gaps, { "Preprocessing", "Time spent (ns)", "Formatted time spent" });
@@ -113,7 +113,7 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
print(X_test_feat_argsort.shape); print(X_test_feat_argsort.shape);
print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
#endif #endif
const long long time_spent = duration_ns(perf_counter_ns() - preproc_timestamp); const int64_t time_spent = duration_ns(perf_counter_ns() - preproc_timestamp);
formatted_line(preproc_gaps, "", "", "", ""); formatted_line(preproc_gaps, "", "", "", "");
formatted_row(preproc_gaps, { "Preprocessing summary", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); formatted_row(preproc_gaps, { "Preprocessing summary", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
footer(preproc_gaps); footer(preproc_gaps);
@@ -154,7 +154,7 @@ std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array
#endif #endif
models[i++] = { alphas, final_classifiers }; models[i++] = { alphas, final_classifiers };
} }
const long long time_spent = duration_ns(perf_counter_ns() - training_timestamp); const int64_t time_spent = duration_ns(perf_counter_ns() - training_timestamp);
formatted_line(training_gaps, "", "", "", ""); formatted_line(training_gaps, "", "", "", "");
formatted_row(training_gaps, { "Training summary", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); formatted_row(training_gaps, { "Training summary", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
footer(training_gaps); footer(training_gaps);
@@ -177,15 +177,15 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
std::array<std::array<float64_t, 8>, TS.size()> results; std::array<std::array<float64_t, 8>, TS.size()> results;
size_t i = 0; size_t i = 0;
long long total_train_timestamp = 0; int64_t total_train_timestamp = 0;
long long total_test_timestamp = 0; int64_t total_test_timestamp = 0;
for (const auto& [ alphas, final_classifiers ] : models) { for (const auto& [ alphas, final_classifiers ] : models) {
char title[BUFFER_SIZE] = { 0 }; char title[BUFFER_SIZE] = { 0 };
snprintf(title, BUFFER_SIZE, "ViolaJones T = %-4i (%s)", TS[i], LABEL); snprintf(title, BUFFER_SIZE, "ViolaJones T = %-4i (%s)", TS[i], LABEL);
std::chrono::system_clock::time_point start = perf_counter_ns(); std::chrono::system_clock::time_point start = perf_counter_ns();
const np::Array<uint8_t> y_pred_train = classify_viola_jones(alphas, final_classifiers, X_train_feat); const np::Array<uint8_t> y_pred_train = classify_viola_jones(alphas, final_classifiers, X_train_feat);
const long long t_pred_train = duration_ns(perf_counter_ns() - start); const int64_t t_pred_train = duration_ns(perf_counter_ns() - start);
total_train_timestamp += t_pred_train; total_train_timestamp += t_pred_train;
const float64_t e_acc = accuracy_score(y_train, y_pred_train); const float64_t e_acc = accuracy_score(y_train, y_pred_train);
const float64_t e_f1 = f1_score(y_train, y_pred_train); const float64_t e_f1 = f1_score(y_train, y_pred_train);
@@ -194,7 +194,7 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
start = perf_counter_ns(); start = perf_counter_ns();
const np::Array<uint8_t> y_pred_test = classify_viola_jones(alphas, final_classifiers, X_test_feat); const np::Array<uint8_t> y_pred_test = classify_viola_jones(alphas, final_classifiers, X_test_feat);
const long long t_pred_test = duration_ns(perf_counter_ns() - start); const int64_t t_pred_test = duration_ns(perf_counter_ns() - start);
total_test_timestamp += t_pred_test; total_test_timestamp += t_pred_test;
const float64_t t_acc = accuracy_score(y_test, y_pred_test); const float64_t t_acc = accuracy_score(y_test, y_pred_test);
const float64_t t_f1 = f1_score(y_test, y_pred_test); const float64_t t_f1 = f1_score(y_test, y_pred_test);
@@ -242,7 +242,7 @@ void unit_test(void) {
++n_total; ++n_total;
const std::chrono::system_clock::time_point start = perf_counter_ns(); const std::chrono::system_clock::time_point start = perf_counter_ns();
const bool state = fnc(); const bool state = fnc();
const long long time_spent = duration_ns(perf_counter_ns() - start); const int64_t time_spent = duration_ns(perf_counter_ns() - start);
if(state){ if(state){
formatted_row(unit_gaps, { title, "Passed", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); formatted_row(unit_gaps, { title, "Passed", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
++n_success; ++n_success;
@@ -330,7 +330,7 @@ void unit_test(void) {
} }
} }
const long long time_spent = duration_ns(perf_counter_ns() - unit_timestamp); const int64_t time_spent = duration_ns(perf_counter_ns() - unit_timestamp);
if (n_total == 0) if (n_total == 0)
formatted_row(unit_gaps, { "Unit testing summary", "No files", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); formatted_row(unit_gaps, { "Unit testing summary", "No files", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });

View File

@@ -1,10 +1,10 @@
#include "toolbox.hpp" #include "toolbox.hpp"
inline static constexpr uint64_t u64(const double& n) noexcept { return static_cast<uint64_t>(n); } static constexpr uint64_t u64(const double& n) noexcept { return static_cast<uint64_t>(n); }
static const constexpr size_t N_TIMES = 11; static constexpr size_t N_TIMES = 11;
static const constexpr std::array<const char*, N_TIMES> time_formats = { "ns", "us", "ms", "s", "m", "h", "j", "w", "M", "y", "c" }; static constexpr std::array<const char*, N_TIMES> time_formats = { "ns", "us", "ms", "s", "m", "h", "j", "w", "M", "y", "c" };
static const constexpr std::array<uint64_t, N_TIMES> time_numbers = { 1, u64(1e3), u64(1e6), u64(1e9), u64(6e10), u64(36e11), u64(864e11), static constexpr std::array<uint64_t, N_TIMES> time_numbers = { 1, u64(1e3), u64(1e6), u64(1e9), u64(6e10), u64(36e11), u64(864e11),
u64(6048e11), u64(26784e11), u64(31536e12), u64(31536e14) }; u64(6048e11), u64(26784e11), u64(31536e12), u64(31536e14) };
/** /**
@@ -60,9 +60,9 @@ std::string format_time_ns(uint64_t time) noexcept {
return s; return s;
} }
static const constexpr size_t N_BYTES = 7; static constexpr size_t N_BYTES = 7;
static const constexpr std::array<const char*, N_BYTES> bytes_formats = { "", "K", "M", "G", "T", "P", "E", }; //"Z", "Y" }; static constexpr std::array<const char*, N_BYTES> bytes_formats = { "", "K", "M", "G", "T", "P", "E", }; //"Z", "Y" };
static const constexpr uint64_t total_bytes = u64(1)<<(10 * (N_BYTES - 1)); static constexpr uint64_t total_bytes = u64(1)<<(10 * (N_BYTES - 1));
/** /**
* @brief Convert the number of byte in JEDEC standard form. * @brief Convert the number of byte in JEDEC standard form.

View File

@@ -60,12 +60,15 @@ constexpr void header(const std::array<int32_t, N>& gaps, const std::array<const
* @param gaps List of size gaps * @param gaps List of size gaps
*/ */
template<size_t N> template<size_t N>
constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept { constexpr void footer(const std::array<int32_t, N>& gaps) noexcept {
formatted_line(gaps, "", "", "", ""); formatted_line(gaps, "", "", "", "");
} }
#define duration_ns(a) std::chrono::duration_cast<std::chrono::nanoseconds>(a).count() template<typename T>
#define perf_counter_ns() std::chrono::high_resolution_clock::now() constexpr int64_t duration_ns(const T& a) noexcept {
return std::chrono::duration_cast<std::chrono::nanoseconds>(a).count();
}
constexpr auto perf_counter_ns = std::chrono::high_resolution_clock::now;
/** /**
* @brief Format the time in seconds in human readable format. * @brief Format the time in seconds in human readable format.
@@ -96,7 +99,7 @@ std::string format_byte_size(uint64_t) noexcept;
* @brief Format a number with a separator (i.e. 1000 as 1,000) * @brief Format a number with a separator (i.e. 1000 as 1,000)
* *
* @param k number to format * @param k number to format
* @param separator used between each thouand * @param separator used between each thousand
* @return Formatted number * @return Formatted number
*/ */
std::string thousand_sep(uint64_t, const char& = ',') noexcept; std::string thousand_sep(uint64_t, const char& = ',') noexcept;

View File

@@ -1,6 +1,6 @@
FROM alpine:3.20.3 FROM alpine:3.22.1
RUN apk add --no-cache curl=8.11.0-r1 python3=3.12.7-r0 && rm -rf /var/cache/apk* RUN apk add --no-cache curl=8.14.1-r1 python3=3.12.11-r0 && rm -rf /var/cache/apk*
WORKDIR /home/ViolaJones/downloader WORKDIR /home/ViolaJones/downloader
COPY requirements.txt activate.sh ./ COPY requirements.txt activate.sh ./

View File

@@ -1,12 +1,15 @@
FROM nvidia/cuda:12.6.2-devel-ubi9 FROM nvidia/cuda:13.0.0-devel-ubi9
RUN dnf install -y python3.12-3.12.1-4.el9_4.4 \ RUN dnf install -y \
make-1:4.3-8.el9 \
python3.12-3.12.9-1.el9_6.1 \
&& dnf clean all \ && dnf clean all \
&& ln -s /usr/bin/python3.12 /usr/bin/python && ln -s /usr/bin/python3.12 /usr/bin/python
WORKDIR /home/ViolaJones/python WORKDIR /home/ViolaJones/python
COPY Makefile activate.sh requirements.txt ./ COPY Makefile activate.sh requirements.txt ./
RUN make venv RUN make venv && rm requirements.txt
COPY *.py ./ COPY *.py ./
ENTRYPOINT ["make"] ENTRYPOINT ["make"]