From c71b04f00d55571729777345da3b7fa5fe5e5fb9 Mon Sep 17 00:00:00 2001 From: saundersp Date: Sun, 28 Apr 2024 22:11:33 +0200 Subject: [PATCH] cpp : Added documentation --- cpp/Makefile | 2 +- cpp/ViolaJones.cpp | 117 ++++++++++++++-- cpp/ViolaJones.hpp | 171 ++++++++++++++++++++++- cpp/ViolaJonesCPU.cpp | 88 +++++++++++- cpp/ViolaJonesCPU.hpp | 8 -- cpp/ViolaJonesGPU.cu | 283 +++++++++++++++++++++++++++----------- cpp/ViolaJonesGPU.hpp | 8 -- cpp/ViolaJones_device.hpp | 39 ++++++ cpp/data.cpp | 27 +--- cpp/data.hpp | 155 ++++++++------------- cpp/data_device.cu | 16 +++ cpp/projet.cpp | 62 ++++----- cpp/toolbox.cpp | 13 +- cpp/toolbox.hpp | 66 ++++++++- cpp/toolbox_unit_test.cpp | 22 ++- cpp/toolbox_unit_test.hpp | 15 ++ 16 files changed, 797 insertions(+), 295 deletions(-) delete mode 100644 cpp/ViolaJonesCPU.hpp delete mode 100644 cpp/ViolaJonesGPU.hpp create mode 100644 cpp/ViolaJones_device.hpp create mode 100644 cpp/data_device.cu diff --git a/cpp/Makefile b/cpp/Makefile index 13fe258..2c5c2a6 100644 --- a/cpp/Makefile +++ b/cpp/Makefile @@ -11,7 +11,7 @@ CFLAGS := -dlto -O2 -Xcompiler -O2 CFLAGS := $(CFLAGS) -MMD -MP -Werror=all-warnings -Xcompiler -Wall,-Werror,-Werror=implicit-fallthrough=0,-Wextra EXEC := $(OBJ_DIR)/ViolaJones DATA := $(DATA_PATH)/X_train.bin $(DATA_PATH)/X_test.bin $(DATA_PATH)/y_train.bin $(DATA_PATH)/y_test.bin -SRC := $(shell find $(SRC_DIR) -name '*.cpp' -o -name '*.cu' ) +SRC := $(shell find $(SRC_DIR) -name '*.cpp' -o -name '*.cu') OBJ_EXT := o ifeq ($(OS), Windows_NT) EXEC := $(EXEC).exe diff --git a/cpp/ViolaJones.cpp b/cpp/ViolaJones.cpp index 93b7be7..8ac7fa5 100644 --- a/cpp/ViolaJones.cpp +++ b/cpp/ViolaJones.cpp @@ -1,55 +1,61 @@ #include #include "data.hpp" -#include "config.hpp" -#include "ViolaJonesGPU.hpp" +#include "ViolaJones_device.hpp" -static inline void add_empty_feature(const np::Array& feats, size_t& n) noexcept { +constexpr static inline void add_empty_feature(const np::Array& feats, size_t& n) noexcept { memset(&feats[n], 0, 4 * sizeof(uint8_t)); n += 4; } -static inline void add_right_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { +constexpr static inline void add_right_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { feats[n++] = i + w; feats[n++] = j; feats[n++] = w; feats[n++] = h; } -static inline void add_immediate_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { +constexpr static inline void add_immediate_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { feats[n++] = i; feats[n++] = j; feats[n++] = w; feats[n++] = h; } -static inline void add_bottom_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { +constexpr static inline void add_bottom_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { feats[n++] = i; feats[n++] = j + h; feats[n++] = w; feats[n++] = h; } -static inline void add_right2_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { +constexpr static inline void add_right2_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, 
const uint16_t& h) noexcept { feats[n++] = i + 2 * w; feats[n++] = j; feats[n++] = w; feats[n++] = h; } -static inline void add_bottom2_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { +constexpr static inline void add_bottom2_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { feats[n++] = i; feats[n++] = j + 2 * h; feats[n++] = w; feats[n++] = h; } -static inline void add_bottom_right_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { +constexpr static inline void add_bottom_right_feature(const np::Array& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept { feats[n++] = i + w; feats[n++] = j + h; feats[n++] = w; feats[n++] = h; } +/** + * @brief Initialize the features based on the input shape. + * + * @param width Width of the image + * @param height Height of the image + * @return The initialized features + */ np::Array build_features(const uint16_t& width, const uint16_t& height) noexcept { size_t n = 0; uint16_t w, h, i, j; @@ -162,6 +168,12 @@ np::Array build_features(const uint16_t& width, const uint16_t& height) // return res; //} +/** + * @brief Initialize the weights of the weak classifiers based on the training labels. + * + * @param y_train Training labels + * @return The initialized weights + */ np::Array init_weights(const np::Array& y_train) noexcept { np::Array weights = np::empty(y_train.shape); const uint16_t t = np::sum(np::astype(y_train)); @@ -171,13 +183,30 @@ np::Array init_weights(const np::Array& y_train) noexcept { })); } -np::Array classify_weak_clf(const np::Array& X_feat_i, const size_t& j, const float64_t& threshold, const float64_t& polarity) noexcept { +/** + * @brief Classify the integrated features based on polarity and threshold. + * + * @param X_feat_i Integrated features + * @param j Index of the classifier + * @param threshold Trained threshold + * @param polarity Trained polarity + * @return Classified features + */ +static np::Array classify_weak_clf(const np::Array& X_feat_i, const size_t& j, const float64_t& threshold, const float64_t& polarity) noexcept { np::Array res = np::empty({ X_feat_i.shape[1] }); for(size_t i = 0; i < res.shape[0]; ++i) res[i] = polarity * X_feat_i[j * X_feat_i.shape[1] + i] < polarity * threshold ? 1 : 0; return res; } +/** + * @brief Classify the given features using the trained classifiers. + * + * @param alphas Trained alphas + * @param classifiers Trained classifiers + * @param X_feat Integrated features + * @return Classification results + */ np::Array classify_viola_jones(const np::Array& alphas, const np::Array& classifiers, const np::Array& X_feat) noexcept { np::Array total = np::zeros({ X_feat.shape[1] }); @@ -198,6 +227,15 @@ np::Array classify_viola_jones(const np::Array& alphas, cons return y_pred; }
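For readers skimming the hunk above: classify_viola_jones implements the usual boosted vote, predicting 1 iff sum_t alpha_t * h_t(x) >= 0.5 * sum_t alpha_t. A minimal standalone sketch of that decision rule (std::vector stands in for np::Array; the 0/1 stump outputs are the values classify_weak_clf produces):

#include <numeric>
#include <vector>

// Alpha-weighted vote of weak classifiers: predict 1 iff the weighted sum of
// stump outputs reaches half of the total alpha mass (as in classify_viola_jones).
static int strong_classify(const std::vector<double>& alphas, const std::vector<int>& stump_outputs) {
	double weighted = 0.0;
	for (size_t t = 0; t < alphas.size(); ++t)
		weighted += alphas[t] * stump_outputs[t];
	const double alpha_sum = std::accumulate(alphas.begin(), alphas.end(), 0.0);
	return weighted >= 0.5 * alpha_sum ? 1 : 0;
}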
+/** + * @brief Select the best classifier given their predictions. + * + * @param classifiers The weak classifiers + * @param weights Trained weights of each classifier + * @param X_feat Integrated features + * @param y Feature labels + * @return Index of the best classifier, the best error and the best accuracy + */ std::tuple> select_best(const np::Array& classifiers, const np::Array& weights, const np::Array& X_feat, const np::Array& y) noexcept { std::tuple> res = { -1, np::inf, np::empty({ X_feat.shape[0] }) }; @@ -215,6 +253,15 @@ std::tuple> select_best(const np::Array return res; } +/** + * @brief Train the weak classifiers. + * + * @param T Number of weak classifiers + * @param X_feat Integrated features + * @param X_feat_argsort Sorted indexes of the integrated features + * @param y Feature labels + * @return List of trained alphas and the list of the final classifiers + */ std::array, 2> train_viola_jones(const size_t& T, const np::Array& X_feat, const np::Array& X_feat_argsort, const np::Array& y) noexcept { np::Array weights = init_weights(y); np::Array alphas = np::empty({ T }); @@ -222,11 +269,7 @@ std::array, 2> train_viola_jones(const size_t& T, const np: for(size_t t = 0; t < T; ++t ){ weights /= np::sum(weights); -#if GPU_BOOSTED - const np::Array classifiers = train_weak_clf_gpu(X_feat, X_feat_argsort, y, weights); -#else - const np::Array classifiers = train_weak_clf_cpu(X_feat, X_feat_argsort, y, weights); -#endif + const np::Array classifiers = train_weak_clf(X_feat, X_feat_argsort, y, weights); const auto [ clf, error, accuracy ] = select_best(classifiers, weights, X_feat, y); float64_t beta = error / (1.0 - error); weights *= np::pow(beta, (1.0 - accuracy)); @@ -238,6 +281,13 @@ std::array, 2> train_viola_jones(const size_t& T, const np: return { alphas, final_classifier }; } +/** + * @brief Compute the accuracy score, i.e. how close a given set of measurements are to their true value. + * + * @param y Ground truth labels + * @param y_pred Predicted labels + * @return Computed accuracy score + */ float64_t accuracy_score(const np::Array& y, const np::Array& y_pred) noexcept { float64_t res = 0.0; for(size_t i = 0; i < y.shape[0]; ++i) @@ -246,6 +296,13 @@ float64_t accuracy_score(const np::Array& y, const np::Array& return res / y.shape[0]; } +/** + * @brief Compute the precision score, i.e. how close a given set of measurements are to each other. + * + * @param y Ground truth labels + * @param y_pred Predicted labels + * @return Computed precision score + */ float64_t precision_score(const np::Array& y, const np::Array& y_pred) noexcept { uint16_t true_positive = 0, false_positive = 0; for(size_t i = 0; i < y.shape[0]; ++i) @@ -258,6 +315,13 @@ float64_t precision_score(const np::Array& y, const np::Array& return static_cast(true_positive) / (true_positive + false_positive); } +/** + * @brief Compute the recall score, i.e. the ratio (TP / (TP + FN)) where TP is the number of true positives and FN the number of false negatives. + * + * @param y Ground truth labels + * @param y_pred Predicted labels + * @return Computed recall score + */ float64_t recall_score(const np::Array& y, const np::Array& y_pred) noexcept { uint16_t true_positive = 0, false_negative = 0; for(size_t i = 0; i < y.shape[0]; ++i) @@ -271,12 +335,35 @@ float64_t recall_score(const np::Array& y, const np::Array& y_ return static_cast(true_positive) / (true_positive + false_negative); }
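A quick worked example of the two scores above: with y = {1, 1, 0, 0} and y_pred = {1, 0, 1, 0} we get TP = 1, FP = 1 and FN = 1, so precision = 1 / (1 + 1) = 0.5 and recall = 1 / (1 + 1) = 0.5. The same arithmetic as a standalone snippet:

#include <cstdint>
#include <cstdio>

int main(void) {
	// Worked example: y = {1,1,0,0}, y_pred = {1,0,1,0} -> TP = FP = FN = 1.
	const uint8_t y[4] = { 1, 1, 0, 0 }, y_pred[4] = { 1, 0, 1, 0 };
	int tp = 0, fp = 0, fn = 0;
	for (int i = 0; i < 4; ++i) {
		tp += y_pred[i] == 1 && y[i] == 1;
		fp += y_pred[i] == 1 && y[i] == 0;
		fn += y_pred[i] == 0 && y[i] == 1;
	}
	// Prints precision=0.500000 recall=0.500000
	printf("precision=%f recall=%f\n", (double)tp / (tp + fp), (double)tp / (tp + fn));
	return 0;
}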
+/** + * @brief Compute the F1 score aka balanced F-score or F-measure. + * + * F1 = (2 * TP) / (2 * TP + FP + FN) + * where TP is the true positives, + * FP is the false positives, + * and FN is the false negatives + * + * @param y Ground truth labels + * @param y_pred Predicted labels + * @return Computed F1 score + */ float64_t f1_score(const np::Array& y, const np::Array& y_pred) noexcept { const float64_t precision = precision_score(y, y_pred); const float64_t recall = recall_score(y, y_pred); return 2 * (precision * recall) / (precision + recall); } +/** + * @brief Compute the confusion matrix to evaluate a given classification. + * + * A confusion matrix of a binary classification consists of a 2x2 matrix containing + * | True negatives | False positives | + * | False negatives | True positives | + * + * @param y Ground truth labels + * @param y_pred Predicted labels + * @return Computed confusion matrix + */ std::tuple confusion_matrix(const np::Array& y, const np::Array& y_pred) noexcept { uint16_t true_positive = 0, false_positive = 0, true_negative = 0, false_negative = 0; for(size_t i = 0; i < y.shape[0]; ++i) diff --git a/cpp/ViolaJones.hpp b/cpp/ViolaJones.hpp index 08bcff9..3473292 100644 --- a/cpp/ViolaJones.hpp +++ b/cpp/ViolaJones.hpp @@ -2,8 +2,15 @@ #include #include "data.hpp" #include "toolbox.hpp" -//#include "config.hpp" +/** + * @brief Test if an array from a CPU computation is equal to its GPU computation equivalent. + * + * @tparam T Inner type of the arrays to test + * @param cpu CPU Array + * @param gpu GPU Array + * @return Whether the test was successful + */ template bool unit_test_cpu_vs_gpu(const np::Array& cpu, const np::Array& gpu) noexcept { if (cpu.shape != gpu.shape) { @@ -27,6 +34,14 @@ bool unit_test_cpu_vs_gpu(const np::Array& cpu, const np::Array& gpu) noex return eq == length; } +/** + * @brief Test if a given 2D array of indices sorts a given 2D array. + * + * @tparam T Inner type of the array to test + * @param a 2D Array of data + * @param indices 2D Indices that sort the array + * @return Whether the test was successful + */ template bool unit_test_argsort_2d(const np::Array& a, const np::Array& indices) noexcept { if (a.shape != indices.shape) { @@ -51,6 +66,18 @@ bool unit_test_argsort_2d(const np::Array& a, const np::Array& indi return correct == total; } +/** + * @brief Benchmark a function and display the result on stdout. + * + * @tparam T Resulting type of the function to benchmark + * @tparam F Signature of the function to call + * @tparam Args Arguments variadic of the function to call + * @param step_name Name of the function to log + * @param column_width Width of the column to print during logging + * @param fnc Function to benchmark + * @param args Arguments to pass to the function to call + * @return Result of the benchmarked function + */ template T benchmark_function(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept { #if __DEBUG == false @@ -64,6 +91,16 @@ T benchmark_function(const char* const step_name, const int32_t& column_width, c return res; } +/** + * @brief Benchmark a function and display the result on stdout.
+ * + * @tparam F Signature of the function to call + * @tparam Args Arguments variadic of the function to call + * @param step_name Name of the function to log + * @param column_width Width of the column to print during logging + * @param fnc Function to benchmark + * @param args Arguments to pass to the function to call + */ template void benchmark_function_void(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept { #if __DEBUG == false @@ -76,6 +113,22 @@ void benchmark_function_void(const char* const step_name, const int32_t& column_ formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); } +/** + * @brief Either execute a function then save the result, or load the already cached result. + * + * @tparam T Inner type of the resulting array + * @tparam F Signature of the function to call + * @tparam Args Arguments variadic of the function to call + * @param step_name Name of the function to log + * @param column_width Width of the column to print during logging + * @param filename Name of the file where the result is saved + * @param force_redo Recall the function even if the result is already saved, ignored if the result is not cached + * @param save_state Whether the computed result will be saved or not, ignored if loading an already cached result + * @param out_dir Path of the directory to save the result + * @param fnc Function to call + * @param args Arguments to pass to the function to call + * @return The result of the called function + */ template np::Array state_saver(const char* const step_name, const int32_t& column_width, const char* const filename, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept { char filepath[BUFFER_SIZE] = { 0 }; @@ -83,7 +136,6 @@ np::Array state_saver(const char* const step_name, const int32_t& column_widt np::Array bin; if (!std::filesystem::exists(filepath) || force_redo) { - //bin = std::move(benchmark_function>(step_name, column_width, fnc, std::forward(args)...)); bin = benchmark_function>(step_name, column_width, fnc, std::forward(args)...); if(save_state){ #if __DEBUG == false @@ -101,13 +153,28 @@ np::Array state_saver(const char* const step_name, const int32_t& column_widt fprintf(stderr, "Loading results of %s\r", step_name); fflush(stderr); #endif - //bin = std::move(load(filepath)); bin = load(filepath); formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" }); } return bin; }
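A representative call site, loosely modeled on the ones in projet.cpp further down (state_saver, FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image and X_train are real names from this patch; the step name and cache key here are illustrative): the array is recomputed and written under OUT_DIR when missing or when FORCE_REDO is set, and loaded from the cached .bin file otherwise.

// Hedged usage sketch of the single-array overload above, not a verbatim call:
const np::Array<uint32_t> X_train_ii = state_saver<uint32_t>(
	"Computing training set integral images",  // step name used for logging
	49,                                        // log column width
	"X_train_ii",                              // cache key -> OUT_DIR "/X_train_ii.bin"
	FORCE_REDO, SAVE_STATE, OUT_DIR,
	set_integral_image, X_train);              // function to benchmark + its arguments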
+/** + * @brief Either execute a function then save the results, or load the already cached results. + * + * @tparam T Inner type of the resulting arrays + * @tparam F Signature of the function to call + * @tparam Args Arguments variadic of the function to call + * @param step_name Name of the function to log + * @param column_width Width of the column to print during logging + * @param filenames List of filenames where the results are saved + * @param force_redo Recall the function even if the results are already saved, ignored if the results are not cached + * @param save_state Whether the computed results will be saved or not, ignored if loading already cached results + * @param out_dir Path of the directory to save the results + * @param fnc Function to call + * @param args Arguments to pass to the function to call + * @return The results of the called function + */ template std::array, N> state_saver(const char* const step_name, const int32_t& column_width, const std::vector& filenames, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept { char filepath[BUFFER_SIZE] = { 0 }; @@ -122,7 +189,6 @@ std::array, N> state_saver(const char* const step_name, const int32 std::array, N> bin; if (abs || force_redo) { - //bin = std::move(benchmark_function, N>>(step_name, column_width, fnc, std::forward(args)...)); bin = benchmark_function, N>>(step_name, column_width, fnc, std::forward(args)...); if (save_state){ #if __DEBUG == false @@ -145,25 +211,116 @@ std::array, N> state_saver(const char* const step_name, const int32 fflush(stderr); #endif size_t i = 0; - bin[i++] = std::move(load(filepath)); for (const char* const filename : filenames){ snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename); + bin[i++] = load(filepath); } formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" }); } return bin; } -np::Array argsort_2d_cpu(const np::Array&) noexcept; +/** + * @brief Initialize the features based on the input shape. + * + * @param width Width of the image + * @param height Height of the image + * @return The initialized features + */ np::Array build_features(const uint16_t&, const uint16_t&) noexcept; -np::Array select_percentile(const np::Array&, const np::Array&) noexcept; +//np::Array select_percentile(const np::Array&, const np::Array&) noexcept; + +/** + * @brief Classify the given features using the trained classifiers. + * + * @param alphas Trained alphas + * @param classifiers Trained classifiers + * @param X_feat Integrated features + * @return Classification results + */ np::Array classify_viola_jones(const np::Array&, const np::Array&, const np::Array&) noexcept; + +/** + * @brief Initialize the weights of the weak classifiers based on the training labels. + * + * @param y_train Training labels + * @return The initialized weights + */ np::Array init_weights(const np::Array&) noexcept; + +/** + * @brief Select the best classifier given their predictions. + * + * @param classifiers The weak classifiers + * @param weights Trained weights of each classifier + * @param X_feat Integrated features + * @param y Feature labels + * @return Index of the best classifier, the best error and the best accuracy + */ std::tuple> select_best(const np::Array&, const np::Array&, const np::Array&, const np::Array&) noexcept; + +/** + * @brief Train the weak classifiers.
+ * + * @param T Number of weak classifiers + * @param X_feat Integrated features + * @param X_feat_argsort Sorted indexes of the integrated features + * @param y Feature labels + * @return List of trained alphas and the list of the final classifiers + */ std::array, 2> train_viola_jones(const size_t&, const np::Array&, const np::Array&, const np::Array&) noexcept; + +/** + * @brief Compute the accuracy score, i.e. how close a given set of measurements are to their true value. + * + * @param y Ground truth labels + * @param y_pred Predicted labels + * @return Computed accuracy score + */ float64_t accuracy_score(const np::Array&, const np::Array&) noexcept; + +/** + * @brief Compute the precision score, i.e. how close a given set of measurements are to each other. + * + * @param y Ground truth labels + * @param y_pred Predicted labels + * @return Computed precision score + */ float64_t precision_score(const np::Array&, const np::Array&) noexcept; + +/** + * @brief Compute the recall score, i.e. the ratio (TP / (TP + FN)) where TP is the number of true positives and FN the number of false negatives. + * + * @param y Ground truth labels + * @param y_pred Predicted labels + * @return Computed recall score + */ float64_t recall_score(const np::Array&, const np::Array&) noexcept; + +/** + * @brief Compute the F1 score aka balanced F-score or F-measure. + * + * F1 = (2 * TP) / (2 * TP + FP + FN) + * where TP is the true positives, + * FP is the false positives, + * and FN is the false negatives + * + * @param y Ground truth labels + * @param y_pred Predicted labels + * @return Computed F1 score + */ float64_t f1_score(const np::Array&, const np::Array&) noexcept; + +/** + * @brief Compute the confusion matrix to evaluate a given classification. + * + * A confusion matrix of a binary classification consists of a 2x2 matrix containing + * | True negatives | False positives | + * | False negatives | True positives | + * + * @param y Ground truth labels + * @param y_pred Predicted labels + * @return Computed confusion matrix + */ std::tuple confusion_matrix(const np::Array&, const np::Array&) noexcept; diff --git a/cpp/ViolaJonesCPU.cpp b/cpp/ViolaJonesCPU.cpp index 49cd654..06fe130 100644 --- a/cpp/ViolaJonesCPU.cpp +++ b/cpp/ViolaJonesCPU.cpp @@ -1,7 +1,15 @@ #include "data.hpp" -#include "toolbox.hpp" +#include "config.hpp" -np::Array set_integral_image_cpu(const np::Array& set) noexcept { +#if GPU_BOOSTED == false + +/** + * @brief Transform the input images into integrated images (CPU version). + * + * @param set Dataset of images + * @return Dataset of integrated images + */ +np::Array set_integral_image(const np::Array& set) noexcept { np::Array X_ii = np::empty(set.shape); size_t i, y, x, s; @@ -31,7 +39,14 @@ constexpr static inline int16_t __compute_feature__(const np::Array& X return X_ii[j + _yh + w] + X_ii[j + _y] - X_ii[j + _yh] - X_ii[j + _y + w]; }
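The four-corner lookup in __compute_feature__ is the classic integral-image trick: once X_ii holds prefix sums, the sum over any w*h rectangle costs four reads instead of w*h. A standalone sketch with explicit 2D indexing (plain std::vector instead of np::Array, assuming the prefix-sum convention that set_integral_image uses):

#include <cstdint>
#include <vector>

// Sum of the w x h rectangle with top-left corner (x, y), given an integral
// image ii of row width W: four reads recover any rectangle sum, mirroring
// the ii[y+h][x+w] + ii[y][x] - ii[y+h][x] - ii[y][x+w] arithmetic above.
static int32_t rect_sum(const std::vector<int32_t>& ii, const size_t W, const size_t x, const size_t y, const size_t w, const size_t h) {
	return ii[(y + h) * W + (x + w)] + ii[y * W + x] - ii[(y + h) * W + x] - ii[y * W + (x + w)];
}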
-np::Array apply_features_cpu(const np::Array& feats, const np::Array& X_ii) noexcept { +/** + * @brief Apply the features on an integrated image dataset (CPU version). + * + * @param feats Features to apply + * @param X_ii Integrated image dataset + * @return Applied features + */ +np::Array apply_features(const np::Array& feats, const np::Array& X_ii) noexcept { np::Array X_feat = np::empty({ feats.shape[0], X_ii.shape[0] }); size_t j, feat_idx = 0; @@ -51,7 +66,7 @@ np::Array apply_features_cpu(const np::Array& feats, const np: return X_feat; } -np::Array train_weak_clf_cpu(const np::Array& X_feat, const np::Array& X_feat_argsort, const np::Array& y, const np::Array& weights) noexcept { +np::Array train_weak_clf(const np::Array& X_feat, const np::Array& X_feat_argsort, const np::Array& y, const np::Array& weights) noexcept { float64_t total_pos = 0.0, total_neg = 0.0; for(size_t i = 0; i < y.shape[0]; ++i) (y[i] == static_cast(1) ? total_pos : total_neg) += weights[i]; @@ -81,7 +96,69 @@ np::Array train_weak_clf_cpu(const np::Array& X_feat, const return classifiers; } -np::Array argsort_2d_cpu(const np::Array& X_feat) noexcept { +/** + * @brief Perform an indirect sort of a given array within a given bound. + * + * @tparam T Inner type of the array + * @param a Array to sort + * @param indices Array of indices to write to + * @param low Lower bound to sort + * @param high Higher bound to sort + */ +template +static void argsort(const T* const a, uint16_t* const indices, size_t low, size_t high) noexcept { + const size_t total = high - low + 1; + + size_t* const stack = new size_t[total]{low, high}; + size_t top = 1; + + while (top <= total) { + high = stack[top--]; + low = stack[top--]; + if(low >= high) + break; + + const size_t p = as_partition(a, indices, low, high); + + if (p - 1 > low && p - 1 < total) { + stack[++top] = low; + stack[++top] = p - 1; + } + + if (p + 1 < high) { + stack[++top] = p + 1; + stack[++top] = high; + } + } + delete[] stack; +} + +/** + * @brief Perform an indirect sort of a given array within a given bound and return the sorting indices. + * + * @tparam T Inner type of the array + * @param a Array to sort + * @param low Lower bound to sort + * @param high Higher bound to sort + * @return Array of indices that sort the array + */ +template +static np::Array argsort_bounded(const np::Array& a, const size_t& low, const size_t& high) noexcept { + np::Array indices = np::empty(a.shape); + map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; }); + + argsort(a, indices, low, high); + return indices; +} + +/** + * @brief Perform an indirect sort on each column of a given 2D array (CPU version).
+ * + * @param a 2D Array to sort + * @return 2D Array of indices that sort the array + */ +np::Array argsort_2d(const np::Array& X_feat) noexcept { const np::Array indices = np::empty(X_feat.shape); const size_t length = np::prod(X_feat.shape); for (size_t i = 0; i < length; i += X_feat.shape[1]) { @@ -91,3 +168,4 @@ np::Array argsort_2d_cpu(const np::Array& X_feat) noexcept { return indices; } +#endif // GPU_BOOSTED == false diff --git a/cpp/ViolaJonesCPU.hpp b/cpp/ViolaJonesCPU.hpp deleted file mode 100644 index e3d7c7c..0000000 --- a/cpp/ViolaJonesCPU.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once -#include "data.hpp" - -np::Array set_integral_image_cpu(const np::Array&) noexcept; -np::Array apply_features_cpu(const np::Array&, const np::Array&) noexcept; -np::Array train_weak_clf_cpu(const np::Array&, const np::Array&, const np::Array&, - const np::Array&) noexcept; -np::Array argsort_2d_cpu(const np::Array&) noexcept; diff --git a/cpp/ViolaJonesGPU.cu b/cpp/ViolaJonesGPU.cu index b491aed..2589ce4 100644 --- a/cpp/ViolaJonesGPU.cu +++ b/cpp/ViolaJonesGPU.cu @@ -1,5 +1,14 @@ #include "data.hpp" +#include "config.hpp" +#if GPU_BOOSTED + +/** + * @brief Prefix Sum (scan) of a given dataset. + * + * @param X Dataset of images to apply sum + * @return Scanned dataset of images + */ static np::Array __scanCPU_3d__(const np::Array& X) noexcept { np::Array X_scan = np::empty(X.shape); const size_t total = np::prod(X_scan.shape); @@ -16,6 +25,14 @@ static np::Array __scanCPU_3d__(const np::Array& X) noexcept return X_scan; } +/** + * @brief GPU kernel used to do a parallel prefix sum (scan). + * + * @param n Number of width blocks + * @param j Temporary sum index + * @param d_inter Temporary sums on device to add + * @param d_X Dataset of images on device to apply sum + */ static __global__ void __kernel_scan_3d__(const uint16_t n, const uint16_t j, np::Array d_inter, np::Array d_X) { const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x; const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y; @@ -60,6 +77,14 @@ static __global__ void __kernel_scan_3d__(const uint16_t n, const uint16_t j, np d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] = sA[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y]; } +/** + * @brief GPU kernel for parallel sum. + * + * @param d_X Dataset of images on device + * @param d_s Temporary sums to add on device + * @param n Number of width blocks + * @param m Height of a block + */ static __global__ void __add_3d__(np::Array d_X, const np::Array d_s, const uint16_t n, const uint16_t m) { const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x; const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y; @@ -67,6 +92,14 @@ static __global__ void __add_3d__(np::Array d_X, const np::Array __scanGPU_3d__(const np::Array& X) noexcept { np::Array X_scan = np::empty(X.shape); @@ -112,6 +145,12 @@ static np::Array __scanGPU_3d__(const np::Array& X) noexcept return X_scan; } +/** + * @brief GPU kernel of the function __transpose_3d__. 
+ * + * @param d_X Dataset of images on device + * @param d_Xt Transposed dataset of images on device + */ static __global__ void __transpose_kernel__(const np::Array d_X, np::Array d_Xt) { __shared__ uint32_t temp[NB_THREADS_2D_X * NB_THREADS_2D_Y]; @@ -128,6 +167,12 @@ static __global__ void __transpose_kernel__(const np::Array d_X, np::A d_Xt[blockIdx.z * d_Xt.shape[1] * d_Xt.shape[2] + x * d_X.shape[2] + y] = temp[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y]; } +/** + * @brief Transpose every image in the given dataset. + * + * @param X Dataset of images + * @return Transposed dataset of images + */ static np::Array __transpose_3d__(const np::Array& X) noexcept { np::Array Xt = np::empty({ X.shape[0], X.shape[2], X.shape[1] }); @@ -147,7 +192,13 @@ static np::Array __transpose_3d__(const np::Array& X) noexce return Xt; } -np::Array set_integral_image_gpu(const np::Array& X) noexcept { +/** + * @brief Transform the input images into integrated images (GPU version). + * + * @param X Dataset of images + * @return Dataset of integrated images + */ +np::Array set_integral_image(const np::Array& X) noexcept { np::Array X_ii = np::astype(X); X_ii = __scanCPU_3d__(X_ii); X_ii = __transpose_3d__(X_ii); @@ -155,53 +206,17 @@ np::Array set_integral_image_gpu(const np::Array& X) noexcept return __transpose_3d__(X_ii); } -static inline __device__ int16_t __compute_feature__(const np::Array& d_X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept { - const size_t _y = y * d_X_ii.shape[1] + x; - const size_t _yh = _y + h * d_X_ii.shape[1]; - return d_X_ii[j + _yh + w] + d_X_ii[j + _y] - d_X_ii[j + _yh] - d_X_ii[j + _y + w]; -} - -static __global__ void __apply_feature_kernel__(int32_t* d_X_feat, const np::Array d_feats, const np::Array d_X_ii) { - size_t i = blockIdx.x * blockDim.x + threadIdx.x; - size_t j = blockIdx.y * blockDim.y + threadIdx.y; - - if (i >= d_feats.shape[0] || j >= d_X_ii.shape[0]) - return; - - const size_t k = i * d_X_ii.shape[0] + j; - i *= np::prod(d_feats.shape, 1); - j *= np::prod(d_X_ii.shape, 1); - const int16_t p1 = __compute_feature__(d_X_ii, j, d_feats[i + 0], d_feats[i + 1], d_feats[i + 2], d_feats[i + 3]); - const int16_t p2 = __compute_feature__(d_X_ii, j, d_feats[i + 4], d_feats[i + 5], d_feats[i + 6], d_feats[i + 7]); - const int16_t n1 = __compute_feature__(d_X_ii, j, d_feats[i + 8], d_feats[i + 9], d_feats[i + 10], d_feats[i + 11]); - const int16_t n2 = __compute_feature__(d_X_ii, j, d_feats[i + 12], d_feats[i + 13], d_feats[i + 14], d_feats[i + 15]); - d_X_feat[k] = static_cast(p1 + p2) - static_cast(n1 + n2); -} - -np::Array apply_features_gpu(const np::Array& feats, const np::Array& X_ii) noexcept { - const np::Array X_feat = np::empty({ feats.shape[0], X_ii.shape[0] }); - int32_t* d_X_feat = nullptr; - - _print_cuda_error_("malloc d_X_feat", cudaMalloc(&d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t))); - np::Array d_X_ii = copyToDevice("X_ii", X_ii); - np::Array d_feats = copyToDevice("feats", feats); - - const size_t dimX = static_cast(std::ceil(static_cast(feats.shape[0]) / static_cast(NB_THREADS_2D_X))); - const size_t dimY = static_cast(std::ceil(static_cast(X_ii.shape[0]) / static_cast(NB_THREADS_2D_Y))); - const dim3 dimGrid(dimX, dimY); - constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y); - __apply_feature_kernel__<<>>(d_X_feat, d_feats, d_X_ii); - _print_cuda_error_("synchronize", cudaDeviceSynchronize()); - - _print_cuda_error_("memcpy X_feat", cudaMemcpy(X_feat.data, d_X_feat,
np::prod(X_feat.shape) * sizeof(int32_t), cudaMemcpyDeviceToHost)); - - _print_cuda_error_("free d_X_feat", cudaFree(d_X_feat)); - cudaFree("free d_feats", d_feats); - cudaFree("free d_X_11", d_X_ii); - - return X_feat; -} - +/** + * @brief GPU kernel of the function train_weak_clf. + * + * @param d_classifiers Weak classifiers on device to train + * @param d_y Labels of the features on device + * @param d_X_feat Feature images dataset on device + * @param d_X_feat_argsort Sorted indexes of the integrated features on device + * @param d_weights Weights of the features on device + * @param total_pos Total of positive labels in the dataset + * @param total_neg Total of negative labels in the dataset + */ static __global__ void __train_weak_clf_kernel__(np::Array d_classifiers, const np::Array d_y, const np::Array d_X_feat, const np::Array d_X_feat_argsort, const np::Array d_weights, const float64_t total_pos, const float64_t total_neg) { @@ -210,7 +225,7 @@ static __global__ void __train_weak_clf_kernel__(np::Array d_classifi i += threadIdx.x * blockDim.y * blockDim.z; i += threadIdx.y * blockDim.z; i += threadIdx.z; - // const size_t i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= d_classifiers.shape[0]) return; @@ -235,7 +250,16 @@ static __global__ void __train_weak_clf_kernel__(np::Array d_classifi d_classifiers[i * 2] = best_threshold; d_classifiers[i * 2 + 1] = best_polarity; } -np::Array train_weak_clf_gpu(const np::Array& X_feat, const np::Array& X_feat_argsort, const np::Array& y, +/** + * @brief Train the weak classifiers on a given dataset (GPU version). + * + * @param X_feat Feature images dataset + * @param X_feat_argsort Sorted indexes of the integrated features + * @param y Labels of the features + * @param weights Weights of the features + * @return Trained weak classifiers + */ +np::Array train_weak_clf(const np::Array& X_feat, const np::Array& X_feat_argsort, const np::Array& y, const np::Array& weights) noexcept { float64_t total_pos = 0.0, total_neg = 0.0; for(size_t i = 0; i < y.shape[0]; ++i) @@ -251,8 +275,6 @@ np::Array train_weak_clf_gpu(const np::Array& X_feat, const const size_t n_blocks = static_cast(std::ceil(static_cast(X_feat.shape[0]) / static_cast(NB_THREADS_3D_X * NB_THREADS_3D_Y * NB_THREADS_3D_Z))); constexpr const dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z); - // const size_t n_blocks = static_cast(std::ceil(static_cast(X_feat.shape[0]) / static_cast(NB_THREADS))); - // constexpr const dim3 dimBlock(NB_THREADS); __train_weak_clf_kernel__<<>>(d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg); _print_cuda_error_("synchronize", cudaDeviceSynchronize()); @@ -267,28 +289,118 @@ np::Array train_weak_clf_gpu(const np::Array& X_feat, const return classifiers; } +/** + * @brief Compute a feature on an integrated image at a specific coordinate (GPU version). + * + * @param d_X_ii Dataset of integrated images on device + * @param j Image index in the dataset + * @param x X coordinate + * @param y Y coordinate + * @param w Width of the feature + * @param h Height of the feature + * @return Computed feature value + */ +static inline __device__ int16_t __compute_feature__(const np::Array& d_X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept { + const size_t _y = y * d_X_ii.shape[1] + x; + const size_t _yh = _y + h * d_X_ii.shape[1]; + return d_X_ii[j + _yh + w] + d_X_ii[j + _y] - d_X_ii[j + _yh] - d_X_ii[j + _y + w]; +}
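Every launch in this file sizes its grid with the same ceil-division idiom (std::ceil over float casts); an equivalent integer-only helper makes the intent explicit. div_ceil is a hypothetical name, not part of this patch:

#include <cstddef>

// Integer ceil-division used when sizing CUDA grids: number of blocks of
// `block` threads needed to cover `n` items (the last block may be partial).
constexpr size_t div_ceil(const size_t n, const size_t block) noexcept {
	return (n + block - 1) / block;
}

// e.g. const dim3 dimGrid(div_ceil(feats.shape[0], NB_THREADS_2D_X),
//                         div_ceil(X_ii.shape[0], NB_THREADS_2D_Y));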
+ +/** + * @brief GPU kernel of the function apply_features. + * + * @param d_X_feat Dataset of image features on device + * @param d_feats Features on device to apply + * @param d_X_ii Integrated image dataset on device + */ +static __global__ void __apply_feature_kernel__(int32_t* d_X_feat, const np::Array d_feats, const np::Array d_X_ii) { + size_t i = blockIdx.x * blockDim.x + threadIdx.x; + size_t j = blockIdx.y * blockDim.y + threadIdx.y; + + if (i >= d_feats.shape[0] || j >= d_X_ii.shape[0]) + return; + + const size_t k = i * d_X_ii.shape[0] + j; + i *= np::prod(d_feats.shape, 1); + j *= np::prod(d_X_ii.shape, 1); + const int16_t p1 = __compute_feature__(d_X_ii, j, d_feats[i + 0], d_feats[i + 1], d_feats[i + 2], d_feats[i + 3]); + const int16_t p2 = __compute_feature__(d_X_ii, j, d_feats[i + 4], d_feats[i + 5], d_feats[i + 6], d_feats[i + 7]); + const int16_t n1 = __compute_feature__(d_X_ii, j, d_feats[i + 8], d_feats[i + 9], d_feats[i + 10], d_feats[i + 11]); + const int16_t n2 = __compute_feature__(d_X_ii, j, d_feats[i + 12], d_feats[i + 13], d_feats[i + 14], d_feats[i + 15]); + d_X_feat[k] = static_cast(p1 + p2) - static_cast(n1 + n2); +} + +/** + * @brief Apply the features on an integrated image dataset (GPU version). + * + * @param feats Features to apply + * @param X_ii Integrated image dataset + * @return Applied features + */ +np::Array apply_features(const np::Array& feats, const np::Array& X_ii) noexcept { + const np::Array X_feat = np::empty({ feats.shape[0], X_ii.shape[0] }); + int32_t* d_X_feat = nullptr; + + _print_cuda_error_("malloc d_X_feat", cudaMalloc(&d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t))); + np::Array d_X_ii = copyToDevice("X_ii", X_ii); + np::Array d_feats = copyToDevice("feats", feats); + + const size_t dimX = static_cast(std::ceil(static_cast(feats.shape[0]) / static_cast(NB_THREADS_2D_X))); + const size_t dimY = static_cast(std::ceil(static_cast(X_ii.shape[0]) / static_cast(NB_THREADS_2D_Y))); + const dim3 dimGrid(dimX, dimY); + constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y); + __apply_feature_kernel__<<>>(d_X_feat, d_feats, d_X_ii); + _print_cuda_error_("synchronize", cudaDeviceSynchronize()); + + _print_cuda_error_("memcpy X_feat", cudaMemcpy(X_feat.data, d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t), cudaMemcpyDeviceToHost)); + + _print_cuda_error_("free d_X_feat", cudaFree(d_X_feat)); + cudaFree("free d_feats", d_feats); + cudaFree("free d_X_ii", d_X_ii); + + return X_feat; } + +/** + * @brief Partition of the argsort algorithm. + * + * @tparam T Inner type of the array + * @param d_a Array on device to sort + * @param d_indices Array of indices on device to write to + * @param low Lower bound to sort + * @param high Higher bound to sort + * @return Last index sorted + */ template -__device__ inline static int32_t as_partition_gpu(const T* a, uint16_t* const indices, const size_t l, const size_t h) noexcept { - int32_t i = l - 1; - for (int32_t j = l; j <= h; ++j) - if (a[indices[j]] < a[indices[h]]) - swap(&indices[++i], &indices[j]); - swap(&indices[++i], &indices[h]); +__device__ inline static int32_t _as_partition_(const T* d_a, uint16_t* const d_indices, const size_t low, const size_t high) noexcept { + int32_t i = low - 1; + for (int32_t j = low; j <= high; ++j) + if (d_a[d_indices[j]] < d_a[d_indices[high]]) + swap(&d_indices[++i], &d_indices[j]); + swap(&d_indices[++i], &d_indices[high]); return i; } +/** + * @brief CUDA kernel to perform an indirect sort of a given array within a given bound.
+ * + * @tparam T Inner type of the array + * @param d_a Array on device to sort + * @param d_indices Array of indices on device to write to + * @param low Lower bound to sort + * @param high Higher bound to sort + */ template -__device__ void argsort_gpu(const T* a, uint16_t* const indices, const size_t l, const size_t h) noexcept { - const size_t total = h - l + 1; +__device__ void argsort_kernel(const T* d_a, uint16_t* const d_indices, size_t low, size_t high) noexcept { + const size_t total = high - low + 1; - //int32_t* stack = new int32_t[total]{l, h}; + //int32_t* stack = new int32_t[total]{low, high}; //int32_t stack[total]; int32_t stack[6977]; //int32_t stack[1<<16]; - stack[0] = l; - stack[1] = h; + stack[0] = low; + stack[1] = high; - size_t top = 1, low = l, high = h; + size_t top = 1; while (top <= total) { high = stack[top--]; @@ -296,7 +408,7 @@ __device__ void argsort_gpu(const T* a, uint16_t* const indices, const size_t l, if(low >= high) break; - const int32_t p = as_partition_gpu(a, indices, low, high); + const int32_t p = _as_partition_(d_a, d_indices, low, high); if (p - 1 > low && p - 1 < total) { stack[++top] = low; @@ -311,42 +423,49 @@ __device__ void argsort_gpu(const T* a, uint16_t* const indices, const size_t l, //delete[] stack; } +/** + * @brief CUDA kernel where argsort is applied to every column of a given 2D array. + * + * @tparam T Inner type of the array + * @param d_a 2D Array on device to sort + * @param d_indices 2D Array of indices on device to write to + */ template -__global__ void argsort_bounded_gpu(const np::Array a, uint16_t* const indices){ +__global__ void argsort_bounded(const np::Array d_a, uint16_t* const d_indices){ const size_t idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= a.shape[0]) + if (idx >= d_a.shape[0]) return; - for(size_t y = 0; y < a.shape[1]; ++y) indices[idx * a.shape[1] + y] = y; - argsort_gpu(&a[idx * a.shape[1]], &indices[idx * a.shape[1]], 0, a.shape[1] - 1); + for(size_t y = 0; y < d_a.shape[1]; ++y) d_indices[idx * d_a.shape[1] + y] = y; + argsort_kernel(&d_a[idx * d_a.shape[1]], &d_indices[idx * d_a.shape[1]], 0, d_a.shape[1] - 1); } -np::Array argsort_2d_gpu(const np::Array& X_feat) noexcept { - const np::Array indices = np::empty(X_feat.shape); +/** + * @brief Perform an indirect sort on each column of a given 2D array (GPU version).
+ * + * @param a 2D Array to sort + * @return 2D Array of indices that sort the array + */ +np::Array argsort_2d(const np::Array& a) noexcept { + const np::Array indices = np::empty(a.shape); uint16_t* d_indices = nullptr; const size_t indices_size = np::prod(indices.shape) * sizeof(uint16_t); - np::Array d_X_feat = copyToDevice("X_feat", X_feat); + np::Array d_a = copyToDevice("a", a); _print_cuda_error_("malloc d_indices", cudaMalloc(&d_indices, indices_size)); - const size_t dimGrid = static_cast(std::ceil(static_cast(X_feat.shape[0]) / static_cast(NB_THREADS))); + const size_t dimGrid = static_cast(std::ceil(static_cast(a.shape[0]) / static_cast(NB_THREADS))); const dim3 dimBlock(NB_THREADS); - argsort_bounded_gpu<<>>(d_X_feat, d_indices); + argsort_bounded<<>>(d_a, d_indices); _print_cuda_error_("synchronize", cudaDeviceSynchronize()); _print_cuda_error_("memcpy d_indices", cudaMemcpy(indices.data, d_indices, indices_size, cudaMemcpyDeviceToHost)); - cudaFree("free d_X_feat", d_X_feat); + cudaFree("free d_a", d_a); _print_cuda_error_("free d_indices", cudaFree(d_indices)); return indices; } -__host__ __device__ -size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept { - size_t result = shape[offset]; - for(size_t i = 1 + offset; i < shape.length; ++i) - result *= shape[i]; - return result; -} +#endif // GPU_BOOSTED diff --git a/cpp/ViolaJonesGPU.hpp b/cpp/ViolaJonesGPU.hpp deleted file mode 100644 index 0b16fbe..0000000 --- a/cpp/ViolaJonesGPU.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once -#include "data.hpp" - -np::Array set_integral_image_gpu(const np::Array&) noexcept; -np::Array apply_features_gpu(const np::Array&, const np::Array&) noexcept; -np::Array train_weak_clf_gpu(const np::Array& X_feat, const np::Array& X_feat_argsort, const np::Array& y, - const np::Array& weights) noexcept; -np::Array argsort_2d_gpu(const np::Array& X_feat) noexcept; diff --git a/cpp/ViolaJones_device.hpp b/cpp/ViolaJones_device.hpp new file mode 100644 index 0000000..1647f70 --- /dev/null +++ b/cpp/ViolaJones_device.hpp @@ -0,0 +1,39 @@ +#pragma once +#include "data.hpp" + +/** + * @brief Transform the input images into integrated images. + * + * @param X Dataset of images + * @return Dataset of integrated images + */ +np::Array set_integral_image(const np::Array&) noexcept; + +/** + * @brief Apply the features on an integrated image dataset. + * + * @param feats Features to apply + * @param X_ii Integrated image dataset + * @return Applied features + */ +np::Array apply_features(const np::Array&, const np::Array&) noexcept;
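This header is the point of the refactor: ViolaJonesCPU.cpp and ViolaJonesGPU.cu now guard their definitions with GPU_BOOSTED, so exactly one translation unit defines each symbol declared here and call sites drop the _cpu/_gpu suffixes. A minimal sketch of the pattern with hypothetical file and function names (config.hpp providing GPU_BOOSTED is real):

// work.hpp — single device-neutral declaration
int do_work(int x) noexcept;

// work_cpu.cpp — compiled in, but empty unless the CPU path is selected
#include "config.hpp"
#if GPU_BOOSTED == false
int do_work(int x) noexcept { return x + 1; }  // CPU definition
#endif

// work_gpu.cu — symmetric guard, so the linker always sees exactly one definition
#include "config.hpp"
#if GPU_BOOSTED
int do_work(int x) noexcept { return x + 2; }  // GPU-backed definition
#endif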
+ +/** + * @brief Train the weak classifiers on a given dataset. + * + * @param X_feat Feature images dataset + * @param X_feat_argsort Sorted indexes of the integrated features + * @param y Labels of the features + * @param weights Weights of the features + * @return Trained weak classifiers + */ +np::Array train_weak_clf(const np::Array&, const np::Array&, const np::Array&, + const np::Array&) noexcept; + +/** + * @brief Perform an indirect sort on each column of a given 2D array. + * + * @param a 2D Array to sort + * @return 2D Array of indices that sort the array + */ +np::Array argsort_2d(const np::Array&) noexcept; diff --git a/cpp/data.cpp b/cpp/data.cpp index 38b13f0..941cb18 100644 --- a/cpp/data.cpp +++ b/cpp/data.cpp @@ -16,14 +16,6 @@ int32_t print(const np::Shape& shape) noexcept { template int32_t print(const np::Array& array, const char* const format) noexcept { - //printf("["); - //const size_t length = np::prod(array.shape); - //for(size_t i = 0; i < length - 1; ++i) - // //std::cout << array[i] << " "; - // printf("%f ", array[i]); - ////std::cout << array[array.shape[0] - 1] << "]\n"; - //printf("%f]\n", array[length - 1]); - char format_space[BUFFER_SIZE] = { 0 }; snprintf(format_space, BUFFER_SIZE,"%s ", format); char format_close[BUFFER_SIZE] = { 0 }; @@ -75,7 +67,7 @@ int32_t print_feat(const np::Array& array, const np::Slice& slice) noex int32_t print(const np::Array& array, const np::Slice& slice) noexcept { int32_t num_written = 0; if (array.shape.length == 1) { - const size_t max = slice.y - 1; //std::min(slice.y, array.shape[0] - 1); + const size_t max = slice.y - 1; num_written += printf("["); for (size_t i = slice.x; i < max; ++i) num_written += printf("%hu ", array[i]); @@ -98,7 +90,7 @@ int32_t print(const np::Array& array, const np::Slice& slice) noexcept int32_t print(const np::Array& array, const np::Slice& slice) noexcept { int32_t num_written = 0; if (array.shape.length == 1) { - const size_t max = slice.y - 1; //std::min(slice.y, array.shape[0] - 1); + const size_t max = slice.y - 1; num_written += printf("["); for (size_t i = slice.x; i < max; ++i) num_written += printf("%iu ", array[i]); @@ -121,7 +113,6 @@ int32_t print(const np::Array& array, const np::Slice& slice) noexcept int32_t print(const np::Array& array, const np::Slice& slice) noexcept { int32_t num_written = 0; num_written += printf("["); - //size_t k = slice.x * array.shape[1] * array.shape[2] + slice.y * array.shape[2] + slice.z; size_t k = slice.x * array.shape[1]; for (size_t i = k; i < k + (slice.y - slice.x); ++i) { num_written += printf("%5i ", array[i]); @@ -133,7 +124,6 @@ int32_t print(const np::Array& array, const np::Slice& slice) noexcept int32_t print(const np::Array& array, const np::Slice& slice) noexcept { int32_t num_written = 0; num_written += printf("["); - //size_t k = slice.x * array.shape[1] * array.shape[2] + slice.y * array.shape[2] + slice.z; size_t k = slice.x * array.shape[1]; for (size_t i = k; i < k + (slice.y - slice.x); ++i) { num_written += printf("%5hu ", array[i]); @@ -171,7 +161,6 @@ static inline np::Array load_set(const char* const set_name) { if (c == ' ' || c == '\n') { buff[j] = '\0'; a[i++] = static_cast(atoi(buff)); - //memset(buff, 0, STRING_INT_SIZE); j = 0; } else @@ -189,6 +178,11 @@ static inline np::Array load_set(const char* const set_name) { return a; } +/** + * @brief Load the datasets.
+ * + * @return Array containing X_train, y_train, X_test, y_test + */ std::array, 4> load_datasets(void) { return { load_set(DATA_DIR "/X_train.bin"), load_set(DATA_DIR "/y_train.bin"), @@ -201,10 +195,3 @@ void print_error_file(const char* const file_dir) noexcept { fprintf(stderr, "Can't open %s, error code = %d : %s\n", file_dir, errno, buff); // delete buff; } - -//size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept { -// size_t result = shape[offset]; -// for(size_t i = 1 + offset; i < shape.length; ++i) -// result *= shape[i]; -// return result; -//} diff --git a/cpp/data.hpp b/cpp/data.hpp index 871e032..101cac5 100644 --- a/cpp/data.hpp +++ b/cpp/data.hpp @@ -35,10 +35,10 @@ namespace np { #endif __host__ __device__ -// #if __DEBUG -// print("Shape created (default)"); -// #endif Shape(void) noexcept { +#if __DEBUG + printf("Shape created (default)\n"); +#endif } __host__ __device__ @@ -52,9 +52,9 @@ namespace np { __host__ __device__ Shape(const std::initializer_list& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) { -// #if __DEBUG -// print("Shape created (initializer)"); -// #endif +#if __DEBUG + printf("Shape created (initializer)\n"); +#endif const size_t* const begin = dims.begin(); for(size_t i = 0; i < length; ++i){ data[i] = begin[i]; @@ -98,9 +98,9 @@ namespace np { __host__ __device__ Shape(Shape&& shape) noexcept { -// #if __DEBUG -// print("Shape created (move)); -// #endif +#if __DEBUG + printf("Shape created (move)\n"); +#endif if (data != nullptr && data != shape.data){ #if __DEBUG printf("Former shape deleted (move)\n"); #endif @@ -129,27 +129,27 @@ namespace np { __host__ __device__ ~Shape(void) noexcept { if(refcount == nullptr){ -// #if __DEBUG -// print("Shape refcount freed more than once"); -// #endif +#if __DEBUG + printf("Shape refcount freed more than once\n"); +#endif return; } --(*refcount); -// #if __DEBUG -// printf("Shape destructed : %lu\n", *refcount); -// #endif +#if __DEBUG + printf("Shape destructed : %lu\n", *refcount); +#endif if(*refcount == 0){ if (data != nullptr){ delete[] data; data = nullptr; -// #if __DEBUG -// print("Shape freeing ..."); -// #endif +#if __DEBUG + printf("Shape freeing ...\n"); +#endif } -//#if __DEBUG +#if __DEBUG else printf("Shape freed more than once : %lu\n", *refcount); -//#endif +#endif delete refcount; refcount = nullptr; #if __DEBUG @@ -191,9 +191,9 @@ namespace np { __host__ __device__ Shape& operator=(Shape&& shape) noexcept { -// #if __DEBUG -// print("Shape created (assign move)"); -// #endif +#if __DEBUG + printf("Shape created (assign move)\n"); +#endif if (data != nullptr && data != shape.data){ #if __DEBUG printf("Former shape deleted (assign move)\n"); #endif @@ -210,6 +210,8 @@ namespace np { data = shape.data; refcount = shape.refcount; #if __DEBUG + if (refcount == nullptr) + printf("Assigned copy shape has null refcount\n"); total = shape.total; shape.total = 1; #endif @@ -261,37 +263,38 @@ namespace np { size_t* refcount = nullptr; __host__ __device__ -// #if __DEBUG -// print("Array created (default)"); Array(void) noexcept { +#if __DEBUG + printf("Array created (default)\n"); +#endif } __host__ __device__ -// #if __DEBUG -// print("Array created (raw, copy shape)"); -// #endif Array(const Shape& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) { +#if __DEBUG + printf("Array created (raw, copy shape)\n"); +#endif } __host__ __device__ Array(const Shape& shape) noexcept : shape(shape), data(new 
T[np::prod(shape)]), refcount(new size_t(1)) { -// #if __DEBUG -// print("Array created (raw empty, copy shape)"); -// #endif +#if __DEBUG + printf("Array created (raw empty, copy shape)\n"); +#endif } __host__ __device__ -// #if __DEBUG -// print("Array created (raw, move shape)"); -// #endif Array(Shape&& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) { +#if __DEBUG + printf("Array created (raw, move shape)\n"); +#endif } __host__ __device__ -// #if __DEBUG -// print("Array created (raw empty, move shape)"); -// #endif Array(Shape&& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) { +#if __DEBUG + printf("Array created (raw empty, move shape)\n"); +#endif } __host__ __device__ @@ -323,10 +326,10 @@ namespace np { } __host__ __device__ -// #if __DEBUG -// print("Array created (move)"); -// #endif Array(Array&& array) noexcept : shape(std::move(array.shape)) { +#if __DEBUG + printf("Array created (move)\n"); +#endif if (data != nullptr && data != array.data){ #if __DEBUG printf("Former array deleted (move)\n"); @@ -349,22 +352,22 @@ namespace np { __host__ __device__ ~Array(void) noexcept { if(refcount == nullptr){ -// #if __DEBUG -// print("Array refcount freed more than once"); -// #endif +#if __DEBUG + printf("Array refcount freed more than once\n"); +#endif return; } --(*refcount); -// #if __DEBUG -// printf("Array destructed : %lu\n", *refcount); -// #endif +#if __DEBUG + printf("Array destructed : %lu\n", *refcount); +#endif if(*refcount == 0){ if (data != nullptr){ delete[] data; data = nullptr; -// #if __DEBUG -// print("Array freeing ..."); -// #endif +#if __DEBUG + printf("Array freeing ...\n"); +#endif } #if __DEBUG else @@ -400,16 +403,16 @@ namespace np { (*refcount)++; #if __DEBUG else -#endif printf("Assigned array has null refcount\n"); +#endif return *this; } __host__ __device__ Array& operator=(Array&& array) noexcept { -// #if __DEBUG -// print("Array created (assign move)"); -// #endif +#if __DEBUG + printf("Array created (assign move)\n"); +#endif if (data != nullptr && data != array.data){ #if __DEBUG printf("Former array deleted (assign move)\n"); @@ -786,48 +789,6 @@ static size_t as_partition(const T* const a, uint16_t* const indices, const size return i; } -template -void argsort(const T* const a, uint16_t* const indices, const size_t& l, const size_t& h) noexcept { - const size_t total = h - l + 1; - - size_t* const stack = new size_t[total]{l, h}; - size_t top = 1, low = l, high = h; - - while (top <= total) { - high = stack[top--]; - low = stack[top--]; - if(low >= high) - break; - - const size_t p = as_partition(a, indices, low, high); - - if (p - 1 > low && p - 1 < total) { - stack[++top] = low; - stack[++top] = p - 1; - } - - if (p + 1 < high) { - stack[++top] = p + 1; - stack[++top] = high; - } - } - delete[] stack; -} - -template -np::Array argsort(const np::Array& other, const size_t& l, const size_t& h) noexcept { - np::Array indices = np::empty(other.shape); - map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; }); - - argsort(other, indices, l, h); - return indices; -} - -template -np::Array argsort(const np::Array* const other, const size_t& length) noexcept { - return argsort(other, 0, length - 1); -} - std::array, 4> load_datasets(void); void print_error_file(const char* const) noexcept; @@ -838,10 +799,10 @@ void save(const np::Array& d, const char* const filename) { print_error_file(filename); throw; } - assert(d.shape.refcount != 0);//, "Refcount shape 
is zero !!"); + assert(d.shape.refcount != 0); fwrite(&d.shape.length, sizeof(size_t), 1, output); fwrite(d.shape.data, sizeof(size_t), d.shape.length, output); - assert(d.refcount != 0);//, "Refcount array is zero !!"); + assert(d.refcount != 0); fwrite(d.data, sizeof(T), np::prod(d.shape), output); fclose(output); } diff --git a/cpp/data_device.cu b/cpp/data_device.cu new file mode 100644 index 0000000..1fc1504 --- /dev/null +++ b/cpp/data_device.cu @@ -0,0 +1,16 @@ +#include "data.hpp" + +/** + * @brief Product of every elements in a given shape after a given offset. + * + * @param shape Shape to product over + * @param offset Skip offset + * @return Scalar product + */ +__host__ __device__ +size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept { + size_t result = shape[offset]; + for(size_t i = 1 + offset; i < shape.length; ++i) + result *= shape[i]; + return result; +} diff --git a/cpp/projet.cpp b/cpp/projet.cpp index a0e06f9..50af60a 100644 --- a/cpp/projet.cpp +++ b/cpp/projet.cpp @@ -4,20 +4,13 @@ #include "config.hpp" #include "toolbox_unit_test.hpp" #include "ViolaJones.hpp" +#include "ViolaJones_device.hpp" #if GPU_BOOSTED -#include "ViolaJonesGPU.hpp" #include "gpu_unit_test.hpp" #define LABEL "GPU" -#define apply_features apply_features_gpu -#define set_integral_image set_integral_image_gpu -#define argsort_2d argsort_2d_gpu #else -#include "ViolaJonesCPU.hpp" #define LABEL "CPU" -#define apply_features apply_features_cpu -#define set_integral_image set_integral_image_cpu -#define argsort_2d argsort_2d_cpu #endif /** @@ -28,7 +21,7 @@ * - Calculate features * - Calculate integral images * - Apply features to images - * - Calculate argsort of the featured images. + * - Calculate argsort of the featured images * * @return std::tuple, np::Array, np::Array, np::Array, np::Array> Tuple containing in order : training features, training features sorted indexes, training labels, testing features, testing labels */ @@ -40,7 +33,7 @@ std::tuple, np::Array, np::Array, np::Arra const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns(); const std::array preproc_gaps = { 49, -18, 29 }; - header({ "Preprocessing", "Time spent (ns)", "Formatted time spent" }, preproc_gaps); + header(preproc_gaps, { "Preprocessing", "Time spent (ns)", "Formatted time spent" }); const auto [ X_train, y_train, X_test, y_test ] = state_saver("Loading sets", preproc_gaps[0], { "X_train", "y_train", "X_test", "y_test" }, FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets); @@ -97,8 +90,7 @@ std::tuple, np::Array, np::Array, np::Arra print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); #endif - // const np::Array indices = measure_time_save>("Selecting best features", preproc_gaps[0], "indices", select_percentile, X_train_feat, d.y_train); - // const np::Array indices = measure_time>("Selecting best features", preproc_gaps[0], select_percentile, X_train_feat, d.y_train); + // const np::Array indices = state_saver("Selecting best features", preproc_gaps[0], "indices", select_percentile, X_train_feat, d.y_train); #if __DEBUG // print_feature(indices); @@ -113,13 +105,13 @@ std::tuple, np::Array, np::Array, np::Arra print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); #endif - // const np::Array X_test_feat_argsort = state_saver("Precalculating testing set argsort (" LABEL ")", preproc_gaps[0], "X_test_feat_argsort_" LABEL, - // FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat); + const np::Array X_test_feat_argsort = 
diff --git a/cpp/projet.cpp b/cpp/projet.cpp
index a0e06f9..50af60a 100644
--- a/cpp/projet.cpp
+++ b/cpp/projet.cpp
@@ -4,20 +4,13 @@
 #include "config.hpp"
 #include "toolbox_unit_test.hpp"
 #include "ViolaJones.hpp"
+#include "ViolaJones_device.hpp"

 #if GPU_BOOSTED
-#include "ViolaJonesGPU.hpp"
 #include "gpu_unit_test.hpp"
 #define LABEL "GPU"
-#define apply_features apply_features_gpu
-#define set_integral_image set_integral_image_gpu
-#define argsort_2d argsort_2d_gpu
 #else
-#include "ViolaJonesCPU.hpp"
 #define LABEL "CPU"
-#define apply_features apply_features_cpu
-#define set_integral_image set_integral_image_cpu
-#define argsort_2d argsort_2d_cpu
 #endif

 /**
@@ -28,7 +21,7 @@
  * - Calculate features
  * - Calculate integral images
  * - Apply features to images
- * - Calculate argsort of the featured images.
+ * - Calculate argsort of the featured images
  *
  * @return std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Array<int32_t>, np::Array<uint8_t>> Tuple containing in order : training features, training features sorted indexes, training labels, testing features, testing labels
  */
 std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
 	const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns();
 	const std::array<int32_t, 3> preproc_gaps = { 49, -18, 29 };
-	header({ "Preprocessing", "Time spent (ns)", "Formatted time spent" }, preproc_gaps);
+	header(preproc_gaps, { "Preprocessing", "Time spent (ns)", "Formatted time spent" });

 	const auto [ X_train, y_train, X_test, y_test ] = state_saver("Loading sets", preproc_gaps[0],
 		{ "X_train", "y_train", "X_test", "y_test" }, FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets);
@@ -97,8 +90,7 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
 	print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
 #endif

-	// const np::Array indices = measure_time_save("Selecting best features", preproc_gaps[0], "indices", select_percentile, X_train_feat, d.y_train);
-	// const np::Array indices = measure_time("Selecting best features", preproc_gaps[0], select_percentile, X_train_feat, d.y_train);
+	// const np::Array indices = state_saver("Selecting best features", preproc_gaps[0], "indices", select_percentile, X_train_feat, d.y_train);

 #if __DEBUG
 	// print_feature(indices);
@@ -113,13 +105,13 @@
 	print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
 #endif

-	// const np::Array X_test_feat_argsort = state_saver("Precalculating testing set argsort (" LABEL ")", preproc_gaps[0], "X_test_feat_argsort_" LABEL,
-	//	FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);
+	const np::Array<uint16_t> X_test_feat_argsort = state_saver("Precalculating testing set argsort (" LABEL ")", preproc_gaps[0], "X_test_feat_argsort_" LABEL,
+		FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);

 #if __DEBUG
-	// printf("X_test_feat_argsort\n");
-	// print(X_test_feat_argsort.shape);
-	// print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
+	printf("X_test_feat_argsort\n");
+	print(X_test_feat_argsort.shape);
+	print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
 #endif
 	const long long time_spent = duration_ns(perf_counter_ns() - preproc_timestamp);
 	formatted_line(preproc_gaps, "├", "┼", "─", "┤");
@@ -131,15 +123,15 @@
 /**
  * @brief Train the weak classifiers.
  *
- * @param X_train_feat Training images.
- * @param X_train_feat_argsort Sorted indexes of the training images features.
- * @param y_train Training labels.
- * @return Trained models
+ * @param X_train_feat Training images
+ * @param X_train_feat_argsort Sorted indexes of the training images features
+ * @param y_train Training labels
+ * @return List of trained models
 */
 std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_train_feat_argsort, const np::Array<uint8_t>& y_train) noexcept {
 	const std::chrono::system_clock::time_point training_timestamp = perf_counter_ns();
 	const std::array<int32_t, 3> training_gaps = { 26, -18, 29 };
-	header({ "Training", "Time spent (ns)", "Formatted time spent" }, training_gaps);
+	header(training_gaps, { "Training", "Time spent (ns)", "Formatted time spent" });

 	std::array<std::array<np::Array<float64_t>, 2>, TS.size()> models;
@@ -173,14 +165,15 @@
 /**
  * @brief Benchmark the trained classifiers on the training and testing sets.
  *
- * @param X_train_feat Training features.
- * @param y_train Training labels.
- * @param X_test_feat Testing features.
- * @param y_test Testing labels.
+ * @param models List of trained models
+ * @param X_train_feat Training features
+ * @param y_train Training labels
+ * @param X_test_feat Testing features
+ * @param y_test Testing labels
 */
 void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>, TS.size()>& models, const np::Array<int32_t>& X_train_feat, const np::Array<uint8_t>& y_train, const np::Array<int32_t>& X_test_feat, const np::Array<uint8_t>& y_test) {
 	const std::array<int32_t, 5> testing_gaps = { 26, -19, 24, -19, 24 };
-	header({ "Testing", "Time spent (ns) (E)", "Formatted time spent (E)", "Time spent (ns) (T)", "Formatted time spent (T)" }, testing_gaps);
+	header(testing_gaps, { "Testing", "Time spent (ns) (E)", "Formatted time spent (E)", "Time spent (ns) (T)", "Formatted time spent (T)" });

 	std::array, TS.size()> results;
 	size_t i = 0;
@@ -216,7 +209,7 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
 	footer(testing_gaps);

 	const std::array<int32_t, 9> evaluating_gaps = { 19, -7, -6, -6, -6, -7, -6, -6, -6 };
-	header({ "Evaluating", "ACC (E)", "F1 (E)", "FN (E)", "FP (E)", "ACC (T)", "F1 (T)", "FN (T)", "FP (T)" }, evaluating_gaps);
+	header(evaluating_gaps, { "Evaluating", "ACC (E)", "F1 (E)", "FN (E)", "FP (E)", "ACC (T)", "F1 (T)", "FN (T)", "FP (T)" });

 	i = 0;
 	for (const size_t T : TS) {
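For reference, the ACC and F1 columns printed here follow the standard confusion-matrix formulas; an illustrative helper (not part of this patch) would be:

	// tp, tn, fp, fn: true/false positives/negatives counted over a labelled set
	constexpr float64_t accuracy(const size_t tp, const size_t tn, const size_t fp, const size_t fn) noexcept {
		return static_cast<float64_t>(tp + tn) / static_cast<float64_t>(tp + tn + fp + fn);
	}

	constexpr float64_t f1_score(const size_t tp, const size_t fp, const size_t fn) noexcept {
		// harmonic mean of precision tp/(tp+fp) and recall tp/(tp+fn)
		return static_cast<float64_t>(2 * tp) / static_cast<float64_t>(2 * tp + fp + fn);
	}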
@@ -231,14 +224,13 @@
 /**
  * @brief Test if the each result is equals to other devices.
  *
- * Given ViolaJones is a fully deterministic algorithm.
- * The results, regardless the device, should be the same.
- * This function check this assertion.
+ * Given that ViolaJones is a fully deterministic algorithm, the results, regardless of the device, should be the same;
+ * this function checks this assertion.
 */
 void unit_test(void) {
 	const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
 	const std::array<int32_t, 4> unit_gaps = { 37, -10, -18, 29 };
-	header({ "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent" }, unit_gaps);
+	header(unit_gaps, { "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent" });

 	char title[BUFFER_SIZE] = { 0 };
 	char tmp_title[BUFFER_SIZE / 2] = { 0 };
@@ -300,7 +292,7 @@ void unit_test(void) {
 			snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label);
 			snprintf(title, BUFFER_SIZE, "%-22s - CPU argsort", tmp_title);
 			test_fnc(title, [&X_feat, &X_feat_argsort_cpu, &file_cpu]{
-				X_feat_argsort_cpu = std::move(load(file_cpu));
+				X_feat_argsort_cpu = load(file_cpu);
 				return unit_test_argsort_2d(X_feat, X_feat_argsort_cpu);
 			});
 		}
@@ -311,7 +303,7 @@ void unit_test(void) {
 			snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label);
 			snprintf(title, BUFFER_SIZE, "%-22s - GPU argsort", tmp_title);
 			test_fnc(title, [&X_feat, &X_feat_argsort_gpu, &file_gpu]{
-				X_feat_argsort_gpu = std::move(load(file_gpu));
+				X_feat_argsort_gpu = load(file_gpu);
 				return unit_test_argsort_2d(X_feat, X_feat_argsort_gpu);
 			});
 		}
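The property that unit_test_argsort_2d verifies can be sketched as follows (an illustrative reimplementation, not the actual test code): every row of the index matrix, applied to the matching row of features, must yield a non-decreasing sequence.

	template <typename T>
	static bool is_row_sorted(const np::Array<T>& feats, const np::Array<uint16_t>& indices) noexcept {
		const size_t cols = feats.shape[1];
		for (size_t row = 0; row < feats.shape[0]; ++row)
			for (size_t i = 0; i + 1 < cols; ++i)
				// read the feature values through the per-row sorted indices
				if (feats[row * cols + indices[row * cols + i]] > feats[row * cols + indices[row * cols + i + 1]])
					return false;
		return true;
	}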
@@ -355,7 +347,7 @@ int32_t main(void){
 	const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
 	const std::array<int32_t, 3> unit_gaps = { 27, -18, 29 };
-	header({ "Unit testing", "Time spent (ns)", "Formatted time spent" }, unit_gaps);
+	header(unit_gaps, { "Unit testing", "Time spent (ns)", "Formatted time spent" });
 #if GPU_BOOSTED
 	benchmark_function_void("Testing GPU capabilities 1D", unit_gaps[0], test_working, 50000);
 	benchmark_function_void("Testing GPU capabilities 2D", unit_gaps[0], test_working_2d, 200, 500);
diff --git a/cpp/toolbox.cpp b/cpp/toolbox.cpp
index 161410d..acf16de 100644
--- a/cpp/toolbox.cpp
+++ b/cpp/toolbox.cpp
@@ -11,7 +11,7 @@ static const constexpr std::array time_numbers = { 1, u64(1e3 *
  * @brief Format the time in seconds in human readable format.
  *
  * @param time number of seconds
- * @return The formatted human readable string.
+ * @return The formatted human readable string
  */
 std::string format_time(uint64_t time) noexcept {
 	if (time == 0)
@@ -38,7 +38,7 @@ std::string format_time(uint64_t time) noexcept {
  * @brief Format the time in nanoseconds in human readable format.
  *
  * @param time Time in nanoseconds
- * @return std::string The formatted human readable string.
+ * @return The formatted human readable string
  */
 std::string format_time_ns(uint64_t time) noexcept {
 	if (time == 0)
@@ -115,13 +115,4 @@ std::string thousand_sep(uint64_t k, const char& separator) noexcept {
 	}

 	return s;
-
-	//uint64_t len = n.length(), dlen = 3;
-
-	//while (len > dlen) {
-	//	n.insert(len - dlen, 1, separator);
-	//	dlen += 4;
-	//	len += 1;
-	//}
-	//return n;
 }
diff --git a/cpp/toolbox.hpp b/cpp/toolbox.hpp
index 004d7ad..b9adca0 100644
--- a/cpp/toolbox.hpp
+++ b/cpp/toolbox.hpp
@@ -3,6 +3,13 @@
 #include
 #include

+/**
+ * @brief Print a formatted row of titles with given gaps, separated by a separator.
+ *
+ * @param gaps List of size gaps
+ * @param titles List of titles
+ * @param separator Separator character between each gap
+ */
 template <size_t N>
 constexpr void formatted_row(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles, const char* const separator = "│") noexcept {
@@ -11,10 +18,19 @@
 	printf("%s\n", separator);
 }

+/**
+ * @brief Print a formatted line of repeated characters.
+ *
+ * @param gaps List of size gaps
+ * @param left Character on the left
+ * @param middle Character between each gap
+ * @param separator Character repeated to fill each gap
+ * @param right Character on the right
+ */
 template <size_t N>
-constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* const right, const char* const middle,
-		const char* const separator, const char* const left) noexcept {
-	printf("%s", right);
+constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* const left, const char* const middle,
+		const char* const separator, const char* const right) noexcept {
+	printf("%s", left);
 	for(size_t i = 0; i < N; ++i){
 		for(int32_t j = std::abs(gaps[i]) + 2; j > 0; --j)
 			printf("%s", separator);
@@ -22,16 +38,27 @@
 		printf("%s", middle);
 	}
-	printf("%s\n", left);
+	printf("%s\n", right);
 }

+/**
+ * @brief Print a formatted header with the given titles and sizes.
+ *
+ * @param gaps List of size gaps
+ * @param titles List of titles
+ */
 template <size_t N>
-constexpr void header(const std::array<const char* const, N>& titles, const std::array<int32_t, N>& gaps) noexcept {
+constexpr void header(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles) noexcept {
 	formatted_line(gaps, "┌", "┬", "─", "┐");
 	formatted_row(gaps, titles);
 	formatted_line(gaps, "├", "┼", "─", "┤");
 }

+/**
+ * @brief Print a formatted footer with the given sizes.
+ *
+ * @param gaps List of size gaps
+ */
 template <size_t N>
 constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept {
 	formatted_line(gaps, "└", "┴", "─", "┘");
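Put together, these helpers draw the box tables used throughout projet.cpp; a minimal usage sketch (the titles and gap widths here are made up):

	const std::array<int32_t, 2> gaps = { 12, -9 };
	header(gaps, { "Step", "Result" });	// draws the top frame and the title row
	formatted_row(gaps, { "Unit tests", "passed" });	// one body row per call
	footer(gaps);	// closes the frame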
@@ -40,7 +67,36 @@
 #define duration_ns(a) std::chrono::duration_cast<std::chrono::nanoseconds>(a).count()
 #define perf_counter_ns() std::chrono::high_resolution_clock::now()

+/**
+ * @brief Format the time in seconds in human readable format.
+ *
+ * @param time Number of seconds
+ * @return The formatted human readable string
+ */
 std::string format_time(uint64_t) noexcept;
+
+/**
+ * @brief Format the time in nanoseconds in human readable format.
+ *
+ * @param time Time in nanoseconds
+ * @return The formatted human readable string
+ */
 std::string format_time_ns(uint64_t) noexcept;
+
+/**
+ * @brief Convert a number of bytes into JEDEC standard form.
+ * See more : https://en.wikipedia.org/wiki/JEDEC_memory_standards
+ *
+ * @param bytes Number of bytes
+ * @return JEDEC compliant formatted number of bytes
+ */
 std::string format_byte_size(uint64_t) noexcept;
+
+/**
+ * @brief Format a number with a separator (e.g. 1000 as 1,000)
+ *
+ * @param k Number to format
+ * @param separator Separator used between each thousand
+ * @return Formatted number
+ */
 std::string thousand_sep(uint64_t, const char& = ',') noexcept;
diff --git a/cpp/toolbox_unit_test.cpp b/cpp/toolbox_unit_test.cpp
index 5ae3a77..c75e558 100644
--- a/cpp/toolbox_unit_test.cpp
+++ b/cpp/toolbox_unit_test.cpp
@@ -2,14 +2,25 @@
 #include <iostream>
 #include <cassert>

+/**
+ * @brief Test if a given result is equal to the expected one and log the result.
+ *
+ * @tparam T Type of the compared values
+ * @param name Name of the unit test
+ * @param expected Expected result
+ * @param result Actual result of the function call
+ */
 template <typename T>
-void Assert(const char* const name, const T& expected, const T& result) noexcept {
+static void Assert(const char* const name, const T& expected, const T& result) noexcept {
 	if(expected != result){
 		std::cerr << "For test named " << name << " Expected '" << expected << "' but got '" << result << "' instead\n";
 		assert(false);
 	}
 }

+/**
+ * @brief Test suite for the format_byte_size output
+ */
 void format_byte_size_test(void) noexcept {
 	Assert("format_byte_size null", std::string("0B"), format_byte_size(static_cast<uint64_t>(0)));
 	Assert("format_byte_size byte", std::string("1B"), format_byte_size(static_cast<uint64_t>(1)));
@@ -26,6 +37,9 @@ void format_byte_size_test(void) noexcept {
 	Assert("format_byte_size max", std::string("15EB 1023PB 1023TB 1023GB 1023MB 1023KB 1023B"), format_byte_size(static_cast<uint64_t>(-1)));
 }

+/**
+ * @brief Test suite for the format_time output
+ */
 void format_time_test(void) noexcept {
 	// https://en.wikipedia.org/wiki/Unit_of_time
 	Assert("format_time null", std::string("0s"), format_time(static_cast<uint64_t>(0)));
@@ -80,6 +94,9 @@ void format_time_test(void) noexcept {
 	Assert("format_time max", std::string("5849424173c 55y 3w 5j 7h 15s"), format_time(static_cast<uint64_t>(-1)));
 }

+/**
+ * @brief Test suite for the format_time_ns output
+ */
 void format_time_ns_test(void) noexcept {
 	// https://en.wikipedia.org/wiki/Unit_of_time
 	Assert("format_time_ns null", std::string("0ns"), format_time_ns(static_cast<uint64_t>(0)));
@@ -140,6 +157,9 @@ void format_time_ns_test(void) noexcept {
 	Assert("format_time_ns max", std::string("5c 84y 11M 2j 23h 34m 33s 709ms 551us 615ns"), format_time_ns(static_cast<uint64_t>(-1)));
 }

+/**
+ * @brief Test suite for the thousand_sep output
+ */
 void thousand_sep_test(void) noexcept {
 	// https://en.wikipedia.org/wiki/Names_of_large_numbers
 	Assert("thousand_sep null", std::string("0"), thousand_sep(static_cast<uint64_t>(0)));
diff --git a/cpp/toolbox_unit_test.hpp b/cpp/toolbox_unit_test.hpp
index 121c165..e11cf74 100644
--- a/cpp/toolbox_unit_test.hpp
+++ b/cpp/toolbox_unit_test.hpp
@@ -1,6 +1,21 @@
 #pragma once

+/**
+ * @brief Test suite for the format_byte_size output
+ */
 void format_byte_size_test(void) noexcept;
+
+/**
+ * @brief Test suite for the format_time output
+ */
 void format_time_test(void) noexcept;
+
+/**
+ * @brief Test suite for the format_time_ns output
+ */
 void format_time_ns_test(void) noexcept;
+
+/**
+ * @brief Test suite for the thousand_sep output
+ */
 void thousand_sep_test(void) noexcept;
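Extending one of these suites is a single Assert call inside toolbox_unit_test.cpp (Assert is now static, so it stays internal to that translation unit); an illustrative extra case, assuming the usual three-digit grouping performed by thousand_sep:

	Assert("thousand_sep million", std::string("1,000,000"), thousand_sep(static_cast<uint64_t>(1000000)));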