cpp : Added documentation
This commit is contained in:
parent
f7ac38b93a
commit
c71b04f00d
@ -11,7 +11,7 @@ CFLAGS := -dlto -O2 -Xcompiler -O2
|
|||||||
CFLAGS := $(CFLAGS) -MMD -MP -Werror=all-warnings -Xcompiler -Wall,-Werror,-Werror=implicit-fallthrough=0,-Wextra
|
CFLAGS := $(CFLAGS) -MMD -MP -Werror=all-warnings -Xcompiler -Wall,-Werror,-Werror=implicit-fallthrough=0,-Wextra
|
||||||
EXEC := $(OBJ_DIR)/ViolaJones
|
EXEC := $(OBJ_DIR)/ViolaJones
|
||||||
DATA := $(DATA_PATH)/X_train.bin $(DATA_PATH)/X_test.bin $(DATA_PATH)/y_train.bin $(DATA_PATH)/y_test.bin
|
DATA := $(DATA_PATH)/X_train.bin $(DATA_PATH)/X_test.bin $(DATA_PATH)/y_train.bin $(DATA_PATH)/y_test.bin
|
||||||
SRC := $(shell find $(SRC_DIR) -name '*.cpp' -o -name '*.cu' )
|
SRC := $(shell find $(SRC_DIR) -name '*.cpp' -o -name '*.cu')
|
||||||
OBJ_EXT := o
|
OBJ_EXT := o
|
||||||
ifeq ($(OS), Windows_NT)
|
ifeq ($(OS), Windows_NT)
|
||||||
EXEC := $(EXEC).exe
|
EXEC := $(EXEC).exe
|
||||||
|
@ -1,55 +1,61 @@
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include "data.hpp"
|
#include "data.hpp"
|
||||||
#include "config.hpp"
|
#include "ViolaJones_device.hpp"
|
||||||
#include "ViolaJonesGPU.hpp"
|
|
||||||
|
|
||||||
static inline void add_empty_feature(const np::Array<uint8_t>& feats, size_t& n) noexcept {
|
constexpr static inline void add_empty_feature(const np::Array<uint8_t>& feats, size_t& n) noexcept {
|
||||||
memset(&feats[n], 0, 4 * sizeof(uint8_t));
|
memset(&feats[n], 0, 4 * sizeof(uint8_t));
|
||||||
n += 4;
|
n += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void add_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
constexpr static inline void add_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i + w;
|
feats[n++] = i + w;
|
||||||
feats[n++] = j;
|
feats[n++] = j;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void add_immediate_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
constexpr static inline void add_immediate_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i;
|
feats[n++] = i;
|
||||||
feats[n++] = j;
|
feats[n++] = j;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void add_bottom_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
constexpr static inline void add_bottom_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i;
|
feats[n++] = i;
|
||||||
feats[n++] = j + h;
|
feats[n++] = j + h;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void add_right2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
constexpr static inline void add_right2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i + 2 * w;
|
feats[n++] = i + 2 * w;
|
||||||
feats[n++] = j;
|
feats[n++] = j;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void add_bottom2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
constexpr static inline void add_bottom2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i;
|
feats[n++] = i;
|
||||||
feats[n++] = j + 2 * h;
|
feats[n++] = j + 2 * h;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void add_bottom_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
constexpr static inline void add_bottom_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i + w;
|
feats[n++] = i + w;
|
||||||
feats[n++] = j + h;
|
feats[n++] = j + h;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize the features based on the input shape.
|
||||||
|
*
|
||||||
|
* @param width Width of the image
|
||||||
|
* @param height Height of the image
|
||||||
|
* @return The initialized features
|
||||||
|
*/
|
||||||
np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height) noexcept {
|
np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height) noexcept {
|
||||||
size_t n = 0;
|
size_t n = 0;
|
||||||
uint16_t w, h, i, j;
|
uint16_t w, h, i, j;
|
||||||
@ -162,6 +168,12 @@ np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height)
|
|||||||
// return res;
|
// return res;
|
||||||
//}
|
//}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize the weights of the weak classifiers based on the training labels.
|
||||||
|
*
|
||||||
|
* @param y_train Training labels
|
||||||
|
* @return The initialized weights
|
||||||
|
*/
|
||||||
np::Array<float64_t> init_weights(const np::Array<uint8_t>& y_train) noexcept {
|
np::Array<float64_t> init_weights(const np::Array<uint8_t>& y_train) noexcept {
|
||||||
np::Array<float64_t> weights = np::empty<float64_t>(y_train.shape);
|
np::Array<float64_t> weights = np::empty<float64_t>(y_train.shape);
|
||||||
const uint16_t t = np::sum(np::astype<uint16_t>(y_train));
|
const uint16_t t = np::sum(np::astype<uint16_t>(y_train));
|
||||||
@ -171,13 +183,30 @@ np::Array<float64_t> init_weights(const np::Array<uint8_t>& y_train) noexcept {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
np::Array<uint8_t> classify_weak_clf(const np::Array<int32_t>& X_feat_i, const size_t& j, const float64_t& threshold, const float64_t& polarity) noexcept {
|
/**
|
||||||
|
* @brief Classify the integrated features based on polarity and threshold.
|
||||||
|
*
|
||||||
|
* @param X_feat_i Integrated features
|
||||||
|
* @param j Index of the classifier
|
||||||
|
* @param threshold Trained threshold
|
||||||
|
* @param polarity Trained polarity
|
||||||
|
* @return Classified features
|
||||||
|
*/
|
||||||
|
static np::Array<uint8_t> classify_weak_clf(const np::Array<int32_t>& X_feat_i, const size_t& j, const float64_t& threshold, const float64_t& polarity) noexcept {
|
||||||
np::Array<uint8_t> res = np::empty<uint8_t>({ X_feat_i.shape[1] });
|
np::Array<uint8_t> res = np::empty<uint8_t>({ X_feat_i.shape[1] });
|
||||||
for(size_t i = 0; i < res.shape[0]; ++i)
|
for(size_t i = 0; i < res.shape[0]; ++i)
|
||||||
res[i] = polarity * X_feat_i[j * X_feat_i.shape[1] + i] < polarity * threshold ? 1 : 0;
|
res[i] = polarity * X_feat_i[j * X_feat_i.shape[1] + i] < polarity * threshold ? 1 : 0;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Classify the trained classifiers on the given features.
|
||||||
|
*
|
||||||
|
* @param alphas Trained alphas
|
||||||
|
* @param classifiers Trained classifiers
|
||||||
|
* @param X_feat integrated features
|
||||||
|
* @return Classification results
|
||||||
|
*/
|
||||||
np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>& alphas, const np::Array<float64_t>& classifiers, const np::Array<int32_t>& X_feat) noexcept {
|
np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>& alphas, const np::Array<float64_t>& classifiers, const np::Array<int32_t>& X_feat) noexcept {
|
||||||
np::Array<float64_t> total = np::zeros<float64_t>({ X_feat.shape[1] });
|
np::Array<float64_t> total = np::zeros<float64_t>({ X_feat.shape[1] });
|
||||||
|
|
||||||
@ -198,6 +227,15 @@ np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>& alphas, cons
|
|||||||
return y_pred;
|
return y_pred;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Select the best classifier given their predictions.
|
||||||
|
*
|
||||||
|
* @param classifiers The weak classifiers
|
||||||
|
* @param weights Trained weights of each classifier
|
||||||
|
* @param X_feat Integrated features
|
||||||
|
* @param y Features labels
|
||||||
|
* @return Index of the best classifier, the best error and the best accuracy
|
||||||
|
*/
|
||||||
std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array<float64_t>& classifiers, const np::Array<float64_t>& weights, const np::Array<int32_t>& X_feat, const np::Array<uint8_t>& y) noexcept {
|
std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array<float64_t>& classifiers, const np::Array<float64_t>& weights, const np::Array<int32_t>& X_feat, const np::Array<uint8_t>& y) noexcept {
|
||||||
std::tuple<int32_t, float64_t, np::Array<float64_t>> res = { -1, np::inf, np::empty<float64_t>({ X_feat.shape[0] }) };
|
std::tuple<int32_t, float64_t, np::Array<float64_t>> res = { -1, np::inf, np::empty<float64_t>({ X_feat.shape[0] }) };
|
||||||
|
|
||||||
@ -215,6 +253,15 @@ std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Train the weak classifiers.
|
||||||
|
*
|
||||||
|
* @param T Number of weak classifiers
|
||||||
|
* @param X_feat Integrated features
|
||||||
|
* @param X_feat_argsort Sorted indexes of the integrated features
|
||||||
|
* @param y Features labels
|
||||||
|
* @return List of trained alphas and the list of the final classifiers
|
||||||
|
*/
|
||||||
std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y) noexcept {
|
std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y) noexcept {
|
||||||
np::Array<float64_t> weights = init_weights(y);
|
np::Array<float64_t> weights = init_weights(y);
|
||||||
np::Array<float64_t> alphas = np::empty<float64_t>({ T });
|
np::Array<float64_t> alphas = np::empty<float64_t>({ T });
|
||||||
@ -222,11 +269,7 @@ std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np:
|
|||||||
|
|
||||||
for(size_t t = 0; t < T; ++t ){
|
for(size_t t = 0; t < T; ++t ){
|
||||||
weights /= np::sum(weights);
|
weights /= np::sum(weights);
|
||||||
#if GPU_BOOSTED
|
const np::Array<float64_t> classifiers = train_weak_clf(X_feat, X_feat_argsort, y, weights);
|
||||||
const np::Array<float64_t> classifiers = train_weak_clf_gpu(X_feat, X_feat_argsort, y, weights);
|
|
||||||
#else
|
|
||||||
const np::Array<float64_t> classifiers = train_weak_clf_cpu(X_feat, X_feat_argsort, y, weights);
|
|
||||||
#endif
|
|
||||||
const auto [ clf, error, accuracy ] = select_best(classifiers, weights, X_feat, y);
|
const auto [ clf, error, accuracy ] = select_best(classifiers, weights, X_feat, y);
|
||||||
float64_t beta = error / (1.0 - error);
|
float64_t beta = error / (1.0 - error);
|
||||||
weights *= np::pow(beta, (1.0 - accuracy));
|
weights *= np::pow(beta, (1.0 - accuracy));
|
||||||
@ -238,6 +281,13 @@ std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np:
|
|||||||
return { alphas, final_classifier };
|
return { alphas, final_classifier };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the accuracy score, i.e. how close a given set of measurements is to their true values.
|
||||||
|
*
|
||||||
|
* @param y Ground truth labels
|
||||||
|
* @param y_pred Predicted labels
|
||||||
|
* @return computed accuracy score
|
||||||
|
*/
|
||||||
float64_t accuracy_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
float64_t accuracy_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
||||||
float64_t res = 0.0;
|
float64_t res = 0.0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
@ -246,6 +296,13 @@ float64_t accuracy_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>&
|
|||||||
return res / y.shape[0];
|
return res / y.shape[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the precision score i.e. the ratio (TP / (TP + FP)) where TP is the number of true positives and FP the number of false positives.
|
||||||
|
*
|
||||||
|
* @param y Ground truth labels
|
||||||
|
* @param y_pred Predicted labels
|
||||||
|
* @return computed precision score
|
||||||
|
*/
|
||||||
float64_t precision_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
float64_t precision_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
||||||
uint16_t true_positive = 0, false_positive = 0;
|
uint16_t true_positive = 0, false_positive = 0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
@ -258,6 +315,13 @@ float64_t precision_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>&
|
|||||||
return static_cast<float64_t>(true_positive) / (true_positive + false_positive);
|
return static_cast<float64_t>(true_positive) / (true_positive + false_positive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the recall score i.e. the ratio (TP / (TP + FN)) where TP is the number of true positives and FN the number of false negatives.
|
||||||
|
*
|
||||||
|
* @param y Ground truth labels
|
||||||
|
* @param y_pred Predicted labels
|
||||||
|
* @return computed recall score
|
||||||
|
*/
|
||||||
float64_t recall_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
float64_t recall_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
||||||
uint16_t true_positive = 0, false_negative = 0;
|
uint16_t true_positive = 0, false_negative = 0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
@ -271,12 +335,35 @@ float64_t recall_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_
|
|||||||
return static_cast<float64_t>(true_positive) / (true_positive + false_negative);
|
return static_cast<float64_t>(true_positive) / (true_positive + false_negative);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the F1 score aka balanced F-score or F-measure.
|
||||||
|
*
|
||||||
|
* F1 = (2 * TP) / (2 * TP + FP + FN)
|
||||||
|
* where TP is the true positives,
|
||||||
|
* FP is the false positives,
|
||||||
|
* and FN is the false negatives
|
||||||
|
*
|
||||||
|
* @param y Ground truth labels
|
||||||
|
* @param y_pred Predicted labels
|
||||||
|
* @return computed F1 score
|
||||||
|
*/
|
||||||
float64_t f1_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
float64_t f1_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
||||||
const float64_t precision = precision_score(y, y_pred);
|
const float64_t precision = precision_score(y, y_pred);
|
||||||
const float64_t recall = recall_score(y, y_pred);
|
const float64_t recall = recall_score(y, y_pred);
|
||||||
return 2 * (precision * recall) / (precision + recall);
|
return 2 * (precision * recall) / (precision + recall);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the confusion matrix to evaluate a given classification.
|
||||||
|
*
|
||||||
|
* A confusion matrix of a binary classification consists of a 2x2 matrix containing
|
||||||
|
* | True negatives | False positives |
|
||||||
|
* | False negatives | True positives |
|
||||||
|
*
|
||||||
|
* @param y Ground truth labels
|
||||||
|
* @param y_pred Predicted labels
|
||||||
|
* @return computed confusion matrix
|
||||||
|
*/
|
||||||
std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
||||||
uint16_t true_positive = 0, false_positive = 0, true_negative = 0, false_negative = 0;
|
uint16_t true_positive = 0, false_positive = 0, true_negative = 0, false_negative = 0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
|
@ -2,8 +2,15 @@
|
|||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include "data.hpp"
|
#include "data.hpp"
|
||||||
#include "toolbox.hpp"
|
#include "toolbox.hpp"
|
||||||
//#include "config.hpp"
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test if an array from a CPU computation is equal to its GPU computation equivalent.
|
||||||
|
*
|
||||||
|
* @tparam T Inner type of the arrays to test
|
||||||
|
* @param cpu CPU Array
|
||||||
|
* @param gpu GPU Array
|
||||||
|
* @return Whether the test was successful
|
||||||
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noexcept {
|
bool unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noexcept {
|
||||||
if (cpu.shape != gpu.shape) {
|
if (cpu.shape != gpu.shape) {
|
||||||
@ -27,6 +34,14 @@ bool unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noex
|
|||||||
return eq == length;
|
return eq == length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test if a given 2D array of indices sorts a given 2D array
|
||||||
|
*
|
||||||
|
* @tparam T Inner type of the array to test
|
||||||
|
* @param a 2D Array of data
|
||||||
|
* @param indices 2D Indices that sort the array
|
||||||
|
* @return Whether the test was successful
|
||||||
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indices) noexcept {
|
bool unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indices) noexcept {
|
||||||
if (a.shape != indices.shape) {
|
if (a.shape != indices.shape) {
|
||||||
@ -51,6 +66,18 @@ bool unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indi
|
|||||||
return correct == total;
|
return correct == total;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Benchmark a function and display the result in stdout.
|
||||||
|
*
|
||||||
|
* @tparam T Resulting type of the function to benchmark
|
||||||
|
* @tparam F Signature of the function to call
|
||||||
|
* @tparam Args Variadic argument types of the function to call
|
||||||
|
* @param step_name Name of the function to log
|
||||||
|
* @param column_width Width of the column to print during logging
|
||||||
|
* @param fnc Function to benchmark
|
||||||
|
* @param args Arguments to pass to the function to call
|
||||||
|
* @return Result of the benchmarked function
|
||||||
|
*/
|
||||||
template <typename T, typename F, typename... Args>
|
template <typename T, typename F, typename... Args>
|
||||||
T benchmark_function(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept {
|
T benchmark_function(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept {
|
||||||
#if __DEBUG == false
|
#if __DEBUG == false
|
||||||
@ -64,6 +91,16 @@ T benchmark_function(const char* const step_name, const int32_t& column_width, c
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Benchmark a function and display the result in stdout.
|
||||||
|
*
|
||||||
|
* @tparam F Signature of the function to call
|
||||||
|
* @tparam Args Variadic argument types of the function to call
|
||||||
|
* @param step_name Name of the function to log
|
||||||
|
* @param column_width Width of the column to print during logging
|
||||||
|
* @param fnc Function to benchmark
|
||||||
|
* @param args Arguments to pass to the function to call
|
||||||
|
*/
|
||||||
template <typename F, typename... Args>
|
template <typename F, typename... Args>
|
||||||
void benchmark_function_void(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept {
|
void benchmark_function_void(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept {
|
||||||
#if __DEBUG == false
|
#if __DEBUG == false
|
||||||
@ -76,6 +113,22 @@ void benchmark_function_void(const char* const step_name, const int32_t& column_
|
|||||||
formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Either execute a function then save the result or load the already cached result.
|
||||||
|
*
|
||||||
|
* @tparam T Inner type of the resulting array
|
||||||
|
* @tparam F Signature of the function to call
|
||||||
|
* @tparam Args Variadic argument types of the function to call
|
||||||
|
* @param step_name Name of the function to log
|
||||||
|
* @param column_width Width of the column to print during logging
|
||||||
|
* @param filename Name of the file where the result is saved
|
||||||
|
* @param force_redo Call the function again even if the result is already saved, ignored if the result is not cached
|
||||||
|
* @param save_state Whether the computed result will be saved or not, ignored if loading an already cached result
|
||||||
|
* @param out_dir Path of the directory to save the result
|
||||||
|
* @param fnc Function to call
|
||||||
|
* @param args Arguments to pass to the function to call
|
||||||
|
* @return The result of the called function
|
||||||
|
*/
|
||||||
template <typename T, typename F, typename... Args>
|
template <typename T, typename F, typename... Args>
|
||||||
np::Array<T> state_saver(const char* const step_name, const int32_t& column_width, const char* const filename, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept {
|
np::Array<T> state_saver(const char* const step_name, const int32_t& column_width, const char* const filename, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept {
|
||||||
char filepath[BUFFER_SIZE] = { 0 };
|
char filepath[BUFFER_SIZE] = { 0 };
|
||||||
@ -83,7 +136,6 @@ np::Array<T> state_saver(const char* const step_name, const int32_t& column_widt
|
|||||||
|
|
||||||
np::Array<T> bin;
|
np::Array<T> bin;
|
||||||
if (!std::filesystem::exists(filepath) || force_redo) {
|
if (!std::filesystem::exists(filepath) || force_redo) {
|
||||||
//bin = std::move(benchmark_function<np::Array<T>>(step_name, column_width, fnc, std::forward<Args>(args)...));
|
|
||||||
bin = benchmark_function<np::Array<T>>(step_name, column_width, fnc, std::forward<Args>(args)...);
|
bin = benchmark_function<np::Array<T>>(step_name, column_width, fnc, std::forward<Args>(args)...);
|
||||||
if(save_state){
|
if(save_state){
|
||||||
#if __DEBUG == false
|
#if __DEBUG == false
|
||||||
@ -101,13 +153,28 @@ np::Array<T> state_saver(const char* const step_name, const int32_t& column_widt
|
|||||||
fprintf(stderr, "Loading results of %s\r", step_name);
|
fprintf(stderr, "Loading results of %s\r", step_name);
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
#endif
|
#endif
|
||||||
//bin = std::move(load<T>(filepath));
|
|
||||||
bin = load<T>(filepath);
|
bin = load<T>(filepath);
|
||||||
formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" });
|
formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" });
|
||||||
}
|
}
|
||||||
return bin;
|
return bin;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Either execute a function then save the results or load the already cached results.
|
||||||
|
*
|
||||||
|
* @tparam T Inner type of the resulting arrays
|
||||||
|
* @tparam F Signature of the function to call
|
||||||
|
* @tparam Args Variadic argument types of the function to call
|
||||||
|
* @param step_name Name of the function to log
|
||||||
|
* @param column_width Width of the column to print during logging
|
||||||
|
* @param filenames List of the names of the files where the results are saved
|
||||||
|
* @param force_redo Call the function again even if the results are already saved, ignored if the results are not cached
|
||||||
|
* @param save_state Whether the computed results will be saved or not, ignored if loading already cached results
|
||||||
|
* @param out_dir Path of the directory to save the results
|
||||||
|
* @param fnc Function to call
|
||||||
|
* @param args Arguments to pass to the function to call
|
||||||
|
* @return The results of the called function
|
||||||
|
*/
|
||||||
template <typename T, size_t N, typename F, typename... Args>
|
template <typename T, size_t N, typename F, typename... Args>
|
||||||
std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32_t& column_width, const std::vector<const char*>& filenames, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept {
|
std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32_t& column_width, const std::vector<const char*>& filenames, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept {
|
||||||
char filepath[BUFFER_SIZE] = { 0 };
|
char filepath[BUFFER_SIZE] = { 0 };
|
||||||
@ -122,7 +189,6 @@ std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32
|
|||||||
|
|
||||||
std::array<np::Array<T>, N> bin;
|
std::array<np::Array<T>, N> bin;
|
||||||
if (abs || force_redo) {
|
if (abs || force_redo) {
|
||||||
//bin = std::move(benchmark_function<std::array<np::Array<T>, N>>(step_name, column_width, fnc, std::forward<Args>(args)...));
|
|
||||||
bin = benchmark_function<std::array<np::Array<T>, N>>(step_name, column_width, fnc, std::forward<Args>(args)...);
|
bin = benchmark_function<std::array<np::Array<T>, N>>(step_name, column_width, fnc, std::forward<Args>(args)...);
|
||||||
if (save_state){
|
if (save_state){
|
||||||
#if __DEBUG == false
|
#if __DEBUG == false
|
||||||
@ -145,25 +211,116 @@ std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32
|
|||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
#endif
|
#endif
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
bin[i++] = std::move(load<T>(filepath));
|
|
||||||
for (const char* const filename : filenames){
|
for (const char* const filename : filenames){
|
||||||
snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename);
|
snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename);
|
||||||
|
bin[i++] = load<T>(filepath);
|
||||||
}
|
}
|
||||||
formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" });
|
formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" });
|
||||||
}
|
}
|
||||||
return bin;
|
return bin;
|
||||||
}
|
}
|
||||||
|
|
||||||
np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>&) noexcept;
|
/**
|
||||||
|
* @brief Initialize the features based on the input shape.
|
||||||
|
*
|
||||||
|
* @param width Width of the image
|
||||||
|
* @param height Height of the image
|
||||||
|
* @return The initialized features
|
||||||
|
*/
|
||||||
np::Array<uint8_t> build_features(const uint16_t&, const uint16_t&) noexcept;
|
np::Array<uint8_t> build_features(const uint16_t&, const uint16_t&) noexcept;
|
||||||
np::Array<int32_t> select_percentile(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
//np::Array<int32_t> select_percentile(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Classify the trained classifiers on the given features.
|
||||||
|
*
|
||||||
|
* @param alphas Trained alphas
|
||||||
|
* @param classifiers Trained classifiers
|
||||||
|
* @param X_feat integrated features
|
||||||
|
* @return Classification results
|
||||||
|
*/
|
||||||
np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>&, const np::Array<float64_t>&, const np::Array<int32_t>&) noexcept;
|
np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>&, const np::Array<float64_t>&, const np::Array<int32_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize the weights of the weak classifiers based on the training labels.
|
||||||
|
*
|
||||||
|
* @param y_train Training labels
|
||||||
|
* @return The initialized weights
|
||||||
|
*/
|
||||||
np::Array<float64_t> init_weights(const np::Array<uint8_t>&) noexcept;
|
np::Array<float64_t> init_weights(const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Select the best classifier given their predictions.
|
||||||
|
*
|
||||||
|
* @param classifiers The weak classifiers
|
||||||
|
* @param weights Trained weights of each classifier
|
||||||
|
* @param X_feat Integrated features
|
||||||
|
* @param y Features labels
|
||||||
|
* @return Index of the best classifier, the best error and the best accuracy
|
||||||
|
*/
|
||||||
std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array<float64_t>&, const np::Array<float64_t>&, const np::Array<int32_t>&,
|
std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array<float64_t>&, const np::Array<float64_t>&, const np::Array<int32_t>&,
|
||||||
const np::Array<uint8_t>&) noexcept;
|
const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Train the weak classifiers.
|
||||||
|
*
|
||||||
|
* @param T Number of weak classifiers
|
||||||
|
* @param X_feat Integrated features
|
||||||
|
* @param X_feat_argsort Sorted indexes of the integrated features
|
||||||
|
* @param y Features labels
|
||||||
|
* @return List of trained alphas and the list of the final classifiers
|
||||||
|
*/
|
||||||
std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t&, const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&) noexcept;
|
std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t&, const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the accuracy score i.e. how close a given set of measurements is to its true value.
|
||||||
|
*
|
||||||
|
* @param y Ground truth labels
|
||||||
|
* @param y_pred Predicted labels
|
||||||
|
* @return computed accuracy score
|
||||||
|
*/
|
||||||
float64_t accuracy_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
float64_t accuracy_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the precision score i.e. how close the measurements of a given set are to each other.
|
||||||
|
*
|
||||||
|
* @param y Ground truth labels
|
||||||
|
* @param y_pred Predicted labels
|
||||||
|
* @return computed precision score
|
||||||
|
*/
|
||||||
float64_t precision_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
float64_t precision_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the recall score i.e. the ratio (TP / (TP + FN)) where TP is the number of true positives and FN the number of false negatives.
|
||||||
|
*
|
||||||
|
* @param y Ground truth labels
|
||||||
|
* @param y_pred Predicted labels
|
||||||
|
* @return computed recall score
|
||||||
|
*/
|
||||||
float64_t recall_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
float64_t recall_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the F1 score aka balanced F-score or F-measure.
|
||||||
|
*
|
||||||
|
* F1 = (2 * TP) / (2 * TP + FP + FN)
|
||||||
|
* where TP is the true positives,
|
||||||
|
* FP is the false positives,
|
||||||
|
* and FN is the false negatives
|
||||||
|
*
|
||||||
|
* @param y Ground truth labels
|
||||||
|
* @param y_pred Predicted labels
|
||||||
|
* @return computed F1 score
|
||||||
|
*/
|
||||||
float64_t f1_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
float64_t f1_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the confusion matrix to evaluate a given classification.
|
||||||
|
*
|
||||||
|
* A confusion matrix of a binary classification consists of a 2x2 matrix containing
|
||||||
|
* | True negatives | False positives |
|
||||||
|
* | False negatives | True positives |
|
||||||
|
*
|
||||||
|
* @param y Ground truth labels
|
||||||
|
* @param y_pred Predicted labels
|
||||||
|
* @return computed confusion matrix
|
||||||
|
*/
|
||||||
std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
@ -1,7 +1,15 @@
|
|||||||
#include "data.hpp"
|
#include "data.hpp"
|
||||||
#include "toolbox.hpp"
|
#include "config.hpp"
|
||||||
|
|
||||||
np::Array<uint32_t> set_integral_image_cpu(const np::Array<uint8_t>& set) noexcept {
|
#if GPU_BOOSTED == false
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Transform the input images into integrated images (CPU version).
|
||||||
|
*
|
||||||
|
* @param X Dataset of images
|
||||||
|
* @return Dataset of integrated images
|
||||||
|
*/
|
||||||
|
np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>& set) noexcept {
|
||||||
np::Array<uint32_t> X_ii = np::empty<uint32_t>(set.shape);
|
np::Array<uint32_t> X_ii = np::empty<uint32_t>(set.shape);
|
||||||
|
|
||||||
size_t i, y, x, s;
|
size_t i, y, x, s;
|
||||||
@ -31,7 +39,14 @@ constexpr static inline int16_t __compute_feature__(const np::Array<uint32_t>& X
|
|||||||
return X_ii[j + _yh + w] + X_ii[j + _y] - X_ii[j + _yh] - X_ii[j + _y + w];
|
return X_ii[j + _yh + w] + X_ii[j + _y] - X_ii[j + _yh] - X_ii[j + _y + w];
|
||||||
}
|
}
|
||||||
|
|
||||||
np::Array<int32_t> apply_features_cpu(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
|
/**
|
||||||
|
* @brief Apply the features on an integrated image dataset (CPU version).
|
||||||
|
*
|
||||||
|
* @param feats Features to apply
|
||||||
|
* @param X_ii Integrated image dataset
|
||||||
|
* @return Applied features
|
||||||
|
*/
|
||||||
|
np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
|
||||||
np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
|
np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
|
||||||
|
|
||||||
size_t j, feat_idx = 0;
|
size_t j, feat_idx = 0;
|
||||||
@ -51,7 +66,7 @@ np::Array<int32_t> apply_features_cpu(const np::Array<uint8_t>& feats, const np:
|
|||||||
return X_feat;
|
return X_feat;
|
||||||
}
|
}
|
||||||
|
|
||||||
np::Array<float64_t> train_weak_clf_cpu(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y, const np::Array<float64_t>& weights) noexcept {
|
np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y, const np::Array<float64_t>& weights) noexcept {
|
||||||
float64_t total_pos = 0.0, total_neg = 0.0;
|
float64_t total_pos = 0.0, total_neg = 0.0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
(y[i] == static_cast<uint8_t>(1) ? total_pos : total_neg) += weights[i];
|
(y[i] == static_cast<uint8_t>(1) ? total_pos : total_neg) += weights[i];
|
||||||
@ -81,7 +96,69 @@ np::Array<float64_t> train_weak_clf_cpu(const np::Array<int32_t>& X_feat, const
|
|||||||
return classifiers;
|
return classifiers;
|
||||||
}
|
}
|
||||||
|
|
||||||
np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>& X_feat) noexcept {
|
/**
 * @brief Perform an indirect sort of a given array within a given bound.
 *
 * Iterative quicksort: the [low, high] ranges still to be partitioned are kept
 * on an explicit stack instead of recursing. Only the indices array is
 * reordered by this function.
 *
 * @tparam T Inner type of the array
 * @param a Array to sort
 * @param indices Array of indices to write to
 * @param low lower bound to sort (inclusive)
 * @param high higher bound to sort (inclusive)
 */
template<typename T>
static void argsort(const T* const a, uint16_t* const indices, size_t low, size_t high) noexcept {
	// A range of fewer than two elements is already sorted. Returning here also
	// guarantees that the stack below has room for the two bounds of the first range.
	if (low >= high)
		return;

	// Pending ranges are disjoint and hold at least two elements each, so storing
	// two bounds per range never needs more slots than there are elements
	// (assuming as_partition returns an index inside [low, high]).
	const size_t total = high - low + 1;
	size_t* const stack = new size_t[total];
	size_t size = 0;
	stack[size++] = low;
	stack[size++] = high;

	while (size > 0) {
		high = stack[--size];
		low = stack[--size];

		// as_partition is defined outside this block; presumably it returns the
		// final index of the pivot within [low, high] - TODO confirm
		const size_t p = as_partition(a, indices, low, high);

		// Left part [low, p - 1]: queue it only when it holds at least two elements.
		// Written as p > low + 1 so that p == 0 cannot wrap around, and so that the
		// test stays valid when low is not zero.
		if (p > low + 1) {
			stack[size++] = low;
			stack[size++] = p - 1;
		}

		// Right part [p + 1, high]: same rule
		if (p + 1 < high) {
			stack[size++] = p + 1;
			stack[size++] = high;
		}
	}
	delete[] stack;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Apply argsort to every column of a given 2D array.
|
||||||
|
*
|
||||||
|
* @tparam T Inner type of the array
|
||||||
|
* @param a 2D Array to sort
|
||||||
|
* @return 2D Array of indices that sort the array
|
||||||
|
*/
|
||||||
|
template<typename T>
|
||||||
|
static np::Array<uint16_t> argsort_bounded(const np::Array<T>& a, const size_t& low, const size_t& high) noexcept {
|
||||||
|
np::Array<uint16_t> indices = np::empty(a.shape);
|
||||||
|
map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; });
|
||||||
|
|
||||||
|
argsort_bounded(a, indices, low, high);
|
||||||
|
return indices;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Perform an indirect sort on each column of a given 2D array (CPU version).
|
||||||
|
*
|
||||||
|
* @param X_feat 2D Array to sort
|
||||||
|
* @return 2D Array of indices that sort the array
|
||||||
|
*/
|
||||||
|
np::Array<uint16_t> argsort_2d(const np::Array<int32_t>& X_feat) noexcept {
|
||||||
const np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);
|
const np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);
|
||||||
const size_t length = np::prod(X_feat.shape);
|
const size_t length = np::prod(X_feat.shape);
|
||||||
for (size_t i = 0; i < length; i += X_feat.shape[1]) {
|
for (size_t i = 0; i < length; i += X_feat.shape[1]) {
|
||||||
@ -91,3 +168,4 @@ np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>& X_feat) noexcept {
|
|||||||
return indices;
|
return indices;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif // GPU_BOOSTED == false
|
||||||
|
@ -1,8 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
#include "data.hpp"
|
|
||||||
|
|
||||||
np::Array<uint32_t> set_integral_image_cpu(const np::Array<uint8_t>&) noexcept;
|
|
||||||
np::Array<int32_t> apply_features_cpu(const np::Array<uint8_t>&, const np::Array<uint32_t>&) noexcept;
|
|
||||||
np::Array<float64_t> train_weak_clf_cpu(const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&,
|
|
||||||
const np::Array<float64_t>&) noexcept;
|
|
||||||
np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>&) noexcept;
|
|
@ -1,5 +1,14 @@
|
|||||||
#include "data.hpp"
|
#include "data.hpp"
|
||||||
|
#include "config.hpp"
|
||||||
|
|
||||||
|
#if GPU_BOOSTED
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Prefix Sum (scan) of a given dataset.
|
||||||
|
*
|
||||||
|
* @param X Dataset of images to apply sum
|
||||||
|
* @return Scanned dataset of images
|
||||||
|
*/
|
||||||
static np::Array<uint32_t> __scanCPU_3d__(const np::Array<uint32_t>& X) noexcept {
|
static np::Array<uint32_t> __scanCPU_3d__(const np::Array<uint32_t>& X) noexcept {
|
||||||
np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape);
|
np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape);
|
||||||
const size_t total = np::prod(X_scan.shape);
|
const size_t total = np::prod(X_scan.shape);
|
||||||
@ -16,6 +25,14 @@ static np::Array<uint32_t> __scanCPU_3d__(const np::Array<uint32_t>& X) noexcept
|
|||||||
return X_scan;
|
return X_scan;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GPU kernel used to do a parallel prefix sum (scan).
|
||||||
|
*
|
||||||
|
* @param n Number of width blocks
|
||||||
|
* @param j Temporary sum index
|
||||||
|
* @param d_inter Temporary sums on device to add
|
||||||
|
* @param d_X Dataset of images on device to apply sum
|
||||||
|
*/
|
||||||
static __global__ void __kernel_scan_3d__(const uint16_t n, const uint16_t j, np::Array<uint32_t> d_inter, np::Array<uint32_t> d_X) {
|
static __global__ void __kernel_scan_3d__(const uint16_t n, const uint16_t j, np::Array<uint32_t> d_inter, np::Array<uint32_t> d_X) {
|
||||||
const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x;
|
const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y;
|
const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -60,6 +77,14 @@ static __global__ void __kernel_scan_3d__(const uint16_t n, const uint16_t j, np
|
|||||||
d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] = sA[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y];
|
d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] = sA[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GPU kernel for parallel sum.
|
||||||
|
*
|
||||||
|
* @param d_X Dataset of images on device
|
||||||
|
* @param d_s Temporary sums to add on device
|
||||||
|
* @param n Number of width blocks
|
||||||
|
* @param m Height of a block
|
||||||
|
*/
|
||||||
static __global__ void __add_3d__(np::Array<uint32_t> d_X, const np::Array<uint32_t> d_s, const uint16_t n, const uint16_t m) {
|
static __global__ void __add_3d__(np::Array<uint32_t> d_X, const np::Array<uint32_t> d_s, const uint16_t n, const uint16_t m) {
|
||||||
const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x;
|
const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y;
|
const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -67,6 +92,14 @@ static __global__ void __add_3d__(np::Array<uint32_t> d_X, const np::Array<uint3
|
|||||||
d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] += d_s[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + blockIdx.x];
|
d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] += d_s[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + blockIdx.x];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Parallel Prefix Sum (scan) of a given dataset.
|
||||||
|
*
|
||||||
|
* Read more: https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda
|
||||||
|
*
|
||||||
|
* @param X Dataset of images
|
||||||
|
* @return Scanned dataset of images
|
||||||
|
*/
|
||||||
static np::Array<uint32_t> __scanGPU_3d__(const np::Array<uint32_t>& X) noexcept {
|
static np::Array<uint32_t> __scanGPU_3d__(const np::Array<uint32_t>& X) noexcept {
|
||||||
np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape);
|
np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape);
|
||||||
|
|
||||||
@ -112,6 +145,12 @@ static np::Array<uint32_t> __scanGPU_3d__(const np::Array<uint32_t>& X) noexcept
|
|||||||
return X_scan;
|
return X_scan;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GPU kernel of the function __transpose_3d__.
|
||||||
|
*
|
||||||
|
* @param d_X Dataset of images on device
|
||||||
|
* @param d_Xt Transposed dataset of images on device
|
||||||
|
*/
|
||||||
static __global__ void __transpose_kernel__(const np::Array<uint32_t> d_X, np::Array<uint32_t> d_Xt) {
|
static __global__ void __transpose_kernel__(const np::Array<uint32_t> d_X, np::Array<uint32_t> d_Xt) {
|
||||||
__shared__ uint32_t temp[NB_THREADS_2D_X * NB_THREADS_2D_Y];
|
__shared__ uint32_t temp[NB_THREADS_2D_X * NB_THREADS_2D_Y];
|
||||||
|
|
||||||
@ -128,6 +167,12 @@ static __global__ void __transpose_kernel__(const np::Array<uint32_t> d_X, np::A
|
|||||||
d_Xt[blockIdx.z * d_Xt.shape[1] * d_Xt.shape[2] + x * d_X.shape[2] + y] = temp[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y];
|
d_Xt[blockIdx.z * d_Xt.shape[1] * d_Xt.shape[2] + x * d_X.shape[2] + y] = temp[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Transpose every image in the given dataset.
|
||||||
|
*
|
||||||
|
* @param X Dataset of images
|
||||||
|
* @return Transposed dataset of images
|
||||||
|
*/
|
||||||
static np::Array<uint32_t> __transpose_3d__(const np::Array<uint32_t>& X) noexcept {
|
static np::Array<uint32_t> __transpose_3d__(const np::Array<uint32_t>& X) noexcept {
|
||||||
np::Array<uint32_t> Xt = np::empty<uint32_t>({ X.shape[0], X.shape[2], X.shape[1] });
|
np::Array<uint32_t> Xt = np::empty<uint32_t>({ X.shape[0], X.shape[2], X.shape[1] });
|
||||||
|
|
||||||
@ -147,7 +192,13 @@ static np::Array<uint32_t> __transpose_3d__(const np::Array<uint32_t>& X) noexce
|
|||||||
return Xt;
|
return Xt;
|
||||||
}
|
}
|
||||||
|
|
||||||
np::Array<uint32_t> set_integral_image_gpu(const np::Array<uint8_t>& X) noexcept {
|
/**
|
||||||
|
* @brief Transform the input images into integrated images (GPU version).
|
||||||
|
*
|
||||||
|
* @param X Dataset of images
|
||||||
|
* @return Dataset of integrated images
|
||||||
|
*/
|
||||||
|
np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>& X) noexcept {
|
||||||
np::Array<uint32_t> X_ii = np::astype<uint32_t>(X);
|
np::Array<uint32_t> X_ii = np::astype<uint32_t>(X);
|
||||||
X_ii = __scanCPU_3d__(X_ii);
|
X_ii = __scanCPU_3d__(X_ii);
|
||||||
X_ii = __transpose_3d__(X_ii);
|
X_ii = __transpose_3d__(X_ii);
|
||||||
@ -155,53 +206,17 @@ np::Array<uint32_t> set_integral_image_gpu(const np::Array<uint8_t>& X) noexcept
|
|||||||
return __transpose_3d__(X_ii);
|
return __transpose_3d__(X_ii);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __device__ int16_t __compute_feature__(const np::Array<uint32_t>& d_X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept {
|
/**
|
||||||
const size_t _y = y * d_X_ii.shape[1] + x;
|
* @brief GPU kernel of the function train_weak_clf.
|
||||||
const size_t _yh = _y + h * d_X_ii.shape[1];
|
*
|
||||||
return d_X_ii[j + _yh + w] + d_X_ii[j + _y] - d_X_ii[j + _yh] - d_X_ii[j + _y + w];
|
* @param d_classifiers Weak classifiers on device to train
|
||||||
}
|
* @param d_y Labels of the features on device
|
||||||
|
* @param d_X_feat Feature images dataset on device
|
||||||
static __global__ void __apply_feature_kernel__(int32_t* d_X_feat, const np::Array<uint8_t> d_feats, const np::Array<uint32_t> d_X_ii) {
|
* @param d_X_feat_argsort Sorted indexes of the integrated features on device
|
||||||
size_t i = blockIdx.x * blockDim.x + threadIdx.x;
|
* @param d_weights Weights of the features on device
|
||||||
size_t j = blockIdx.y * blockDim.y + threadIdx.y;
|
* @param total_pos Total of positive labels in the dataset
|
||||||
|
* @param total_neg Total of negative labels in the dataset
|
||||||
if (i >= d_feats.shape[0] || j >= d_X_ii.shape[0])
|
*/
|
||||||
return;
|
|
||||||
|
|
||||||
const size_t k = i * d_X_ii.shape[0] + j;
|
|
||||||
i *= np::prod(d_feats.shape, 1);
|
|
||||||
j *= np::prod(d_X_ii.shape, 1);
|
|
||||||
const int16_t p1 = __compute_feature__(d_X_ii, j, d_feats[i + 0], d_feats[i + 1], d_feats[i + 2], d_feats[i + 3]);
|
|
||||||
const int16_t p2 = __compute_feature__(d_X_ii, j, d_feats[i + 4], d_feats[i + 5], d_feats[i + 6], d_feats[i + 7]);
|
|
||||||
const int16_t n1 = __compute_feature__(d_X_ii, j, d_feats[i + 8], d_feats[i + 9], d_feats[i + 10], d_feats[i + 11]);
|
|
||||||
const int16_t n2 = __compute_feature__(d_X_ii, j, d_feats[i + 12], d_feats[i + 13], d_feats[i + 14], d_feats[i + 15]);
|
|
||||||
d_X_feat[k] = static_cast<int32_t>(p1 + p2) - static_cast<int32_t>(n1 + n2);
|
|
||||||
}
|
|
||||||
|
|
||||||
np::Array<int32_t> apply_features_gpu(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
|
|
||||||
const np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
|
|
||||||
int32_t* d_X_feat = nullptr;
|
|
||||||
|
|
||||||
_print_cuda_error_("malloc d_X_feat", cudaMalloc(&d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t)));
|
|
||||||
np::Array<uint32_t> d_X_ii = copyToDevice<uint32_t>("X_ii", X_ii);
|
|
||||||
np::Array<uint8_t> d_feats = copyToDevice<uint8_t>("feats", feats);
|
|
||||||
|
|
||||||
const size_t dimX = static_cast<size_t>(std::ceil(static_cast<float64_t>(feats.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_X)));
|
|
||||||
const size_t dimY = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_ii.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_Y)));
|
|
||||||
const dim3 dimGrid(dimX, dimY);
|
|
||||||
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
|
|
||||||
__apply_feature_kernel__<<<dimGrid, dimBlock>>>(d_X_feat, d_feats, d_X_ii);
|
|
||||||
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
|
||||||
|
|
||||||
_print_cuda_error_("memcpy X_feat", cudaMemcpy(X_feat.data, d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t), cudaMemcpyDeviceToHost));
|
|
||||||
|
|
||||||
_print_cuda_error_("free d_X_feat", cudaFree(d_X_feat));
|
|
||||||
cudaFree("free d_feats", d_feats);
|
|
||||||
cudaFree("free d_X_11", d_X_ii);
|
|
||||||
|
|
||||||
return X_feat;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifiers, const np::Array<uint8_t> d_y,
|
static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifiers, const np::Array<uint8_t> d_y,
|
||||||
const np::Array<int32_t> d_X_feat, const np::Array<uint16_t> d_X_feat_argsort,
|
const np::Array<int32_t> d_X_feat, const np::Array<uint16_t> d_X_feat_argsort,
|
||||||
const np::Array<float64_t> d_weights, const float64_t total_pos, const float64_t total_neg) {
|
const np::Array<float64_t> d_weights, const float64_t total_pos, const float64_t total_neg) {
|
||||||
@ -210,7 +225,7 @@ static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifi
|
|||||||
i += threadIdx.x * blockDim.y * blockDim.z;
|
i += threadIdx.x * blockDim.y * blockDim.z;
|
||||||
i += threadIdx.y * blockDim.z;
|
i += threadIdx.y * blockDim.z;
|
||||||
i += threadIdx.z;
|
i += threadIdx.z;
|
||||||
// const size_t i = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
if(i >= d_classifiers.shape[0])
|
if(i >= d_classifiers.shape[0])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -235,7 +250,16 @@ static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifi
|
|||||||
d_classifiers[i * 2] = best_threshold; d_classifiers[i * 2 + 1] = best_polarity;
|
d_classifiers[i * 2] = best_threshold; d_classifiers[i * 2 + 1] = best_polarity;
|
||||||
}
|
}
|
||||||
|
|
||||||
np::Array<float64_t> train_weak_clf_gpu(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y,
|
/**
|
||||||
|
* @brief Train the weak classifiers on a given dataset (GPU version).
|
||||||
|
*
|
||||||
|
* @param X_feat Feature images dataset
|
||||||
|
* @param X_feat_argsort Sorted indexes of the integrated features
|
||||||
|
* @param y Labels of the features
|
||||||
|
* @param weights Weights of the features
|
||||||
|
* @return Trained weak classifiers
|
||||||
|
*/
|
||||||
|
np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y,
|
||||||
const np::Array<float64_t>& weights) noexcept {
|
const np::Array<float64_t>& weights) noexcept {
|
||||||
float64_t total_pos = 0.0, total_neg = 0.0;
|
float64_t total_pos = 0.0, total_neg = 0.0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
@ -251,8 +275,6 @@ np::Array<float64_t> train_weak_clf_gpu(const np::Array<int32_t>& X_feat, const
|
|||||||
|
|
||||||
const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS_3D_X * NB_THREADS_3D_Y * NB_THREADS_3D_Z)));
|
const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS_3D_X * NB_THREADS_3D_Y * NB_THREADS_3D_Z)));
|
||||||
constexpr const dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z);
|
constexpr const dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z);
|
||||||
// const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS)));
|
|
||||||
// constexpr const dim3 dimBlock(NB_THREADS);
|
|
||||||
__train_weak_clf_kernel__<<<n_blocks, dimBlock>>>(d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg);
|
__train_weak_clf_kernel__<<<n_blocks, dimBlock>>>(d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg);
|
||||||
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
||||||
|
|
||||||
@ -267,28 +289,118 @@ np::Array<float64_t> train_weak_clf_gpu(const np::Array<int32_t>& X_feat, const
|
|||||||
return classifiers;
|
return classifiers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute a feature on a integrated image at a specific coordinate (GPU version).
|
||||||
|
*
|
||||||
|
* @param d_X_ii Dataset of integrated images on device
|
||||||
|
* @param j Image index in the dataset
|
||||||
|
* @param x X coordinate
|
||||||
|
* @param y Y coordinate
|
||||||
|
* @param w width of the feature
|
||||||
|
* @param h height of the feature
|
||||||
|
*/
|
||||||
|
static inline __device__ int16_t __compute_feature__(const np::Array<uint32_t>& d_X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept {
|
||||||
|
const size_t _y = y * d_X_ii.shape[1] + x;
|
||||||
|
const size_t _yh = _y + h * d_X_ii.shape[1];
|
||||||
|
return d_X_ii[j + _yh + w] + d_X_ii[j + _y] - d_X_ii[j + _yh] - d_X_ii[j + _y + w];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GPU kernel of the function apply_features.
|
||||||
|
*
|
||||||
|
* @param d_X_feat Dataset of image features on device
|
||||||
|
* @param d_feats Features on device to apply
|
||||||
|
* @param d_X_ii Integrated image dataset on device
|
||||||
|
*/
|
||||||
|
static __global__ void __apply_feature_kernel__(int32_t* d_X_feat, const np::Array<uint8_t> d_feats, const np::Array<uint32_t> d_X_ii) {
|
||||||
|
size_t i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
size_t j = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
|
if (i >= d_feats.shape[0] || j >= d_X_ii.shape[0])
|
||||||
|
return;
|
||||||
|
|
||||||
|
const size_t k = i * d_X_ii.shape[0] + j;
|
||||||
|
i *= np::prod(d_feats.shape, 1);
|
||||||
|
j *= np::prod(d_X_ii.shape, 1);
|
||||||
|
const int16_t p1 = __compute_feature__(d_X_ii, j, d_feats[i + 0], d_feats[i + 1], d_feats[i + 2], d_feats[i + 3]);
|
||||||
|
const int16_t p2 = __compute_feature__(d_X_ii, j, d_feats[i + 4], d_feats[i + 5], d_feats[i + 6], d_feats[i + 7]);
|
||||||
|
const int16_t n1 = __compute_feature__(d_X_ii, j, d_feats[i + 8], d_feats[i + 9], d_feats[i + 10], d_feats[i + 11]);
|
||||||
|
const int16_t n2 = __compute_feature__(d_X_ii, j, d_feats[i + 12], d_feats[i + 13], d_feats[i + 14], d_feats[i + 15]);
|
||||||
|
d_X_feat[k] = static_cast<int32_t>(p1 + p2) - static_cast<int32_t>(n1 + n2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Apply the features on a integrated image dataset (GPU version).
|
||||||
|
*
|
||||||
|
* @param feats Features to apply
|
||||||
|
* @param X_ii Integrated image dataset
|
||||||
|
* @return Applied features
|
||||||
|
*/
|
||||||
|
np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
|
||||||
|
const np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
|
||||||
|
int32_t* d_X_feat = nullptr;
|
||||||
|
|
||||||
|
_print_cuda_error_("malloc d_X_feat", cudaMalloc(&d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t)));
|
||||||
|
np::Array<uint32_t> d_X_ii = copyToDevice<uint32_t>("X_ii", X_ii);
|
||||||
|
np::Array<uint8_t> d_feats = copyToDevice<uint8_t>("feats", feats);
|
||||||
|
|
||||||
|
const size_t dimX = static_cast<size_t>(std::ceil(static_cast<float64_t>(feats.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_X)));
|
||||||
|
const size_t dimY = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_ii.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_Y)));
|
||||||
|
const dim3 dimGrid(dimX, dimY);
|
||||||
|
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
|
||||||
|
__apply_feature_kernel__<<<dimGrid, dimBlock>>>(d_X_feat, d_feats, d_X_ii);
|
||||||
|
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
||||||
|
|
||||||
|
_print_cuda_error_("memcpy X_feat", cudaMemcpy(X_feat.data, d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t), cudaMemcpyDeviceToHost));
|
||||||
|
|
||||||
|
_print_cuda_error_("free d_X_feat", cudaFree(d_X_feat));
|
||||||
|
cudaFree("free d_feats", d_feats);
|
||||||
|
cudaFree("free d_X_11", d_X_ii);
|
||||||
|
|
||||||
|
return X_feat;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Partition of the argsort algorithm.
|
||||||
|
*
|
||||||
|
* @tparam T Inner type of the array
|
||||||
|
* @param d_a Array on device to sort
|
||||||
|
* @param d_indices Array of indices on device to write to
|
||||||
|
* @param low lower bound to sort
|
||||||
|
* @param high higher bound to sort
|
||||||
|
* @return Last index sorted
|
||||||
|
*/
|
||||||
template<typename T>
|
template<typename T>
|
||||||
__device__ inline static int32_t as_partition_gpu(const T* a, uint16_t* const indices, const size_t l, const size_t h) noexcept {
|
__device__ inline static int32_t _as_partition_(const T* d_a, uint16_t* const d_indices, const size_t low, const size_t high) noexcept {
|
||||||
int32_t i = l - 1;
|
int32_t i = low - 1;
|
||||||
for (int32_t j = l; j <= h; ++j)
|
for (int32_t j = low; j <= high; ++j)
|
||||||
if (a[indices[j]] < a[indices[h]])
|
if (d_a[d_indices[j]] < d_a[d_indices[high]])
|
||||||
swap(&indices[++i], &indices[j]);
|
swap(&d_indices[++i], &d_indices[j]);
|
||||||
swap(&indices[++i], &indices[h]);
|
swap(&d_indices[++i], &d_indices[high]);
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Cuda kernel to perform an indirect sort of a given array within a given bound.
|
||||||
|
*
|
||||||
|
* @tparam T Inner type of the array
|
||||||
|
* @param d_a Array on device to sort
|
||||||
|
* @param d_indices Array of indices on device to write to
|
||||||
|
* @param low lower bound to sort
|
||||||
|
* @param high higher bound to sort
|
||||||
|
*/
|
||||||
template<typename T>
|
template<typename T>
|
||||||
__device__ void argsort_gpu(const T* a, uint16_t* const indices, const size_t l, const size_t h) noexcept {
|
__device__ void argsort_kernel(const T* d_a, uint16_t* const d_indices, size_t low, size_t high) noexcept {
|
||||||
const size_t total = h - l + 1;
|
const size_t total = high - low + 1;
|
||||||
|
|
||||||
//int32_t* stack = new int32_t[total]{l, h};
|
//int32_t* stack = new int32_t[total]{low, high};
|
||||||
//int32_t stack[total];
|
//int32_t stack[total];
|
||||||
int32_t stack[6977];
|
int32_t stack[6977];
|
||||||
//int32_t stack[1<<16];
|
//int32_t stack[1<<16];
|
||||||
stack[0] = l;
|
stack[0] = low;
|
||||||
stack[1] = h;
|
stack[1] = high;
|
||||||
|
|
||||||
size_t top = 1, low = l, high = h;
|
size_t top = 1;
|
||||||
|
|
||||||
while (top <= total) {
|
while (top <= total) {
|
||||||
high = stack[top--];
|
high = stack[top--];
|
||||||
@ -296,7 +408,7 @@ __device__ void argsort_gpu(const T* a, uint16_t* const indices, const size_t l,
|
|||||||
if(low >= high)
|
if(low >= high)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
const int32_t p = as_partition_gpu(a, indices, low, high);
|
const int32_t p = _as_partition_(d_a, d_indices, low, high);
|
||||||
|
|
||||||
if (p - 1 > low && p - 1 < total) {
|
if (p - 1 > low && p - 1 < total) {
|
||||||
stack[++top] = low;
|
stack[++top] = low;
|
||||||
@ -311,42 +423,49 @@ __device__ void argsort_gpu(const T* a, uint16_t* const indices, const size_t l,
|
|||||||
//delete[] stack;
|
//delete[] stack;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Cuda kernel where argsort is applied to every column of a given 2D array.
|
||||||
|
*
|
||||||
|
* @tparam T Inner type of the array
|
||||||
|
* @param d_a 2D Array on device to sort
|
||||||
|
* @param d_indices 2D Array of indices on device to write to
|
||||||
|
*/
|
||||||
template<typename T>
|
template<typename T>
|
||||||
__global__ void argsort_bounded_gpu(const np::Array<T> a, uint16_t* const indices){
|
__global__ void argsort_bounded(const np::Array<T> d_a, uint16_t* const d_indices){
|
||||||
const size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
|
const size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
if (idx >= a.shape[0])
|
if (idx >= d_a.shape[0])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for(size_t y = 0; y < a.shape[1]; ++y) indices[idx * a.shape[1] + y] = y;
|
for(size_t y = 0; y < d_a.shape[1]; ++y) d_indices[idx * d_a.shape[1] + y] = y;
|
||||||
argsort_gpu(&a[idx * a.shape[1]], &indices[idx * a.shape[1]], 0, a.shape[1] - 1);
|
argsort_kernel(&d_a[idx * d_a.shape[1]], &d_indices[idx * d_a.shape[1]], 0, d_a.shape[1] - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
np::Array<uint16_t> argsort_2d_gpu(const np::Array<int32_t>& X_feat) noexcept {
|
/**
|
||||||
const np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);
|
* @brief Perform an indirect sort on each row (contiguous slice along the first axis) of a given 2D array (GPU version).
|
||||||
|
*
|
||||||
|
* @param a 2D Array to sort
|
||||||
|
* @return 2D Array of indices that sort the array
|
||||||
|
*/
|
||||||
|
np::Array<uint16_t> argsort_2d(const np::Array<int32_t>& a) noexcept {
|
||||||
|
const np::Array<uint16_t> indices = np::empty<uint16_t>(a.shape);
|
||||||
|
|
||||||
uint16_t* d_indices = nullptr;
|
uint16_t* d_indices = nullptr;
|
||||||
const size_t indices_size = np::prod(indices.shape) * sizeof(uint16_t);
|
const size_t indices_size = np::prod(indices.shape) * sizeof(uint16_t);
|
||||||
|
|
||||||
np::Array<int32_t> d_X_feat = copyToDevice<int32_t>("X_feat", X_feat);
|
np::Array<int32_t> d_a = copyToDevice<int32_t>("X_feat", a);
|
||||||
_print_cuda_error_("malloc d_indices", cudaMalloc(&d_indices, indices_size));
|
_print_cuda_error_("malloc d_indices", cudaMalloc(&d_indices, indices_size));
|
||||||
|
|
||||||
const size_t dimGrid = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS)));
|
const size_t dimGrid = static_cast<size_t>(std::ceil(static_cast<float64_t>(a.shape[0]) / static_cast<float64_t>(NB_THREADS)));
|
||||||
const dim3 dimBlock(NB_THREADS);
|
const dim3 dimBlock(NB_THREADS);
|
||||||
argsort_bounded_gpu<<<dimGrid, dimBlock>>>(d_X_feat, d_indices);
|
argsort_bounded<<<dimGrid, dimBlock>>>(d_a, d_indices);
|
||||||
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
||||||
|
|
||||||
_print_cuda_error_("memcpy d_indices", cudaMemcpy(indices.data, d_indices, indices_size, cudaMemcpyDeviceToHost));
|
_print_cuda_error_("memcpy d_indices", cudaMemcpy(indices.data, d_indices, indices_size, cudaMemcpyDeviceToHost));
|
||||||
|
|
||||||
cudaFree("free d_X_feat", d_X_feat);
|
cudaFree("free d_a", d_a);
|
||||||
_print_cuda_error_("free d_indices", cudaFree(d_indices));
|
_print_cuda_error_("free d_indices", cudaFree(d_indices));
|
||||||
|
|
||||||
return indices;
|
return indices;
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
#endif // GPU_BOOSTED
|
||||||
size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept {
|
|
||||||
size_t result = shape[offset];
|
|
||||||
for(size_t i = 1 + offset; i < shape.length; ++i)
|
|
||||||
result *= shape[i];
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
@ -1,8 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
#include "data.hpp"
|
|
||||||
|
|
||||||
np::Array<uint32_t> set_integral_image_gpu(const np::Array<uint8_t>&) noexcept;
|
|
||||||
np::Array<int32_t> apply_features_gpu(const np::Array<uint8_t>&, const np::Array<uint32_t>&) noexcept;
|
|
||||||
np::Array<float64_t> train_weak_clf_gpu(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y,
|
|
||||||
const np::Array<float64_t>& weights) noexcept;
|
|
||||||
np::Array<uint16_t> argsort_2d_gpu(const np::Array<int32_t>& X_feat) noexcept;
|
|
39
cpp/ViolaJones_device.hpp
Normal file
39
cpp/ViolaJones_device.hpp
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "data.hpp"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Transform the input images into integrated images.
|
||||||
|
*
|
||||||
|
* @param X Dataset of images
|
||||||
|
* @return Dataset of integrated images
|
||||||
|
*/
|
||||||
|
np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Apply the features to an integrated image dataset.
|
||||||
|
*
|
||||||
|
* @param feats Features to apply
|
||||||
|
* @param X_ii Integrated image dataset
|
||||||
|
* @return Applied features
|
||||||
|
*/
|
||||||
|
np::Array<int32_t> apply_features(const np::Array<uint8_t>&, const np::Array<uint32_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Train the weak classifiers on a given dataset.
|
||||||
|
*
|
||||||
|
* @param X_feat Feature images dataset
|
||||||
|
* @param X_feat_argsort Sorted indexes of the integrated features
|
||||||
|
* @param y Labels of the features
|
||||||
|
* @param weights Weights of the features
|
||||||
|
* @return Trained weak classifiers
|
||||||
|
*/
|
||||||
|
np::Array<float64_t> train_weak_clf(const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&,
|
||||||
|
const np::Array<float64_t>&) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Perform an indirect sort on each column of a given 2D array
|
||||||
|
*
|
||||||
|
* @param a 2D Array to sort
|
||||||
|
* @return 2D Array of indices that sort the array
|
||||||
|
*/
|
||||||
|
np::Array<uint16_t> argsort_2d(const np::Array<int32_t>&) noexcept;
|
27
cpp/data.cpp
27
cpp/data.cpp
@ -16,14 +16,6 @@ int32_t print(const np::Shape& shape) noexcept {
|
|||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
int32_t print(const np::Array<T>& array, const char* const format) noexcept {
|
int32_t print(const np::Array<T>& array, const char* const format) noexcept {
|
||||||
//printf("[");
|
|
||||||
//const size_t length = np::prod(array.shape);
|
|
||||||
//for(size_t i = 0; i < length - 1; ++i)
|
|
||||||
// //std::cout << array[i] << " ";
|
|
||||||
// printf("%f ", array[i]);
|
|
||||||
////std::cout << array[array.shape[0] - 1] << "]\n";
|
|
||||||
//printf("%f]\n", array[length - 1]);
|
|
||||||
|
|
||||||
char format_space[BUFFER_SIZE] = { 0 };
|
char format_space[BUFFER_SIZE] = { 0 };
|
||||||
snprintf(format_space, BUFFER_SIZE,"%s ", format);
|
snprintf(format_space, BUFFER_SIZE,"%s ", format);
|
||||||
char format_close[BUFFER_SIZE] = { 0 };
|
char format_close[BUFFER_SIZE] = { 0 };
|
||||||
@ -75,7 +67,7 @@ int32_t print_feat(const np::Array<uint8_t>& array, const np::Slice& slice) noex
|
|||||||
int32_t print(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept {
|
int32_t print(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept {
|
||||||
int32_t num_written = 0;
|
int32_t num_written = 0;
|
||||||
if (array.shape.length == 1) {
|
if (array.shape.length == 1) {
|
||||||
const size_t max = slice.y - 1; //std::min(slice.y, array.shape[0] - 1);
|
const size_t max = slice.y - 1;
|
||||||
num_written += printf("[");
|
num_written += printf("[");
|
||||||
for (size_t i = slice.x; i < max; ++i)
|
for (size_t i = slice.x; i < max; ++i)
|
||||||
num_written += printf("%hu ", array[i]);
|
num_written += printf("%hu ", array[i]);
|
||||||
@ -98,7 +90,7 @@ int32_t print(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept
|
|||||||
int32_t print(const np::Array<uint32_t>& array, const np::Slice& slice) noexcept {
|
int32_t print(const np::Array<uint32_t>& array, const np::Slice& slice) noexcept {
|
||||||
int32_t num_written = 0;
|
int32_t num_written = 0;
|
||||||
if (array.shape.length == 1) {
|
if (array.shape.length == 1) {
|
||||||
const size_t max = slice.y - 1; //std::min(slice.y, array.shape[0] - 1);
|
const size_t max = slice.y - 1;
|
||||||
num_written += printf("[");
|
num_written += printf("[");
|
||||||
for (size_t i = slice.x; i < max; ++i)
|
for (size_t i = slice.x; i < max; ++i)
|
||||||
num_written += printf("%iu ", array[i]);
|
num_written += printf("%iu ", array[i]);
|
||||||
@ -121,7 +113,6 @@ int32_t print(const np::Array<uint32_t>& array, const np::Slice& slice) noexcept
|
|||||||
int32_t print(const np::Array<int32_t>& array, const np::Slice& slice) noexcept {
|
int32_t print(const np::Array<int32_t>& array, const np::Slice& slice) noexcept {
|
||||||
int32_t num_written = 0;
|
int32_t num_written = 0;
|
||||||
num_written += printf("[");
|
num_written += printf("[");
|
||||||
//size_t k = slice.x * array.shape[1] * array.shape[2] + slice.y * array.shape[2] + slice.z;
|
|
||||||
size_t k = slice.x * array.shape[1];
|
size_t k = slice.x * array.shape[1];
|
||||||
for (size_t i = k; i < k + (slice.y - slice.x); ++i) {
|
for (size_t i = k; i < k + (slice.y - slice.x); ++i) {
|
||||||
num_written += printf("%5i ", array[i]);
|
num_written += printf("%5i ", array[i]);
|
||||||
@ -133,7 +124,6 @@ int32_t print(const np::Array<int32_t>& array, const np::Slice& slice) noexcept
|
|||||||
int32_t print(const np::Array<uint16_t>& array, const np::Slice& slice) noexcept {
|
int32_t print(const np::Array<uint16_t>& array, const np::Slice& slice) noexcept {
|
||||||
int32_t num_written = 0;
|
int32_t num_written = 0;
|
||||||
num_written += printf("[");
|
num_written += printf("[");
|
||||||
//size_t k = slice.x * array.shape[1] * array.shape[2] + slice.y * array.shape[2] + slice.z;
|
|
||||||
size_t k = slice.x * array.shape[1];
|
size_t k = slice.x * array.shape[1];
|
||||||
for (size_t i = k; i < k + (slice.y - slice.x); ++i) {
|
for (size_t i = k; i < k + (slice.y - slice.x); ++i) {
|
||||||
num_written += printf("%5hu ", array[i]);
|
num_written += printf("%5hu ", array[i]);
|
||||||
@ -171,7 +161,6 @@ static inline np::Array<uint8_t> load_set(const char* const set_name) {
|
|||||||
if (c == ' ' || c == '\n') {
|
if (c == ' ' || c == '\n') {
|
||||||
buff[j] = '\0';
|
buff[j] = '\0';
|
||||||
a[i++] = static_cast<uint8_t>(atoi(buff));
|
a[i++] = static_cast<uint8_t>(atoi(buff));
|
||||||
//memset(buff, 0, STRING_INT_SIZE);
|
|
||||||
j = 0;
|
j = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -189,6 +178,11 @@ static inline np::Array<uint8_t> load_set(const char* const set_name) {
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Load the datasets.
|
||||||
|
*
|
||||||
|
* @return Array containing X_train, y_train, X_test, y_test
|
||||||
|
*/
|
||||||
std::array<np::Array<uint8_t>, 4> load_datasets(void) {
|
std::array<np::Array<uint8_t>, 4> load_datasets(void) {
|
||||||
return {
|
return {
|
||||||
load_set(DATA_DIR "/X_train.bin"), load_set(DATA_DIR "/y_train.bin"),
|
load_set(DATA_DIR "/X_train.bin"), load_set(DATA_DIR "/y_train.bin"),
|
||||||
@ -201,10 +195,3 @@ void print_error_file(const char* const file_dir) noexcept {
|
|||||||
fprintf(stderr, "Can't open %s, error code = %d : %s\n", file_dir, errno, buff);
|
fprintf(stderr, "Can't open %s, error code = %d : %s\n", file_dir, errno, buff);
|
||||||
// delete buff;
|
// delete buff;
|
||||||
}
|
}
|
||||||
|
|
||||||
//size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept {
|
|
||||||
// size_t result = shape[offset];
|
|
||||||
// for(size_t i = 1 + offset; i < shape.length; ++i)
|
|
||||||
// result *= shape[i];
|
|
||||||
// return result;
|
|
||||||
//}
|
|
||||||
|
155
cpp/data.hpp
155
cpp/data.hpp
@ -35,10 +35,10 @@ namespace np {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
// #if __DEBUG
|
|
||||||
// print("Shape created (default)");
|
|
||||||
// #endif
|
|
||||||
Shape(void) noexcept {
|
Shape(void) noexcept {
|
||||||
|
#if __DEBUG
|
||||||
|
printf("Shape created (default)\n");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
@ -52,9 +52,9 @@ namespace np {
|
|||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
|
Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// print("Shape created (initializer)");
|
printf("Shape created (initializer)\n");
|
||||||
// #endif
|
#endif
|
||||||
const size_t* const begin = dims.begin();
|
const size_t* const begin = dims.begin();
|
||||||
for(size_t i = 0; i < length; ++i){
|
for(size_t i = 0; i < length; ++i){
|
||||||
data[i] = begin[i];
|
data[i] = begin[i];
|
||||||
@ -98,9 +98,9 @@ namespace np {
|
|||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Shape(Shape&& shape) noexcept {
|
Shape(Shape&& shape) noexcept {
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// print("Shape created (move));
|
printf("Shape created (move)\n");
|
||||||
// #endif
|
#endif
|
||||||
if (data != nullptr && data != shape.data){
|
if (data != nullptr && data != shape.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former shape deleted (move)\n");
|
printf("Former shape deleted (move)\n");
|
||||||
@ -129,27 +129,27 @@ namespace np {
|
|||||||
__host__ __device__
|
__host__ __device__
|
||||||
~Shape(void) noexcept {
|
~Shape(void) noexcept {
|
||||||
if(refcount == nullptr){
|
if(refcount == nullptr){
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// print("Shape refcount freed more than once");
|
printf("Shape refcount freed more than once\n");
|
||||||
// #endif
|
#endif
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
--(*refcount);
|
--(*refcount);
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// printf("Shape destructed : %lu\n", *refcount);
|
printf("Shape destructed : %lu\n", *refcount);
|
||||||
// #endif
|
#endif
|
||||||
if(*refcount == 0){
|
if(*refcount == 0){
|
||||||
if (data != nullptr){
|
if (data != nullptr){
|
||||||
delete[] data;
|
delete[] data;
|
||||||
data = nullptr;
|
data = nullptr;
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// print("Shape freeing ...");
|
printf("Shape freeing ...\n");
|
||||||
// #endif
|
#endif
|
||||||
}
|
}
|
||||||
//#if __DEBUG
|
#if __DEBUG
|
||||||
else
|
else
|
||||||
printf("Shape freed more than once : %lu\n", *refcount);
|
printf("Shape freed more than once : %lu\n", *refcount);
|
||||||
//#endif
|
#endif
|
||||||
delete refcount;
|
delete refcount;
|
||||||
refcount = nullptr;
|
refcount = nullptr;
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
@ -191,9 +191,9 @@ namespace np {
|
|||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Shape& operator=(Shape&& shape) noexcept {
|
Shape& operator=(Shape&& shape) noexcept {
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// print("Shape created (assign move)");
|
printf("Shape created (assign move)\n");
|
||||||
// #endif
|
#endif
|
||||||
if (data != nullptr && data != shape.data){
|
if (data != nullptr && data != shape.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former shape deleted (assign move)\n");
|
printf("Former shape deleted (assign move)\n");
|
||||||
@ -210,6 +210,8 @@ namespace np {
|
|||||||
data = shape.data;
|
data = shape.data;
|
||||||
refcount = shape.refcount;
|
refcount = shape.refcount;
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
|
if (refcount == nullptr)
|
||||||
|
printf("Assigned copy shape has null refcount\n");
|
||||||
total = shape.total;
|
total = shape.total;
|
||||||
shape.total = 1;
|
shape.total = 1;
|
||||||
#endif
|
#endif
|
||||||
@ -261,37 +263,38 @@ namespace np {
|
|||||||
size_t* refcount = nullptr;
|
size_t* refcount = nullptr;
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
// #if __DEBUG
|
|
||||||
// print("Array created (default)");
|
|
||||||
Array(void) noexcept {
|
Array(void) noexcept {
|
||||||
|
#if __DEBUG
|
||||||
|
printf("Array created (default)\n");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
// #if __DEBUG
|
|
||||||
// print("Array created (raw, copy shape)");
|
|
||||||
// #endif
|
|
||||||
Array(const Shape& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
Array(const Shape& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
||||||
|
#if __DEBUG
|
||||||
|
printf("Array created (raw, copy shape)\n");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// print("Array created (raw empty, copy shape)");
|
printf("Array created (raw empty, copy shape)\n");
|
||||||
// #endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
// #if __DEBUG
|
|
||||||
// print("Array created (raw, move shape)");
|
|
||||||
// #endif
|
|
||||||
Array(Shape&& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
Array(Shape&& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
||||||
|
#if __DEBUG
|
||||||
|
printf("Array created (raw, move shape)\n");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
// #if __DEBUG
|
|
||||||
// print("Array created (raw empty, move shape)");
|
|
||||||
// #endif
|
|
||||||
Array(Shape&& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
Array(Shape&& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
||||||
|
#if __DEBUG
|
||||||
|
printf("Array created (raw empty, move shape)\n");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
@ -323,10 +326,10 @@ namespace np {
|
|||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
// #if __DEBUG
|
|
||||||
// print("Array created (move)");
|
|
||||||
// #endif
|
|
||||||
Array(Array&& array) noexcept : shape(std::move(array.shape)) {
|
Array(Array&& array) noexcept : shape(std::move(array.shape)) {
|
||||||
|
#if __DEBUG
|
||||||
|
printf("Array created (move)\n");
|
||||||
|
#endif
|
||||||
if (data != nullptr && data != array.data){
|
if (data != nullptr && data != array.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former array deleted (move)\n");
|
printf("Former array deleted (move)\n");
|
||||||
@ -349,22 +352,22 @@ namespace np {
|
|||||||
__host__ __device__
|
__host__ __device__
|
||||||
~Array(void) noexcept {
|
~Array(void) noexcept {
|
||||||
if(refcount == nullptr){
|
if(refcount == nullptr){
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// print("Array refcount freed more than once");
|
printf("Array refcount freed more than once\n");
|
||||||
// #endif
|
#endif
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
--(*refcount);
|
--(*refcount);
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// printf("Array destructed : %lu\n", *refcount);
|
printf("Array destructed : %lu\n", *refcount);
|
||||||
// #endif
|
#endif
|
||||||
if(*refcount == 0){
|
if(*refcount == 0){
|
||||||
if (data != nullptr){
|
if (data != nullptr){
|
||||||
delete[] data;
|
delete[] data;
|
||||||
data = nullptr;
|
data = nullptr;
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// print("Array freeing ...");
|
printf("Array freeing ...\n");
|
||||||
// #endif
|
#endif
|
||||||
}
|
}
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
else
|
else
|
||||||
@ -400,16 +403,16 @@ namespace np {
|
|||||||
(*refcount)++;
|
(*refcount)++;
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
printf("Assigned array has null refcount\n");
|
printf("Assigned array has null refcount\n");
|
||||||
|
#endif
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array& operator=(Array&& array) noexcept {
|
Array& operator=(Array&& array) noexcept {
|
||||||
// #if __DEBUG
|
#if __DEBUG
|
||||||
// print("Array created (assign move)");
|
printf("Array created (assign move)\n");
|
||||||
// #endif
|
#endif
|
||||||
if (data != nullptr && data != array.data){
|
if (data != nullptr && data != array.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former array deleted (assign move)\n");
|
printf("Former array deleted (assign move)\n");
|
||||||
@ -786,48 +789,6 @@ static size_t as_partition(const T* const a, uint16_t* const indices, const size
|
|||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
void argsort(const T* const a, uint16_t* const indices, const size_t& l, const size_t& h) noexcept {
|
|
||||||
const size_t total = h - l + 1;
|
|
||||||
|
|
||||||
size_t* const stack = new size_t[total]{l, h};
|
|
||||||
size_t top = 1, low = l, high = h;
|
|
||||||
|
|
||||||
while (top <= total) {
|
|
||||||
high = stack[top--];
|
|
||||||
low = stack[top--];
|
|
||||||
if(low >= high)
|
|
||||||
break;
|
|
||||||
|
|
||||||
const size_t p = as_partition(a, indices, low, high);
|
|
||||||
|
|
||||||
if (p - 1 > low && p - 1 < total) {
|
|
||||||
stack[++top] = low;
|
|
||||||
stack[++top] = p - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (p + 1 < high) {
|
|
||||||
stack[++top] = p + 1;
|
|
||||||
stack[++top] = high;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
delete[] stack;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
np::Array<uint16_t> argsort(const np::Array<T>& other, const size_t& l, const size_t& h) noexcept {
|
|
||||||
np::Array<uint16_t> indices = np::empty(other.shape);
|
|
||||||
map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; });
|
|
||||||
|
|
||||||
argsort(other, indices, l, h);
|
|
||||||
return indices;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
np::Array<uint16_t> argsort(const np::Array<T>* const other, const size_t& length) noexcept {
|
|
||||||
return argsort(other, 0, length - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::array<np::Array<uint8_t>, 4> load_datasets(void);
|
std::array<np::Array<uint8_t>, 4> load_datasets(void);
|
||||||
void print_error_file(const char* const) noexcept;
|
void print_error_file(const char* const) noexcept;
|
||||||
|
|
||||||
@ -838,10 +799,10 @@ void save(const np::Array<T>& d, const char* const filename) {
|
|||||||
print_error_file(filename);
|
print_error_file(filename);
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
assert(d.shape.refcount != 0);//, "Refcount shape is zero !!");
|
assert(d.shape.refcount != 0);
|
||||||
fwrite(&d.shape.length, sizeof(size_t), 1, output);
|
fwrite(&d.shape.length, sizeof(size_t), 1, output);
|
||||||
fwrite(d.shape.data, sizeof(size_t), d.shape.length, output);
|
fwrite(d.shape.data, sizeof(size_t), d.shape.length, output);
|
||||||
assert(d.refcount != 0);//, "Refcount array is zero !!");
|
assert(d.refcount != 0);
|
||||||
fwrite(d.data, sizeof(T), np::prod(d.shape), output);
|
fwrite(d.data, sizeof(T), np::prod(d.shape), output);
|
||||||
fclose(output);
|
fclose(output);
|
||||||
}
|
}
|
||||||
|
16
cpp/data_device.cu
Normal file
16
cpp/data_device.cu
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#include "data.hpp"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Product of all the elements of a given shape, starting at a given offset (inclusive).
|
||||||
|
*
|
||||||
|
* @param shape Shape to product over
|
||||||
|
* @param offset Index of the first dimension included in the product
|
||||||
|
* @return Product of the selected dimensions
|
||||||
|
*/
|
||||||
|
__host__ __device__
|
||||||
|
size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept {
|
||||||
|
size_t result = shape[offset];
|
||||||
|
for(size_t i = 1 + offset; i < shape.length; ++i)
|
||||||
|
result *= shape[i];
|
||||||
|
return result;
|
||||||
|
}
|
@ -4,20 +4,13 @@
|
|||||||
#include "config.hpp"
|
#include "config.hpp"
|
||||||
#include "toolbox_unit_test.hpp"
|
#include "toolbox_unit_test.hpp"
|
||||||
#include "ViolaJones.hpp"
|
#include "ViolaJones.hpp"
|
||||||
|
#include "ViolaJones_device.hpp"
|
||||||
|
|
||||||
#if GPU_BOOSTED
|
#if GPU_BOOSTED
|
||||||
#include "ViolaJonesGPU.hpp"
|
|
||||||
#include "gpu_unit_test.hpp"
|
#include "gpu_unit_test.hpp"
|
||||||
#define LABEL "GPU"
|
#define LABEL "GPU"
|
||||||
#define apply_features apply_features_gpu
|
|
||||||
#define set_integral_image set_integral_image_gpu
|
|
||||||
#define argsort_2d argsort_2d_gpu
|
|
||||||
#else
|
#else
|
||||||
#include "ViolaJonesCPU.hpp"
|
|
||||||
#define LABEL "CPU"
|
#define LABEL "CPU"
|
||||||
#define apply_features apply_features_cpu
|
|
||||||
#define set_integral_image set_integral_image_cpu
|
|
||||||
#define argsort_2d argsort_2d_cpu
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -28,7 +21,7 @@
|
|||||||
* - Calculate features
|
* - Calculate features
|
||||||
* - Calculate integral images
|
* - Calculate integral images
|
||||||
* - Apply features to images
|
* - Apply features to images
|
||||||
* - Calculate argsort of the featured images.
|
* - Calculate argsort of the featured images
|
||||||
*
|
*
|
||||||
* @return std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Array<int32_t>, np::Array<uint8_t>> Tuple containing in order : training features, training features sorted indexes, training labels, testing features, testing labels
|
* @return std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Array<int32_t>, np::Array<uint8_t>> Tuple containing in order : training features, training features sorted indexes, training labels, testing features, testing labels
|
||||||
*/
|
*/
|
||||||
@ -40,7 +33,7 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
|||||||
|
|
||||||
const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns();
|
const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns();
|
||||||
const std::array<int32_t, 3> preproc_gaps = { 49, -18, 29 };
|
const std::array<int32_t, 3> preproc_gaps = { 49, -18, 29 };
|
||||||
header({ "Preprocessing", "Time spent (ns)", "Formatted time spent" }, preproc_gaps);
|
header(preproc_gaps, { "Preprocessing", "Time spent (ns)", "Formatted time spent" });
|
||||||
|
|
||||||
const auto [ X_train, y_train, X_test, y_test ] = state_saver<uint8_t, 4>("Loading sets", preproc_gaps[0], { "X_train", "y_train", "X_test", "y_test" },
|
const auto [ X_train, y_train, X_test, y_test ] = state_saver<uint8_t, 4>("Loading sets", preproc_gaps[0], { "X_train", "y_train", "X_test", "y_test" },
|
||||||
FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets);
|
FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets);
|
||||||
@ -97,8 +90,7 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
|||||||
print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// const np::Array<int32_t> indices = measure_time_save<Array<int>>("Selecting best features", preproc_gaps[0], "indices", select_percentile, X_train_feat, d.y_train);
|
// const np::Array<int32_t> indices = state_saver<int32_t>("Selecting best features", preproc_gaps[0], "indices", select_percentile, X_train_feat, d.y_train);
|
||||||
// const np::Array<int32_t> indices = measure_time<Array<int>>("Selecting best features", preproc_gaps[0], select_percentile, X_train_feat, d.y_train);
|
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
// print_feature(indices);
|
// print_feature(indices);
|
||||||
@ -113,13 +105,13 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
|||||||
print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", preproc_gaps[0], "X_test_feat_argsort_" LABEL,
|
const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", preproc_gaps[0], "X_test_feat_argsort_" LABEL,
|
||||||
// FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);
|
FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
// printf("X_test_feat_argsort\n");
|
printf("X_test_feat_argsort\n");
|
||||||
// print(X_test_feat_argsort.shape);
|
print(X_test_feat_argsort.shape);
|
||||||
// print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||||
#endif
|
#endif
|
||||||
const long long time_spent = duration_ns(perf_counter_ns() - preproc_timestamp);
|
const long long time_spent = duration_ns(perf_counter_ns() - preproc_timestamp);
|
||||||
formatted_line(preproc_gaps, "├", "┼", "─", "┤");
|
formatted_line(preproc_gaps, "├", "┼", "─", "┤");
|
||||||
@ -131,15 +123,15 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
|||||||
/**
|
/**
|
||||||
* @brief Train the weak classifiers.
|
* @brief Train the weak classifiers.
|
||||||
*
|
*
|
||||||
* @param X_train_feat Training images.
|
* @param X_train_feat Training images
|
||||||
* @param X_train_feat_argsort Sorted indexes of the training images features.
|
* @param X_train_feat_argsort Sorted indexes of the training images features
|
||||||
* @param y_train Training labels.
|
* @param y_train Training labels
|
||||||
* @return Trained models
|
* @return List of trained models
|
||||||
*/
|
*/
|
||||||
std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_train_feat_argsort, const np::Array<uint8_t>& y_train) noexcept {
|
std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_train_feat_argsort, const np::Array<uint8_t>& y_train) noexcept {
|
||||||
const std::chrono::system_clock::time_point training_timestamp = perf_counter_ns();
|
const std::chrono::system_clock::time_point training_timestamp = perf_counter_ns();
|
||||||
const std::array<int32_t, 3> training_gaps = { 26, -18, 29 };
|
const std::array<int32_t, 3> training_gaps = { 26, -18, 29 };
|
||||||
header({ "Training", "Time spent (ns)", "Formatted time spent" }, training_gaps);
|
header(training_gaps, { "Training", "Time spent (ns)", "Formatted time spent" });
|
||||||
|
|
||||||
std::array<std::array<np::Array<float64_t>, 2>, TS.size()> models;
|
std::array<std::array<np::Array<float64_t>, 2>, TS.size()> models;
|
||||||
|
|
||||||
@ -173,14 +165,15 @@ std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array
|
|||||||
/**
|
/**
|
||||||
* @brief Benchmark the trained classifiers on the training and testing sets.
|
* @brief Benchmark the trained classifiers on the training and testing sets.
|
||||||
*
|
*
|
||||||
* @param X_train_feat Training features.
|
* @param models List of trained models
|
||||||
* @param y_train Training labels.
|
* @param X_train_feat Training features
|
||||||
* @param X_test_feat Testing features.
|
* @param y_train Training labels
|
||||||
* @param y_test Testing labels.
|
* @param X_test_feat Testing features
|
||||||
|
* @param y_test Testing labels
|
||||||
*/
|
*/
|
||||||
void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>, TS.size()>& models, const np::Array<int32_t>& X_train_feat, const np::Array<uint8_t>& y_train, const np::Array<int32_t>& X_test_feat, const np::Array<uint8_t>& y_test) {
|
void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>, TS.size()>& models, const np::Array<int32_t>& X_train_feat, const np::Array<uint8_t>& y_train, const np::Array<int32_t>& X_test_feat, const np::Array<uint8_t>& y_test) {
|
||||||
const std::array<int32_t, 5> testing_gaps = { 26, -19, 24, -19, 24 };
|
const std::array<int32_t, 5> testing_gaps = { 26, -19, 24, -19, 24 };
|
||||||
header({ "Testing", "Time spent (ns) (E)", "Formatted time spent (E)", "Time spent (ns) (T)", "Formatted time spent (T)" }, testing_gaps);
|
header(testing_gaps, { "Testing", "Time spent (ns) (E)", "Formatted time spent (E)", "Time spent (ns) (T)", "Formatted time spent (T)" });
|
||||||
std::array<std::array<float64_t, 8>, TS.size()> results;
|
std::array<std::array<float64_t, 8>, TS.size()> results;
|
||||||
|
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
@ -216,7 +209,7 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
|
|||||||
footer(testing_gaps);
|
footer(testing_gaps);
|
||||||
|
|
||||||
const std::array<int32_t, 9> evaluating_gaps = { 19, -7, -6, -6, -6, -7, -6, -6, -6 };
|
const std::array<int32_t, 9> evaluating_gaps = { 19, -7, -6, -6, -6, -7, -6, -6, -6 };
|
||||||
header({ "Evaluating", "ACC (E)", "F1 (E)", "FN (E)", "FP (E)", "ACC (T)", "F1 (T)", "FN (T)", "FP (T)"}, evaluating_gaps);
|
header(evaluating_gaps, { "Evaluating", "ACC (E)", "F1 (E)", "FN (E)", "FP (E)", "ACC (T)", "F1 (T)", "FN (T)", "FP (T)"});
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
for (const size_t T : TS) {
|
for (const size_t T : TS) {
|
||||||
@ -231,14 +224,13 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
|
|||||||
/**
|
/**
|
||||||
* @brief Test if each result is equal across devices.
|
* @brief Test if each result is equal across devices.
|
||||||
*
|
*
|
||||||
* Given ViolaJones is a fully deterministic algorithm.
|
* Given that ViolaJones is a fully deterministic algorithm, the results, regardless of the device, should be the same,
|
||||||
* The results, regardless the device, should be the same.
|
* this function checks this assertion.
|
||||||
* This function check this assertion.
|
|
||||||
*/
|
*/
|
||||||
void unit_test(void) {
|
void unit_test(void) {
|
||||||
const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
|
const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
|
||||||
const std::array<int32_t, 4> unit_gaps = { 37, -10, -18, 29};
|
const std::array<int32_t, 4> unit_gaps = { 37, -10, -18, 29};
|
||||||
header({ "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent" }, unit_gaps);
|
header(unit_gaps, { "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent" });
|
||||||
|
|
||||||
char title[BUFFER_SIZE] = { 0 };
|
char title[BUFFER_SIZE] = { 0 };
|
||||||
char tmp_title[BUFFER_SIZE / 2] = { 0 };
|
char tmp_title[BUFFER_SIZE / 2] = { 0 };
|
||||||
@ -300,7 +292,7 @@ void unit_test(void) {
|
|||||||
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label);
|
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label);
|
||||||
snprintf(title, BUFFER_SIZE, "%-22s - CPU argsort", tmp_title);
|
snprintf(title, BUFFER_SIZE, "%-22s - CPU argsort", tmp_title);
|
||||||
test_fnc(title, [&X_feat, &X_feat_argsort_cpu, &file_cpu]{
|
test_fnc(title, [&X_feat, &X_feat_argsort_cpu, &file_cpu]{
|
||||||
X_feat_argsort_cpu = std::move(load<uint16_t>(file_cpu));
|
X_feat_argsort_cpu = load<uint16_t>(file_cpu);
|
||||||
return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_cpu);
|
return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_cpu);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -311,7 +303,7 @@ void unit_test(void) {
|
|||||||
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label);
|
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label);
|
||||||
snprintf(title, BUFFER_SIZE, "%-22s - GPU argsort", tmp_title);
|
snprintf(title, BUFFER_SIZE, "%-22s - GPU argsort", tmp_title);
|
||||||
test_fnc(title, [&X_feat, &X_feat_argsort_gpu, &file_gpu]{
|
test_fnc(title, [&X_feat, &X_feat_argsort_gpu, &file_gpu]{
|
||||||
X_feat_argsort_gpu = std::move(load<uint16_t>(file_gpu));
|
X_feat_argsort_gpu = load<uint16_t>(file_gpu);
|
||||||
return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_gpu);
|
return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_gpu);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -355,7 +347,7 @@ int32_t main(void){
|
|||||||
|
|
||||||
const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
|
const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
|
||||||
const std::array<int32_t, 3> unit_gaps = { 27, -18, 29 };
|
const std::array<int32_t, 3> unit_gaps = { 27, -18, 29 };
|
||||||
header({ "Unit testing", "Time spent (ns)", "Formatted time spent" }, unit_gaps);
|
header(unit_gaps, { "Unit testing", "Time spent (ns)", "Formatted time spent" });
|
||||||
#if GPU_BOOSTED
|
#if GPU_BOOSTED
|
||||||
benchmark_function_void("Testing GPU capabilities 1D", unit_gaps[0], test_working, 50000);
|
benchmark_function_void("Testing GPU capabilities 1D", unit_gaps[0], test_working, 50000);
|
||||||
benchmark_function_void("Testing GPU capabilities 2D", unit_gaps[0], test_working_2d, 200, 500);
|
benchmark_function_void("Testing GPU capabilities 2D", unit_gaps[0], test_working_2d, 200, 500);
|
||||||
|
@ -11,7 +11,7 @@ static const constexpr std::array<uint64_t, N_TIMES> time_numbers = { 1, u64(1e3
|
|||||||
* @brief Format the time in seconds in human readable format.
|
* @brief Format the time in seconds in human readable format.
|
||||||
*
|
*
|
||||||
* @param time number of seconds
|
* @param time number of seconds
|
||||||
* @return The formatted human readable string.
|
* @return The formatted human readable string
|
||||||
*/
|
*/
|
||||||
std::string format_time(uint64_t time) noexcept {
|
std::string format_time(uint64_t time) noexcept {
|
||||||
if (time == 0)
|
if (time == 0)
|
||||||
@ -38,7 +38,7 @@ std::string format_time(uint64_t time) noexcept {
|
|||||||
* @brief Format the time in nanoseconds in human readable format.
|
* @brief Format the time in nanoseconds in human readable format.
|
||||||
*
|
*
|
||||||
* @param time Time in nanoseconds
|
* @param time Time in nanoseconds
|
||||||
* @return std::string The formatted human readable string.
|
* @return std::string The formatted human readable string
|
||||||
*/
|
*/
|
||||||
std::string format_time_ns(uint64_t time) noexcept {
|
std::string format_time_ns(uint64_t time) noexcept {
|
||||||
if (time == 0)
|
if (time == 0)
|
||||||
@ -115,13 +115,4 @@ std::string thousand_sep(uint64_t k, const char& separator) noexcept {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
|
|
||||||
//uint64_t len = n.length(), dlen = 3;
|
|
||||||
|
|
||||||
//while (len > dlen) {
|
|
||||||
// n.insert(len - dlen, 1, separator);
|
|
||||||
// dlen += 4;
|
|
||||||
// len += 1;
|
|
||||||
//}
|
|
||||||
//return n;
|
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,13 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Print a formatted row of titles with gaps separated by a separator.
|
||||||
|
*
|
||||||
|
* @param gaps List of size gaps
|
||||||
|
* @param titles List of titles
|
||||||
|
* @param separator Separator character between each gap
|
||||||
|
*/
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
constexpr void formatted_row(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles,
|
constexpr void formatted_row(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles,
|
||||||
const char* const separator = "│") noexcept {
|
const char* const separator = "│") noexcept {
|
||||||
@ -11,10 +18,19 @@ constexpr void formatted_row(const std::array<int32_t, N>& gaps, const std::arra
|
|||||||
printf("%s\n", separator);
|
printf("%s\n", separator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Print a formatted line of repeated characters.
|
||||||
|
*
|
||||||
|
* @param gaps List of size gaps
|
||||||
|
* @param left Character on the left
|
||||||
|
* @param middle Character between each separator
|
||||||
|
* @param separator Separator character between each gap
|
||||||
|
* @param right Character on the right
|
||||||
|
*/
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* const right, const char* const middle,
|
constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* const left, const char* const middle,
|
||||||
const char* const separator, const char* const left) noexcept {
|
const char* const separator, const char* const right) noexcept {
|
||||||
printf("%s", right);
|
printf("%s", left);
|
||||||
for(size_t i = 0; i < N; ++i){
|
for(size_t i = 0; i < N; ++i){
|
||||||
for(int32_t j = std::abs(gaps[i]) + 2; j > 0; --j)
|
for(int32_t j = std::abs(gaps[i]) + 2; j > 0; --j)
|
||||||
printf("%s", separator);
|
printf("%s", separator);
|
||||||
@ -22,16 +38,27 @@ constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* co
|
|||||||
printf("%s", middle);
|
printf("%s", middle);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("%s\n", left);
|
printf("%s\n", right);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Print a formatted header with the given titles and sizes.
|
||||||
|
*
|
||||||
|
* @param gaps List of size gaps
|
||||||
|
* @param titles List of titles
|
||||||
|
*/
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
constexpr void header(const std::array<const char* const, N>& titles, const std::array<int32_t, N>& gaps) noexcept {
|
constexpr void header(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles) noexcept {
|
||||||
formatted_line(gaps, "┌", "┬", "─", "┐");
|
formatted_line(gaps, "┌", "┬", "─", "┐");
|
||||||
formatted_row(gaps, titles);
|
formatted_row(gaps, titles);
|
||||||
formatted_line(gaps, "├", "┼", "─", "┤");
|
formatted_line(gaps, "├", "┼", "─", "┤");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Print a formatted footer with the given sizes.
|
||||||
|
*
|
||||||
|
* @param gaps List of size gaps
|
||||||
|
*/
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept {
|
constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept {
|
||||||
formatted_line(gaps, "└", "┴", "─", "┘");
|
formatted_line(gaps, "└", "┴", "─", "┘");
|
||||||
@ -40,7 +67,36 @@ constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept {
|
|||||||
#define duration_ns(a) std::chrono::duration_cast<std::chrono::nanoseconds>(a).count()
|
#define duration_ns(a) std::chrono::duration_cast<std::chrono::nanoseconds>(a).count()
|
||||||
#define perf_counter_ns() std::chrono::high_resolution_clock::now()
|
#define perf_counter_ns() std::chrono::high_resolution_clock::now()
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Format the time in seconds in human readable format.
|
||||||
|
*
|
||||||
|
* @param time number of seconds
|
||||||
|
* @return The formatted human readable string
|
||||||
|
*/
|
||||||
std::string format_time(uint64_t) noexcept;
|
std::string format_time(uint64_t) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Format the time in nanoseconds in human readable format.
|
||||||
|
*
|
||||||
|
* @param time Time in nanoseconds
|
||||||
|
* @return std::string The formatted human readable string
|
||||||
|
*/
|
||||||
std::string format_time_ns(uint64_t) noexcept;
|
std::string format_time_ns(uint64_t) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Convert a number of bytes to JEDEC standard form.
|
||||||
|
* See more : https://en.wikipedia.org/wiki/JEDEC_memory_standards
|
||||||
|
*
|
||||||
|
* @param bytes Number of bytes
|
||||||
|
* @return JEDEC compliant formatted number of bytes
|
||||||
|
*/
|
||||||
std::string format_byte_size(uint64_t) noexcept;
|
std::string format_byte_size(uint64_t) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Format a number with a separator (e.g. 1000 as 1,000)
|
||||||
|
*
|
||||||
|
* @param k number to format
|
||||||
|
* @param separator used between each thousand
|
||||||
|
* @return Formatted number
|
||||||
|
*/
|
||||||
std::string thousand_sep(uint64_t, const char& = ',') noexcept;
|
std::string thousand_sep(uint64_t, const char& = ',') noexcept;
|
||||||
|
@ -2,14 +2,25 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test if a given result is equal to the expected one and log the result
|
||||||
|
*
|
||||||
|
* @tparam T type of returning values
|
||||||
|
* @param name of the unit test
|
||||||
|
* @param expected result of the function call
|
||||||
|
* @param result of the function
|
||||||
|
*/
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void Assert(const char* const name, const T& expected, const T& result) noexcept {
|
static void Assert(const char* const name, const T& expected, const T& result) noexcept {
|
||||||
if(expected != result){
|
if(expected != result){
|
||||||
std::cerr << "For test named " << name << " Expected '" << expected << "' but got '" << result << "' instead\n";
|
std::cerr << "For test named " << name << " Expected '" << expected << "' but got '" << result << "' instead\n";
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test suite for the format_byte_size output
|
||||||
|
*/
|
||||||
void format_byte_size_test(void) noexcept {
|
void format_byte_size_test(void) noexcept {
|
||||||
Assert("format_byte_size null", std::string("0B"), format_byte_size(static_cast<uint64_t>(0)));
|
Assert("format_byte_size null", std::string("0B"), format_byte_size(static_cast<uint64_t>(0)));
|
||||||
Assert("format_byte_size byte", std::string("1B"), format_byte_size(static_cast<uint64_t>(1)));
|
Assert("format_byte_size byte", std::string("1B"), format_byte_size(static_cast<uint64_t>(1)));
|
||||||
@ -26,6 +37,9 @@ void format_byte_size_test(void) noexcept {
|
|||||||
Assert("format_byte_size max", std::string("15EB 1023PB 1023TB 1023GB 1023MB 1023KB 1023B"), format_byte_size(static_cast<uint64_t>(-1)));
|
Assert("format_byte_size max", std::string("15EB 1023PB 1023TB 1023GB 1023MB 1023KB 1023B"), format_byte_size(static_cast<uint64_t>(-1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test suite for the format_time output
|
||||||
|
*/
|
||||||
void format_time_test(void) noexcept {
|
void format_time_test(void) noexcept {
|
||||||
// https://en.wikipedia.org/wiki/Unit_of_time
|
// https://en.wikipedia.org/wiki/Unit_of_time
|
||||||
Assert("format_time null", std::string("0s"), format_time(static_cast<uint64_t>(0)));
|
Assert("format_time null", std::string("0s"), format_time(static_cast<uint64_t>(0)));
|
||||||
@ -80,6 +94,9 @@ void format_time_test(void) noexcept {
|
|||||||
Assert("format_time max", std::string("5849424173c 55y 3w 5j 7h 15s"), format_time(static_cast<uint64_t>(-1)));
|
Assert("format_time max", std::string("5849424173c 55y 3w 5j 7h 15s"), format_time(static_cast<uint64_t>(-1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test suite for the format_time_ns output
|
||||||
|
*/
|
||||||
void format_time_ns_test(void) noexcept {
|
void format_time_ns_test(void) noexcept {
|
||||||
// https://en.wikipedia.org/wiki/Unit_of_time
|
// https://en.wikipedia.org/wiki/Unit_of_time
|
||||||
Assert("format_time_ns null", std::string("0ns"), format_time_ns(static_cast<uint64_t>(0)));
|
Assert("format_time_ns null", std::string("0ns"), format_time_ns(static_cast<uint64_t>(0)));
|
||||||
@ -140,6 +157,9 @@ void format_time_ns_test(void) noexcept {
|
|||||||
Assert("format_time_ns max", std::string("5c 84y 11M 2j 23h 34m 33s 709ms 551us 615ns"), format_time_ns(static_cast<uint64_t>(-1)));
|
Assert("format_time_ns max", std::string("5c 84y 11M 2j 23h 34m 33s 709ms 551us 615ns"), format_time_ns(static_cast<uint64_t>(-1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test suite for the thousand_sep output
|
||||||
|
*/
|
||||||
void thousand_sep_test(void) noexcept {
|
void thousand_sep_test(void) noexcept {
|
||||||
// https://en.wikipedia.org/wiki/Names_of_large_numbers
|
// https://en.wikipedia.org/wiki/Names_of_large_numbers
|
||||||
Assert("thousand_sep null", std::string("0"), thousand_sep(static_cast<uint64_t>(0)));
|
Assert("thousand_sep null", std::string("0"), thousand_sep(static_cast<uint64_t>(0)));
|
||||||
|
@ -1,6 +1,21 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test suite for the format_byte_size output
|
||||||
|
*/
|
||||||
void format_byte_size_test(void) noexcept;
|
void format_byte_size_test(void) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test suite for the format_time output
|
||||||
|
*/
|
||||||
void format_time_test(void) noexcept;
|
void format_time_test(void) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test suite for the format_time_ns output
|
||||||
|
*/
|
||||||
void format_time_ns_test(void) noexcept;
|
void format_time_ns_test(void) noexcept;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Test suite for the thousand_sep output
|
||||||
|
*/
|
||||||
void thousand_sep_test(void) noexcept;
|
void thousand_sep_test(void) noexcept;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user