// Listing metadata: 321 lines, 14 KiB, C++
#include <filesystem>
|
|
namespace fs = std::filesystem;
|
|
#include "data.hpp"
|
|
#include "toolbox.hpp"
|
|
#include "config.hpp"
|
|
#include "ViolaJones.hpp"
|
|
#include "ViolaJonesGPU.hpp"
|
|
#include "ViolaJonesCPU.hpp"
|
|
|
|
// Forward declaration only: no definition is visible in this file, and its single
// visible use is a commented-out benchmark call in main().
void test_float() noexcept;
|
|
|
|
// Debug builds only: index (and index span) handed to the print() dumps of this file.
// Values tried previously for IDX_INSPECT: 0, 2.
#ifdef __DEBUG
#define IDX_INSPECT 4548
#define IDX_INSPECT_OFFSET 100
#endif

// Backend selection: the generic names used by the pipeline resolve to the CPU or the GPU
// implementation, and LABEL is the matching suffix used in titles and state file names.
#if !GPU_BOOSTED
#define LABEL "CPU"
#define apply_features apply_features_cpu
#define set_integral_image set_integral_image_cpu
#define argsort_2d argsort_2d_cpu
#else
#define LABEL "GPU"
#define apply_features apply_features_gpu
#define set_integral_image set_integral_image_gpu
#define argsort_2d argsort_2d_gpu
#endif
|
|
|
|
std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Array<int32_t>, np::Array<uint8_t>> preprocessing() {
|
|
// Creating state saver folders if they don't exist already
|
|
if (SAVE_STATE)
|
|
for (const char* const folder_name : { "models", "out" })
|
|
fs::create_directory(folder_name);
|
|
|
|
printf("| %-49s | %-17s | %-29s |\n", "Preprocessing", "Time spent (ns)", "Formatted time spent");
|
|
printf("|%s|%s|%s|\n", S(51), S(19), S(31));
|
|
|
|
const auto [ X_train, y_train, X_test, y_test ] = state_saver<uint8_t, 4>("Loading sets", {"X_train", "y_train", "X_test", "y_test"},
|
|
FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets);
|
|
|
|
#ifdef __DEBUG
|
|
// print("X_train");
|
|
// print(X_train.shape);
|
|
// print(X_train, { IDX_INSPECT });
|
|
// print("X_test");
|
|
// print(X_test.shape);
|
|
// print(X_test, { IDX_INSPECT });
|
|
// print("y_train");
|
|
// print(y_train.shape);
|
|
// print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
|
// print("y_test");
|
|
// print(y_test.shape);
|
|
// print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
|
#endif
|
|
|
|
const np::Array<uint8_t> feats = state_saver<uint8_t>("Building features", "feats",
|
|
FORCE_REDO, SAVE_STATE, OUT_DIR, build_features, X_train.shape[1], X_train.shape[2]);
|
|
|
|
#ifdef __DEBUG
|
|
// print("feats");
|
|
// print(feats.shape);
|
|
// print_feat(feats, { IDX_INSPECT });
|
|
#endif
|
|
|
|
const np::Array<uint32_t> X_train_ii = state_saver<uint32_t>("Converting training set to integral images (" LABEL ")", "X_train_ii_" LABEL,
|
|
FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_train);
|
|
const np::Array<uint32_t> X_test_ii = state_saver<uint32_t>("Converting testing set to integral images (" LABEL ")", "X_test_ii_" LABEL,
|
|
FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_test);
|
|
|
|
#ifdef __DEBUG
|
|
// print("X_train_ii");
|
|
// print(X_train_ii.shape);
|
|
// print(X_train_ii, { IDX_INSPECT });
|
|
// print("X_test_ii");
|
|
// print(X_test_ii.shape);
|
|
// print(X_test_ii, { IDX_INSPECT });
|
|
// return {};
|
|
#endif
|
|
|
|
const np::Array<int32_t> X_train_feat = state_saver<int32_t>("Applying features to training set (" LABEL ")", "X_train_feat_" LABEL,
|
|
FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_train_ii);
|
|
const np::Array<int32_t> X_test_feat = state_saver<int32_t>("Applying features to testing set (" LABEL ")", "X_test_feat_" LABEL,
|
|
FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_test_ii);
|
|
|
|
#ifdef __DEBUG
|
|
// print("X_train_feat");
|
|
// print(X_train_feat.shape);
|
|
// print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
|
// print("X_test_feat");
|
|
// print(X_test_feat.shape);
|
|
// print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
|
#endif
|
|
|
|
// const Array<int> indices = measure_time_save<Array<int>>("Selecting best features", "indices", select_percentile, X_train_feat, d.y_train);
|
|
// const Array<int> indices = measure_time<Array<int>>("Selecting best features", select_percentile, X_train_feat, d.y_train);
|
|
|
|
#ifdef __DEBUG
|
|
// print_feature(indices);
|
|
#endif
|
|
|
|
const np::Array<uint16_t> X_train_feat_argsort = state_saver<uint16_t>("Precalculating training set argsort (" LABEL ")", "X_train_feat_argsort_" LABEL,
|
|
FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_train_feat);
|
|
|
|
#ifdef __DEBUG
|
|
print("X_train_feat_argsort");
|
|
print(X_train_feat_argsort.shape);
|
|
print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
|
#endif
|
|
|
|
// const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", "X_test_feat_argsort_" LABEL,
|
|
// FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);
|
|
|
|
#ifdef __DEBUG
|
|
// print("X_test_feat_argsort");
|
|
// print(X_test_feat_argsort.shape);
|
|
// print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
|
#endif
|
|
|
|
return { X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test };
|
|
}
|
|
|
|
/**
 * @brief Train one Viola-Jones model for every value of T listed in TS.
 *
 * Each training goes through state_saver() with FORCE_REDO, SAVE_STATE and MODEL_DIR, under
 * the state names "alphas_<T>_<LABEL>" and "final_classifiers_<T>_<LABEL>".
 * In debug builds the resulting alphas and final classifiers are printed.
 *
 * @param X_train_feat Features of the training set, forwarded to train_viola_jones
 * @param X_train_feat_argsort Argsort of X_train_feat, forwarded to train_viola_jones
 * @param y_train Labels of the training set, forwarded to train_viola_jones
 */
void train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_train_feat_argsort, const np::Array<uint8_t>& y_train) {
	printf("\n| %-49s | %-17s | %-29s |\n", "Training", "Time spent (ns)", "Formatted time spent");
	printf("|%s|%s|%s|\n", S(51), S(19), S(31));

	for (const size_t T : TS) {
		char title[BUFFER_SIZE] = { 0 };
		char alphas_title[BUFFER_SIZE] = { 0 };
		char final_classifiers_title[BUFFER_SIZE] = { 0 };
		// snprintf keeps every write inside its buffer and %zu is the conversion matching size_t
		snprintf(title, sizeof(title), "ViolaJones T = %-4zu (%s)", T, LABEL);
		snprintf(alphas_title, sizeof(alphas_title), "alphas_%zu_%s", T, LABEL);
		snprintf(final_classifiers_title, sizeof(final_classifiers_title), "final_classifiers_%zu_%s", T, LABEL);

		// Only debug builds keep the trained model around (to print it); otherwise the result is discarded
#ifdef __DEBUG
		const auto [ alphas, final_classifiers ] =
#endif
		state_saver<float64_t, 2>(title, { alphas_title, final_classifiers_title },
				FORCE_REDO, SAVE_STATE, MODEL_DIR, train_viola_jones, T, X_train_feat, X_train_feat_argsort, y_train);

#ifdef __DEBUG
		print("alphas");
		print(alphas);
		print("final_classifiers");
		print(final_classifiers);
#endif
	}
}
|
|
|
|
/**
 * @brief Classify both sets with every saved model, then print the timing and the metric tables.
 *
 * For each T in TS the model is read back from MODEL_DIR ("/alphas_<T>_<LABEL>.bin" and
 * "/final_classifiers_<T>_<LABEL>.bin"). Only the classify_viola_jones() calls are timed.
 * Columns tagged (E) are computed on the training set, columns tagged (T) on the testing set.
 *
 * @param X_train_feat Features of the training set
 * @param y_train Labels of the training set
 * @param X_test_feat Features of the testing set
 * @param y_test Labels of the testing set
 */
void testing_and_evaluating(const np::Array<int32_t>& X_train_feat, const np::Array<uint8_t>& y_train, const np::Array<int32_t>& X_test_feat, const np::Array<uint8_t>& y_test) {
	printf("\n| %-26s | Time spent (ns) (E) | %-29s | Time spent (ns) (T) | %-29s |\n", "Testing", "Formatted time spent (E)", "Formatted time spent (T)");
	printf("|%s|%s|%s|%s|%s|\n", S(28), S(21), S(31), S(21), S(31));

	// Element count taken from TS itself so the results storage always matches the loops below
	constexpr const size_t NT = sizeof(TS) / sizeof(TS[0]);
	// One row per T: { ACC, F1, FN, FP } on the training set followed by the same on the testing set
	std::array<std::array<float64_t, 8>, NT> results;

	size_t i = 0;
	for (const size_t T : TS) {
		char title[BUFFER_SIZE] = { 0 };
		char alphas_title[BUFFER_SIZE] = { 0 };
		char final_classifiers_title[BUFFER_SIZE] = { 0 };
		// snprintf keeps every write inside its buffer and %zu is the conversion matching size_t
		snprintf(title, sizeof(title), "ViolaJones T = %-4zu (%s)", T, LABEL);
		snprintf(alphas_title, sizeof(alphas_title), MODEL_DIR "/alphas_%zu_%s.bin", T, LABEL);
		snprintf(final_classifiers_title, sizeof(final_classifiers_title), MODEL_DIR "/final_classifiers_%zu_%s.bin", T, LABEL);

		const np::Array<float64_t> alphas = load<float64_t>(alphas_title);
		const np::Array<float64_t> final_classifiers = load<float64_t>(final_classifiers_title);

		// Training set: the timer only surrounds the classification itself
		auto start = time();
		const np::Array<uint8_t> y_pred_train = classify_viola_jones(alphas, final_classifiers, X_train_feat);
		const long long t_pred_train = duration_ns(time() - start);
		const float64_t e_acc = accuracy_score(y_train, y_pred_train);
		const float64_t e_f1 = f1_score(y_train, y_pred_train);
		// Only the second and third entries of the confusion matrix are kept
		float64_t e_FN, e_FP;
		std::tie(std::ignore, e_FN, e_FP, std::ignore) = confusion_matrix(y_train, y_pred_train);

		// Testing set, measured the same way
		start = time();
		const np::Array<uint8_t> y_pred_test = classify_viola_jones(alphas, final_classifiers, X_test_feat);
		const long long t_pred_test = duration_ns(time() - start);
		const float64_t t_acc = accuracy_score(y_test, y_pred_test);
		const float64_t t_f1 = f1_score(y_test, y_pred_test);
		float64_t t_FN, t_FP;
		std::tie(std::ignore, t_FN, t_FP, std::ignore) = confusion_matrix(y_test, y_pred_test);

		results[i++] = { e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP };

		printf("| %-26s | %'19lld | %-29s | %'19lld | %-29s |\n", title, t_pred_train, format_time_ns(t_pred_train).c_str(), t_pred_test, format_time_ns(t_pred_test).c_str());
	}

	printf("\n| %-19s | ACC (E) | F1 (E) | FN (E) | FP (E) | ACC (T) | F1 (T) | FN (T) | FP (T) |\n", "Evaluating");
	printf("|%s|%s|%s|%s|%s|%s|%s|%s|%s|\n", S(21), S(9), S(8), S(8), S(8), S(9), S(8), S(8), S(8));

	// Second pass: the metrics are printed only once every timing row is out
	i = 0;
	for (const size_t T : TS) {
		char title[BUFFER_SIZE] = { 0 };
		snprintf(title, sizeof(title), "ViolaJones T = %-4zu", T);
		const auto [e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP] = results[i++];
		printf("| %-19s | %'6.2f%% | %'6.2f | %'6.0f | %'6.0f | %6.2f%% | %'6.2f | %'6.0f | %'6.0f |\n", title, e_acc * 100, e_f1, e_FN, e_FP, t_acc * 100, t_f1, t_FN, t_FP);
	}
}
|
|
|
|
void final_unit_test() {
|
|
printf("\n| %-49s | %-10s | %-17s | %-29s |\n", "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent");
|
|
printf("|%s|%s|%s|%s|\n", S(51), S(12), S(19), S(31));
|
|
|
|
if(fs::exists(OUT_DIR "/X_train_ii_CPU.bin") && fs::exists(OUT_DIR "/X_train_ii_GPU.bin")){
|
|
const np::Array<uint32_t> X_train_ii_cpu = load<uint32_t>(OUT_DIR "/X_train_ii_CPU.bin");
|
|
const np::Array<uint32_t> X_train_ii_gpu = load<uint32_t>(OUT_DIR "/X_train_ii_GPU.bin");
|
|
benchmark_function_void("X_train_ii CPU vs GPU", unit_test_cpu_vs_gpu<uint32_t>, X_train_ii_cpu, X_train_ii_gpu);
|
|
}
|
|
|
|
if(fs::exists(OUT_DIR "/X_test_ii_CPU.bin") && fs::exists(OUT_DIR "/X_test_ii_GPU.bin")){
|
|
const np::Array<uint32_t> X_test_ii_cpu = load<uint32_t>(OUT_DIR "/X_test_ii_CPU.bin");
|
|
const np::Array<uint32_t> X_test_ii_gpu = load<uint32_t>(OUT_DIR "/X_test_ii_GPU.bin");
|
|
benchmark_function_void("X_test_ii CPU vs GPU", unit_test_cpu_vs_gpu<uint32_t>, X_test_ii_cpu, X_test_ii_gpu);
|
|
}
|
|
|
|
if(fs::exists(OUT_DIR "/X_train_feat_CPU.bin")){
|
|
const np::Array<int32_t> X_train_feat = load<int32_t>(OUT_DIR "/X_train_feat_CPU.bin");
|
|
|
|
if(fs::exists(OUT_DIR "/X_train_feat_GPU.bin")){
|
|
const np::Array<int32_t> X_train_feat_gpu = load<int32_t>(OUT_DIR "/X_train_feat_CPU.bin");
|
|
benchmark_function_void("X_train_feat CPU vs GPU", unit_test_cpu_vs_gpu<int32_t>, X_train_feat, X_train_feat_gpu);
|
|
}
|
|
|
|
np::Array<uint16_t> X_train_feat_argsort_cpu;
|
|
uint8_t loaded = 0;
|
|
if(fs::exists(OUT_DIR "/X_train_feat_argsort_CPU.bin")){
|
|
X_train_feat_argsort_cpu = std::move(load<uint16_t>(OUT_DIR "/X_train_feat_argsort_CPU.bin"));
|
|
++loaded;
|
|
benchmark_function_void("argsort_2D training set (CPU)", unit_test_argsort_2d<int32_t>, X_train_feat, X_train_feat_argsort_cpu);
|
|
}
|
|
|
|
np::Array<uint16_t> X_train_feat_argsort_gpu;
|
|
if(fs::exists(OUT_DIR "/X_train_feat_argsort_GPU.bin")){
|
|
X_train_feat_argsort_gpu = std::move(load<uint16_t>(OUT_DIR "/X_train_feat_argsort_GPU.bin"));
|
|
++loaded;
|
|
benchmark_function_void("argsort_2D training set (GPU)", unit_test_argsort_2d<int32_t>, X_train_feat, X_train_feat_argsort_gpu);
|
|
}
|
|
|
|
if (loaded == 2)
|
|
benchmark_function_void("X_train_feat_argsort CPU vs GPU", unit_test_cpu_vs_gpu<uint16_t>, X_train_feat_argsort_cpu, X_train_feat_argsort_gpu);
|
|
}
|
|
|
|
if(fs::exists(OUT_DIR "/X_test_feat_CPU.bin")){
|
|
const np::Array<int32_t> X_test_feat = load<int32_t>(OUT_DIR "/X_test_feat_CPU.bin");
|
|
|
|
if(fs::exists(OUT_DIR "/X_test_feat_GPU.bin")){
|
|
const np::Array<int32_t> X_test_feat_gpu = load<int32_t>(OUT_DIR "/X_test_feat_GPU.bin");
|
|
benchmark_function_void("X_test_feat CPU vs GPU", unit_test_cpu_vs_gpu<int32_t>, X_test_feat, X_test_feat_gpu);
|
|
}
|
|
|
|
np::Array<uint16_t> X_test_feat_argsort_cpu;
|
|
uint8_t loaded = 0;
|
|
if(fs::exists(OUT_DIR "/X_test_feat_argsort_CPU.bin")){
|
|
X_test_feat_argsort_cpu = std::move(load<uint16_t>(OUT_DIR "/X_test_feat_argsort_CPU.bin"));
|
|
++loaded;
|
|
benchmark_function_void("argsort_2D testing set (CPU)", unit_test_argsort_2d<int32_t>, X_test_feat, X_test_feat_argsort_cpu);
|
|
}
|
|
|
|
np::Array<uint16_t> X_test_feat_argsort_gpu;
|
|
if(fs::exists(OUT_DIR "/X_test_feat_argsort_GPU.bin")){
|
|
X_test_feat_argsort_gpu = std::move(load<uint16_t>(OUT_DIR "/X_test_feat_argsort_GPU.bin"));
|
|
++loaded;
|
|
benchmark_function_void("argsort_2D testing set (GPU)", unit_test_argsort_2d<int32_t>, X_test_feat, X_test_feat_argsort_gpu);
|
|
}
|
|
|
|
if (loaded == 2)
|
|
benchmark_function_void("X_test_feat_argsort CPU vs GPU", unit_test_cpu_vs_gpu<uint16_t>, X_test_feat_argsort_cpu, X_test_feat_argsort_gpu);
|
|
}
|
|
|
|
char title[BUFFER_SIZE] = { 0 };
|
|
char alphas_title[BUFFER_SIZE] = { 0 };
|
|
char final_classifiers_title[BUFFER_SIZE] = { 0 };
|
|
|
|
for (const size_t T : TS) {
|
|
sprintf(alphas_title, MODEL_DIR "/alphas_%lu_CPU.bin", T);
|
|
if(!fs::exists(alphas_title)) continue;
|
|
const np::Array<float64_t> alphas = load<float64_t>(alphas_title);
|
|
|
|
sprintf(final_classifiers_title, MODEL_DIR "/final_classifiers_%lu_CPU.bin", T);
|
|
if(!fs::exists(final_classifiers_title)) continue;
|
|
const np::Array<float64_t> final_classifiers = load<float64_t>(final_classifiers_title);
|
|
|
|
sprintf(alphas_title, MODEL_DIR "/alphas_%lu_GPU.bin", T);
|
|
if(!fs::exists(alphas_title)) continue;
|
|
const np::Array<float64_t> alphas_gpu = load<float64_t>(alphas_title);
|
|
|
|
sprintf(final_classifiers_title, MODEL_DIR "/final_classifiers_%lu_GPU.bin", T);
|
|
if(!fs::exists(final_classifiers_title)) continue;
|
|
const np::Array<float64_t> final_classifiers_gpu = load<float64_t>(final_classifiers_title);
|
|
|
|
sprintf(title, "alphas %ld CPU vs GPU", T);
|
|
benchmark_function_void(title, unit_test_cpu_vs_gpu<float64_t>, alphas, alphas_gpu);
|
|
sprintf(title, "final_classifiers %ld CPU vs GPU", T);
|
|
benchmark_function_void(title, unit_test_cpu_vs_gpu<float64_t>, final_classifiers, final_classifiers_gpu);
|
|
}
|
|
}
|
|
|
|
int main(){
|
|
#ifdef __DEBUG
|
|
printf("| %-49s | %-17s | %-29s |\n", "Unit testing", "Time spent (ns)", "Formatted time spent");
|
|
printf("|%s|%s|%s|\n", S(51), S(19), S(31));
|
|
benchmark_function_void("Testing GPU capabilities 1D", test_working, 3 + (1<<29));
|
|
benchmark_function_void("Testing GPU capabilities 2D", test_working_2d, 3 + (1<<15), 2 + (1<<14));
|
|
benchmark_function_void("Testing GPU capabilities 3D", test_working_3d, 9 + (1<<10), 5 + (1<<10), 7 + (1<<9));
|
|
benchmark_function_void("Testing toolbox", toolbox_unit_test);
|
|
// benchmark_function_void("Testing floating capabilities", test_float);
|
|
printf("\n");
|
|
#endif
|
|
setlocale(LC_NUMERIC, ""); // Allow proper number display
|
|
const auto [ X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test ] = preprocessing();
|
|
train(X_train_feat, X_train_feat_argsort, y_train);
|
|
testing_and_evaluating(X_train_feat, y_train, X_test_feat, y_test);
|
|
final_unit_test();
|
|
#ifdef __DEBUG
|
|
printf("\nAFTER CLEANUP\n");
|
|
#endif
|
|
return EXIT_SUCCESS;
|
|
}
|