#include namespace fs = std::filesystem; #include "data.hpp" #include "toolbox.hpp" #include "config.hpp" #include "ViolaJones.hpp" #include "ViolaJonesGPU.hpp" #include "ViolaJonesCPU.hpp" void test_float() noexcept; #if GPU_BOOSTED #define LABEL "GPU" #define apply_features apply_features_gpu #define set_integral_image set_integral_image_gpu #define argsort_2d argsort_2d_gpu #else #define LABEL "CPU" #define apply_features apply_features_cpu #define set_integral_image set_integral_image_cpu #define argsort_2d argsort_2d_cpu #endif std::tuple, np::Array, np::Array, np::Array, np::Array> preprocessing() { // Creating state saver folders if they don't exist already if (SAVE_STATE) for (const char* const folder_name : { "models", "out" }) fs::create_directory(folder_name); printf("| %-49s | %-18s | %-29s |\n", "Preprocessing", "Time spent (ns)", "Formatted time spent"); printf("|%s|%s|%s|\n", S(51), S(20), S(31)); const auto [ X_train, y_train, X_test, y_test ] = state_saver("Loading sets", {"X_train", "y_train", "X_test", "y_test"}, FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets); #if __DEBUG print("X_train"); print(X_train.shape); print(X_train, { IDX_INSPECT }); print("X_test"); print(X_test.shape); print(X_test, { IDX_INSPECT }); print("y_train"); print(y_train.shape); print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); print("y_test"); print(y_test.shape); print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); #endif const np::Array feats = state_saver("Building features", "feats", FORCE_REDO, SAVE_STATE, OUT_DIR, build_features, X_train.shape[1], X_train.shape[2]); #if __DEBUG print("feats"); print(feats.shape); print_feat(feats, { IDX_INSPECT }); #endif const np::Array X_train_ii = state_saver("Converting training set to integral images (" LABEL ")", "X_train_ii_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_train); const np::Array X_test_ii = state_saver("Converting testing set to integral images (" LABEL ")", "X_test_ii_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_test); #if __DEBUG print("X_train_ii"); print(X_train_ii.shape); print(X_train_ii, { IDX_INSPECT }); print("X_test_ii"); print(X_test_ii.shape); print(X_test_ii, { IDX_INSPECT }); #endif const np::Array X_train_feat = state_saver("Applying features to training set (" LABEL ")", "X_train_feat_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_train_ii); const np::Array X_test_feat = state_saver("Applying features to testing set (" LABEL ")", "X_test_feat_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_test_ii); #if __DEBUG print("X_train_feat"); print(X_train_feat.shape); print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); print("X_test_feat"); print(X_test_feat.shape); print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); #endif // const Array indices = measure_time_save>("Selecting best features", "indices", select_percentile, X_train_feat, d.y_train); // const Array indices = measure_time>("Selecting best features", select_percentile, X_train_feat, d.y_train); #if __DEBUG // print_feature(indices); #endif const np::Array X_train_feat_argsort = state_saver("Precalculating training set argsort (" LABEL ")", "X_train_feat_argsort_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_train_feat); #if __DEBUG print("X_train_feat_argsort"); print(X_train_feat_argsort.shape); print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); #endif const np::Array X_test_feat_argsort = state_saver("Precalculating testing set argsort (" LABEL ")", "X_test_feat_argsort_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat); #if __DEBUG print("X_test_feat_argsort"); print(X_test_feat_argsort.shape); print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); #endif return { X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test }; } void train(const np::Array& X_train_feat, const np::Array& X_train_feat_argsort, const np::Array& y_train) { printf("\n| %-49s | %-18s | %-29s |\n", "Training", "Time spent (ns)", "Formatted time spent"); printf("|%s|%s|%s|\n", S(51), S(20), S(31)); for (const size_t T : TS) { char title[BUFFER_SIZE] = { 0 }; char alphas_title[BUFFER_SIZE] = { 0 }; char final_classifiers_title[BUFFER_SIZE] = { 0 }; sprintf(title, "ViolaJones T = %-4lu (%s)", T, LABEL); sprintf(alphas_title, "alphas_%lu_%s", T, LABEL); sprintf(final_classifiers_title, "final_classifiers_%lu_%s", T, LABEL); #if __DEBUG const auto [ alphas, final_classifiers ] = state_saver(title, { alphas_title, final_classifiers_title }, #else state_saver(title, { alphas_title, final_classifiers_title }, #endif FORCE_REDO, SAVE_STATE, MODEL_DIR, train_viola_jones, T, X_train_feat, X_train_feat_argsort, y_train); #if __DEBUG print("alphas"); print(alphas); print("final_classifiers"); print(final_classifiers); #endif } } void testing_and_evaluating(const np::Array& X_train_feat, const np::Array& y_train, const np::Array& X_test_feat, const np::Array& y_test) { printf("\n| %-26s | Time spent (ns) (E) | %-29s | Time spent (ns) (T) | %-29s |\n", "Testing", "Formatted time spent (E)", "Formatted time spent (T)"); printf("|%s|%s|%s|%s|%s|\n", S(28), S(21), S(31), S(21), S(31)); constexpr const size_t NT = sizeof(TS) / sizeof(size_t); std::array, NT> results; size_t i = 0; for (const size_t T : TS) { char title[BUFFER_SIZE] = { 0 }; char alphas_title[BUFFER_SIZE] = { 0 }; char final_classifiers_title[BUFFER_SIZE] = { 0 }; sprintf(title, "ViolaJones T = %-4lu (%s)", T, LABEL); sprintf(alphas_title, MODEL_DIR "/alphas_%lu_%s.bin", T, LABEL); sprintf(final_classifiers_title, MODEL_DIR "/final_classifiers_%lu_%s.bin", T, LABEL); const np::Array alphas = load(alphas_title); const np::Array final_classifiers = load(final_classifiers_title); auto start = time(); const np::Array y_pred_train = classify_viola_jones(alphas, final_classifiers, X_train_feat); const long long t_pred_train = duration_ns(time() - start); const float64_t e_acc = accuracy_score(y_train, y_pred_train); const float64_t e_f1 = f1_score(y_train, y_pred_train); float64_t e_FN, e_FP; std::tie(std::ignore, e_FN, e_FP, std::ignore) = confusion_matrix(y_train, y_pred_train); start = time(); const np::Array y_pred_test = classify_viola_jones(alphas, final_classifiers, X_test_feat); const long long t_pred_test = duration_ns(time() - start); const float64_t t_acc = accuracy_score(y_test, y_pred_test); const float64_t t_f1 = f1_score(y_test, y_pred_test); float64_t t_FN, t_FP; std::tie(std::ignore, t_FN, t_FP, std::ignore) = confusion_matrix(y_test, y_pred_test); results[i++] = { e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP }; printf("| %-26s | %'19lld | %-29s | %'19lld | %-29s |\n", title, t_pred_train, format_time_ns(t_pred_train).c_str(), t_pred_test, format_time_ns(t_pred_test).c_str()); } printf("\n| %-19s | ACC (E) | F1 (E) | FN (E) | FP (E) | ACC (T) | F1 (T) | FN (T) | FP (T) |\n", "Evaluating"); printf("|%s|%s|%s|%s|%s|%s|%s|%s|%s|\n", S(21), S(9), S(8), S(8), S(8), S(9), S(8), S(8), S(8)); i = 0; for (const size_t T : TS) { char title[BUFFER_SIZE] = { 0 }; sprintf(title, "ViolaJones T = %-4lu", T); const auto [e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP] = results[i++]; printf("| %-19s | %'6.2f%% | %'6.2f | %'6.0f | %'6.0f | %6.2f%% | %'6.2f | %'6.0f | %'6.0f |\n", title, e_acc * 100, e_f1, e_FN, e_FP, t_acc * 100, t_f1, t_FN, t_FP); } } void final_unit_test() { printf("\n| %-49s | %-10s | %-17s | %-29s |\n", "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent"); printf("|%s|%s|%s|%s|\n", S(51), S(12), S(19), S(31)); if(fs::exists(OUT_DIR "/X_train_ii_CPU.bin") && fs::exists(OUT_DIR "/X_train_ii_GPU.bin")){ const np::Array X_train_ii_cpu = load(OUT_DIR "/X_train_ii_CPU.bin"); const np::Array X_train_ii_gpu = load(OUT_DIR "/X_train_ii_GPU.bin"); benchmark_function_void("X_train_ii CPU vs GPU", unit_test_cpu_vs_gpu, X_train_ii_cpu, X_train_ii_gpu); } if(fs::exists(OUT_DIR "/X_test_ii_CPU.bin") && fs::exists(OUT_DIR "/X_test_ii_GPU.bin")){ const np::Array X_test_ii_cpu = load(OUT_DIR "/X_test_ii_CPU.bin"); const np::Array X_test_ii_gpu = load(OUT_DIR "/X_test_ii_GPU.bin"); benchmark_function_void("X_test_ii CPU vs GPU", unit_test_cpu_vs_gpu, X_test_ii_cpu, X_test_ii_gpu); } if(fs::exists(OUT_DIR "/X_train_feat_CPU.bin")){ const np::Array X_train_feat = load(OUT_DIR "/X_train_feat_CPU.bin"); if(fs::exists(OUT_DIR "/X_train_feat_GPU.bin")){ const np::Array X_train_feat_gpu = load(OUT_DIR "/X_train_feat_CPU.bin"); benchmark_function_void("X_train_feat CPU vs GPU", unit_test_cpu_vs_gpu, X_train_feat, X_train_feat_gpu); } np::Array X_train_feat_argsort_cpu; uint8_t loaded = 0; if(fs::exists(OUT_DIR "/X_train_feat_argsort_CPU.bin")){ X_train_feat_argsort_cpu = std::move(load(OUT_DIR "/X_train_feat_argsort_CPU.bin")); ++loaded; benchmark_function_void("argsort_2D training set (CPU)", unit_test_argsort_2d, X_train_feat, X_train_feat_argsort_cpu); } np::Array X_train_feat_argsort_gpu; if(fs::exists(OUT_DIR "/X_train_feat_argsort_GPU.bin")){ X_train_feat_argsort_gpu = std::move(load(OUT_DIR "/X_train_feat_argsort_GPU.bin")); ++loaded; benchmark_function_void("argsort_2D training set (GPU)", unit_test_argsort_2d, X_train_feat, X_train_feat_argsort_gpu); } if (loaded == 2) benchmark_function_void("X_train_feat_argsort CPU vs GPU", unit_test_cpu_vs_gpu, X_train_feat_argsort_cpu, X_train_feat_argsort_gpu); } if(fs::exists(OUT_DIR "/X_test_feat_CPU.bin")){ const np::Array X_test_feat = load(OUT_DIR "/X_test_feat_CPU.bin"); if(fs::exists(OUT_DIR "/X_test_feat_GPU.bin")){ const np::Array X_test_feat_gpu = load(OUT_DIR "/X_test_feat_GPU.bin"); benchmark_function_void("X_test_feat CPU vs GPU", unit_test_cpu_vs_gpu, X_test_feat, X_test_feat_gpu); } np::Array X_test_feat_argsort_cpu; uint8_t loaded = 0; if(fs::exists(OUT_DIR "/X_test_feat_argsort_CPU.bin")){ X_test_feat_argsort_cpu = std::move(load(OUT_DIR "/X_test_feat_argsort_CPU.bin")); ++loaded; benchmark_function_void("argsort_2D testing set (CPU)", unit_test_argsort_2d, X_test_feat, X_test_feat_argsort_cpu); } np::Array X_test_feat_argsort_gpu; if(fs::exists(OUT_DIR "/X_test_feat_argsort_GPU.bin")){ X_test_feat_argsort_gpu = std::move(load(OUT_DIR "/X_test_feat_argsort_GPU.bin")); ++loaded; benchmark_function_void("argsort_2D testing set (GPU)", unit_test_argsort_2d, X_test_feat, X_test_feat_argsort_gpu); } if (loaded == 2) benchmark_function_void("X_test_feat_argsort CPU vs GPU", unit_test_cpu_vs_gpu, X_test_feat_argsort_cpu, X_test_feat_argsort_gpu); } char title[BUFFER_SIZE] = { 0 }; char alphas_title[BUFFER_SIZE] = { 0 }; char final_classifiers_title[BUFFER_SIZE] = { 0 }; for (const size_t T : TS) { sprintf(alphas_title, MODEL_DIR "/alphas_%lu_CPU.bin", T); if(!fs::exists(alphas_title)) continue; const np::Array alphas = load(alphas_title); sprintf(final_classifiers_title, MODEL_DIR "/final_classifiers_%lu_CPU.bin", T); if(!fs::exists(final_classifiers_title)) continue; const np::Array final_classifiers = load(final_classifiers_title); sprintf(alphas_title, MODEL_DIR "/alphas_%lu_GPU.bin", T); if(!fs::exists(alphas_title)) continue; const np::Array alphas_gpu = load(alphas_title); sprintf(final_classifiers_title, MODEL_DIR "/final_classifiers_%lu_GPU.bin", T); if(!fs::exists(final_classifiers_title)) continue; const np::Array final_classifiers_gpu = load(final_classifiers_title); sprintf(title, "alphas %ld CPU vs GPU", T); benchmark_function_void(title, unit_test_cpu_vs_gpu, alphas, alphas_gpu); sprintf(title, "final_classifiers %ld CPU vs GPU", T); benchmark_function_void(title, unit_test_cpu_vs_gpu, final_classifiers, final_classifiers_gpu); } } int main(){ #if __DEBUG printf("| %-49s | %-18s | %-29s |\n", "Unit testing", "Time spent (ns)", "Formatted time spent"); printf("|%s|%s|%s|\n", S(51), S(20), S(31)); benchmark_function_void("Testing GPU capabilities 1D", test_working, 3 + (1<<29)); benchmark_function_void("Testing GPU capabilities 2D", test_working_2d, 3 + (1<<15), 2 + (1<<14)); benchmark_function_void("Testing GPU capabilities 3D", test_working_3d, 9 + (1<<10), 5 + (1<<10), 7 + (1<<9)); benchmark_function_void("Testing toolbox", toolbox_unit_test); // benchmark_function_void("Testing floating capabilities", test_float); printf("\n"); #endif setlocale(LC_NUMERIC, ""); // Allow proper number display const auto [ X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test ] = preprocessing(); train(X_train_feat, X_train_feat_argsort, y_train); testing_and_evaluating(X_train_feat, y_train, X_test_feat, y_test); final_unit_test(); #if __DEBUG printf("\nAFTER CLEANUP\n"); #endif return EXIT_SUCCESS; }