#include #include "data.hpp" #include "toolbox.hpp" #include "config.hpp" #include "toolbox_unit_test.hpp" #include "ViolaJones.hpp" #include "ViolaJones_device.hpp" #if GPU_BOOSTED #include "gpu_unit_test.hpp" #define LABEL "GPU" #else #define LABEL "CPU" #endif /** * @brief Execute the preprocessing phase * * The preprocessing phase consist of the following steps : * - Load the dataset * - Calculate features * - Calculate integral images * - Apply features to images * - Calculate argsort of the featured images * * @return std::tuple, np::Array, np::Array, np::Array, np::Array> Tuple containing in order : training features, training features sorted indexes, training labels, testing features, testing labels */ std::tuple, np::Array, np::Array, np::Array, np::Array> preprocessing(void) { // Creating state saver folders if they don't exist already if (SAVE_STATE) for (const char* const folder_name : { "models", "out" }) std::filesystem::create_directory(folder_name); const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns(); const std::array preproc_gaps = { 49, -18, 29 }; header(preproc_gaps, { "Preprocessing", "Time spent (ns)", "Formatted time spent" }); const auto [ X_train, y_train, X_test, y_test ] = state_saver("Loading sets", preproc_gaps[0], { "X_train", "y_train", "X_test", "y_test" }, FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets); #if __DEBUG printf("X_train\n"); print(X_train.shape); print(X_train, { IDX_INSPECT }); printf("X_test\n"); print(X_test.shape); print(X_test, { IDX_INSPECT }); printf("y_train\n"); print(y_train.shape); print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); printf("y_test\n"); print(y_test.shape); print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); #endif const np::Array feats = state_saver("Building features", preproc_gaps[0], "feats", FORCE_REDO, SAVE_STATE, OUT_DIR, build_features, X_train.shape[1], X_train.shape[2]); #if __DEBUG printf("feats\n"); print(feats.shape); print_feat(feats, { IDX_INSPECT }); #endif const np::Array X_train_ii = state_saver("Converting training set to integral images (" LABEL ")", preproc_gaps[0], "X_train_ii_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_train); const np::Array X_test_ii = state_saver("Converting testing set to integral images (" LABEL ")", preproc_gaps[0], "X_test_ii_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_test); #if __DEBUG printf("X_train_ii\n"); print(X_train_ii.shape); print(X_train_ii, { IDX_INSPECT }); printf("X_test_ii\n"); print(X_test_ii.shape); print(X_test_ii, { IDX_INSPECT }); #endif const np::Array X_train_feat = state_saver("Applying features to training set (" LABEL ")", preproc_gaps[0], "X_train_feat_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_train_ii); const np::Array X_test_feat = state_saver("Applying features to testing set (" LABEL ")", preproc_gaps[0], "X_test_feat_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_test_ii); #if __DEBUG printf("X_train_feat\n"); print(X_train_feat.shape); print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); printf("X_test_feat\n"); print(X_test_feat.shape); print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); #endif // const np::Array indices = state_saver("Selecting best features", preproc_gaps[0], "indices", select_percentile, X_train_feat, d.y_train); #if __DEBUG // print_feature(indices); #endif const np::Array X_train_feat_argsort = state_saver("Precalculating training set argsort (" LABEL ")", preproc_gaps[0], "X_train_feat_argsort_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_train_feat); #if __DEBUG printf("X_train_feat_argsort\n"); print(X_train_feat_argsort.shape); print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); #endif const np::Array X_test_feat_argsort = state_saver("Precalculating testing set argsort (" LABEL ")", preproc_gaps[0], "X_test_feat_argsort_" LABEL, FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat); #if __DEBUG printf("X_test_feat_argsort\n"); print(X_test_feat_argsort.shape); print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); #endif const long long time_spent = duration_ns(perf_counter_ns() - preproc_timestamp); formatted_line(preproc_gaps, "├", "┼", "─", "┤"); formatted_row(preproc_gaps, { "Preprocessing summary", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); footer(preproc_gaps); return { X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test }; } /** * @brief Train the weak classifiers. * * @param X_train_feat Training images * @param X_train_feat_argsort Sorted indexes of the training images features * @param y_train Training labels * @return List of trained models */ std::array, 2>, TS.size()> train(const np::Array& X_train_feat, const np::Array& X_train_feat_argsort, const np::Array& y_train) noexcept { const std::chrono::system_clock::time_point training_timestamp = perf_counter_ns(); const std::array training_gaps = { 26, -18, 29 }; header(training_gaps, { "Training", "Time spent (ns)", "Formatted time spent" }); std::array, 2>, TS.size()> models; size_t i = 0; for (const size_t T : TS) { char title[BUFFER_SIZE] = { 0 }; char alphas_title[BUFFER_SIZE] = { 0 }; char final_classifiers_title[BUFFER_SIZE] = { 0 }; snprintf(title, BUFFER_SIZE, "ViolaJones T = %-4lu (%s)", T, LABEL); snprintf(alphas_title, BUFFER_SIZE, "alphas_%lu_%s", T, LABEL); snprintf(final_classifiers_title, BUFFER_SIZE, "final_classifiers_%lu_%s", T, LABEL); const auto [ alphas, final_classifiers ] = state_saver(title, training_gaps[0], { alphas_title, final_classifiers_title }, FORCE_REDO, SAVE_STATE, MODEL_DIR, train_viola_jones, T, X_train_feat, X_train_feat_argsort, y_train); #if __DEBUG printf("alphas\n"); print(alphas); printf("final_classifiers\n"); print(final_classifiers); #endif models[i++] = { alphas, final_classifiers }; } const long long time_spent = duration_ns(perf_counter_ns() - training_timestamp); formatted_line(training_gaps, "├", "┼", "─", "┤"); formatted_row(training_gaps, { "Training summary", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); footer(training_gaps); return models; } /** * @brief Benchmark the trained classifiers on the training and testing sets. * * @param models List of trained models * @param X_train_feat Training features * @param y_train Training labels * @param X_test_feat Testing features * @param y_test Testing labels */ void testing_and_evaluating(const std::array, 2>, TS.size()>& models, const np::Array& X_train_feat, const np::Array& y_train, const np::Array& X_test_feat, const np::Array& y_test) { const std::array testing_gaps = { 26, -19, 24, -19, 24 }; header(testing_gaps, { "Testing", "Time spent (ns) (E)", "Formatted time spent (E)", "Time spent (ns) (T)", "Formatted time spent (T)" }); std::array, TS.size()> results; size_t i = 0; long long total_train_timestamp = 0; long long total_test_timestamp = 0; for (const auto& [ alphas, final_classifiers ] : models) { char title[BUFFER_SIZE] = { 0 }; snprintf(title, BUFFER_SIZE, "ViolaJones T = %-4i (%s)", TS[i], LABEL); std::chrono::system_clock::time_point start = perf_counter_ns(); const np::Array y_pred_train = classify_viola_jones(alphas, final_classifiers, X_train_feat); const long long t_pred_train = duration_ns(perf_counter_ns() - start); total_train_timestamp += t_pred_train; const float64_t e_acc = accuracy_score(y_train, y_pred_train); const float64_t e_f1 = f1_score(y_train, y_pred_train); float64_t e_FN, e_FP; std::tie(std::ignore, e_FN, e_FP, std::ignore) = confusion_matrix(y_train, y_pred_train); start = perf_counter_ns(); const np::Array y_pred_test = classify_viola_jones(alphas, final_classifiers, X_test_feat); const long long t_pred_test = duration_ns(perf_counter_ns() - start); total_test_timestamp += t_pred_test; const float64_t t_acc = accuracy_score(y_test, y_pred_test); const float64_t t_f1 = f1_score(y_test, y_pred_test); float64_t t_FN, t_FP; std::tie(std::ignore, t_FN, t_FP, std::ignore) = confusion_matrix(y_test, y_pred_test); results[i++] = { e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP }; formatted_row(testing_gaps, { title, thousand_sep(t_pred_train).c_str(), format_time_ns(t_pred_train).c_str(), thousand_sep(t_pred_test).c_str(), format_time_ns(t_pred_test).c_str() }); } formatted_line(testing_gaps, "├", "┼", "─", "┤"); formatted_row(testing_gaps, { "Testing summary", thousand_sep(total_train_timestamp).c_str(), format_time_ns(total_train_timestamp).c_str(), thousand_sep(total_test_timestamp).c_str(), format_time_ns(total_test_timestamp).c_str() }); footer(testing_gaps); const std::array evaluating_gaps = { 19, -7, -6, -6, -6, -7, -6, -6, -6 }; header(evaluating_gaps, { "Evaluating", "ACC (E)", "F1 (E)", "FN (E)", "FP (E)", "ACC (T)", "F1 (T)", "FN (T)", "FP (T)"}); i = 0; for (const size_t T : TS) { char title[BUFFER_SIZE] = { 0 }; snprintf(title, BUFFER_SIZE, "ViolaJones T = %-4lu", T); const auto [e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP] = results[i++]; printf("│ %-19s │ %'6.2f%% │ %'6.2f │ %'6.0f │ %'6.0f │ %6.2f%% │ %'6.2f │ %'6.0f │ %'6.0f │\n", title, e_acc * 100, e_f1, e_FN, e_FP, t_acc * 100, t_f1, t_FN, t_FP); } footer(evaluating_gaps); } /** * @brief Test if the each result is equals to other devices. * * Given ViolaJones is a fully deterministic algorithm. The results, regardless the device, should be the same, * this function check this assertion. */ void unit_test(void) { const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns(); const std::array unit_gaps = { 37, -10, -18, 29}; header(unit_gaps, { "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent" }); char title[BUFFER_SIZE] = { 0 }; char tmp_title[BUFFER_SIZE / 2] = { 0 }; char file_cpu[BUFFER_SIZE] = { 0 }; char file_gpu[BUFFER_SIZE] = { 0 }; uint64_t n_total = 0, n_success = 0; const auto test_fnc = [&unit_gaps, &n_total, &n_success](const char* const title, const auto& fnc) noexcept { ++n_total; const std::chrono::system_clock::time_point start = perf_counter_ns(); const bool state = fnc(); const long long time_spent = duration_ns(perf_counter_ns() - start); if(state){ formatted_row(unit_gaps, { title, "Passed", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); ++n_success; } else formatted_row(unit_gaps, { title, "Failed", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); }; for (const char* const label : { "train", "test" }) { snprintf(file_cpu, BUFFER_SIZE, OUT_DIR "/X_%s_ii_CPU.bin", label); snprintf(file_gpu, BUFFER_SIZE, OUT_DIR "/X_%s_ii_GPU.bin", label); if (std::filesystem::exists(file_cpu) && std::filesystem::exists(file_gpu)) { snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_ii", label); snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title); test_fnc(title, [&file_cpu, &file_gpu]{ const np::Array X_train_ii_cpu = load(file_cpu); const np::Array X_train_ii_gpu = load(file_gpu); return unit_test_cpu_vs_gpu(X_train_ii_cpu, X_train_ii_gpu); }); } snprintf(file_cpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_CPU.bin", label); snprintf(file_gpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_GPU.bin", label); uint8_t feat = 0; char file_feat[BUFFER_SIZE] = { 0 }; if (std::filesystem::exists(file_cpu)) { strncpy(file_feat, file_cpu, BUFFER_SIZE); feat = 1; } else if (std::filesystem::exists(file_gpu)) { strncpy(file_feat, file_gpu, BUFFER_SIZE); feat = 2; } if (feat != 0) { const np::Array X_feat = load(file_feat); snprintf(file_gpu, BUFFER_SIZE, feat == 1 ? OUT_DIR "/X_%s_feat_GPU.bin" : OUT_DIR "/X_%s_feat_CPU.bin", label); if (std::filesystem::exists(file_gpu)) { snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat", label); snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title); test_fnc(title, [&X_feat, &file_gpu]{ const np::Array X_feat_aux = load(file_gpu); return unit_test_cpu_vs_gpu(X_feat, X_feat_aux); }); } snprintf(file_cpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_argsort_CPU.bin", label); np::Array X_feat_argsort_cpu; uint8_t loaded = 0; if (std::filesystem::exists(file_cpu)) { ++loaded; snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label); snprintf(title, BUFFER_SIZE, "%-22s - CPU argsort", tmp_title); test_fnc(title, [&X_feat, &X_feat_argsort_cpu, &file_cpu]{ X_feat_argsort_cpu = load(file_cpu); return unit_test_argsort_2d(X_feat, X_feat_argsort_cpu); }); } snprintf(file_gpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_argsort_GPU.bin", label); np::Array X_feat_argsort_gpu; if (std::filesystem::exists(file_gpu)) { ++loaded; snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label); snprintf(title, BUFFER_SIZE, "%-22s - GPU argsort", tmp_title); test_fnc(title, [&X_feat, &X_feat_argsort_gpu, &file_gpu]{ X_feat_argsort_gpu = load(file_gpu); return unit_test_argsort_2d(X_feat, X_feat_argsort_gpu); }); } if (loaded == 2){ snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label); snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title); test_fnc(title, [&X_feat_argsort_cpu, &X_feat_argsort_gpu]{ return unit_test_cpu_vs_gpu(X_feat_argsort_cpu, X_feat_argsort_gpu); }); } } } for (const size_t T : TS) for (const char* const label : { "alphas", "final_classifiers" }) { snprintf(file_cpu, BUFFER_SIZE, MODEL_DIR "/%s_%lu_CPU.bin", label, T); snprintf(file_gpu, BUFFER_SIZE, MODEL_DIR "/%s_%lu_GPU.bin", label, T); if (std::filesystem::exists(file_cpu) && std::filesystem::exists(file_gpu)){ snprintf(tmp_title, BUFFER_SIZE / 2, "%s_%ld", label, T); snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title); test_fnc(title, [&file_cpu, &file_gpu]{ const np::Array cpu = load(file_cpu); const np::Array gpu = load(file_gpu); return unit_test_cpu_vs_gpu(cpu, gpu); }); } } const long long time_spent = duration_ns(perf_counter_ns() - unit_timestamp); if (n_total == 0) formatted_row(unit_gaps, { "Unit testing summary", "No files", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); else { snprintf(title, BUFFER_SIZE, "%ld/%ld", n_success, n_total); formatted_line(unit_gaps, "├", "┼", "─", "┤"); formatted_row(unit_gaps, { "Unit testing summary", title, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); } footer(unit_gaps); } int32_t main(void){ setlocale(LC_NUMERIC, ""); // Allow proper number display const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns(); const std::array unit_gaps = { 27, -18, 29 }; header(unit_gaps, { "Unit testing", "Time spent (ns)", "Formatted time spent" }); #if GPU_BOOSTED benchmark_function_void("Testing GPU capabilities 1D", unit_gaps[0], test_working, 50000); benchmark_function_void("Testing GPU capabilities 2D", unit_gaps[0], test_working_2d, 200, 500); benchmark_function_void("Testing GPU capabilities 3D", unit_gaps[0], test_working_3d, 30, 40, 500); #endif benchmark_function_void("Testing format_time", unit_gaps[0], format_time_test); benchmark_function_void("Testing format_time_ns", unit_gaps[0], format_time_ns_test); benchmark_function_void("Testing format_byte_size", unit_gaps[0], format_byte_size_test); benchmark_function_void("Testing thousand_sep", unit_gaps[0], thousand_sep_test); const long long time_spent = duration_ns(perf_counter_ns() - unit_timestamp); formatted_line(unit_gaps, "├", "┼", "─", "┤"); formatted_row(unit_gaps, { "Unit testing summary", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); footer(unit_gaps); const auto [ X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test ] = preprocessing(); const std::array, 2>, TS.size()> models = train(X_train_feat, X_train_feat_argsort, y_train); testing_and_evaluating(models, X_train_feat, y_train, X_test_feat, y_test); unit_test(); return EXIT_SUCCESS; }