Compare commits


No commits in common. "4a427478377f97d3782de48692a5bdbc63ad5d4e" and "211dcad893a4f5d64601923a2faafc163eac7a63" have entirely different histories.
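Because the two refs share no merge base, this page diffs the two trees directly, roughly the equivalent of running git diff 4a427478 211dcad8 with both commits fetched locally; the three-dot form git diff 4a427478...211dcad8 would fail here, since it diffs from a merge base that does not exist.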

45 changed files with 1288 additions and 2141 deletions

.gitignore (vendored): Normal file → Executable file, 0 lines changed

README.fr.md: Normal file → Executable file, 0 lines changed

Dockerfile (deleted; file name inferred from the content below)
@@ -1,17 +0,0 @@
-FROM nvidia/cuda:12.4.1-devel-ubi9 as builder
-WORKDIR /home/ViolaJones/cpp
-COPY *.cu *.cpp *.hpp Makefile ./
-RUN make -j "$(nproc)"
-FROM nvidia/cuda:12.4.1-base-ubi9
-WORKDIR /home/ViolaJones/cpp
-RUN dnf install -y make-1:4.3-7.el9 && dnf clean all
-COPY --from=builder /home/ViolaJones/cpp/bin ./bin
-COPY Makefile .
-ENTRYPOINT ["make"]
-CMD ["start"]

cpp/Makefile (file name inferred from the content below)
@@ -1,135 +1,79 @@
-CC := nvcc -m64 -t=0 -std=c++17 -Xcompiler -m64,-std=c++17
+CC := nvcc -m64 -std=c++17 -ccbin g++-12 -Xcompiler -m64,-std=c++17
 OBJ_DIR := bin
+$(shell mkdir -p $(OBJ_DIR))
 MODELS_DIR := models
 OUT_DIR := out
 SRC_DIR := .
-DATA_PATH := ../data
-#CFLAGS := -O0 -g -G -Xptxas=-w -Xcompiler -O0,-rdynamic,-g
-#CFLAGS := -O0 -g -G -pg -Xptxas=-w -Xcompiler -O0,-rdynamic,-g
-CFLAGS := -dlto -O2 -Xcompiler -O2
-#CFLAGS := -dlto -O2 -g -Xcompiler -O2,-g,-ggdb
-CFLAGS := $(CFLAGS) -MMD -MP -Werror=all-warnings -Xcompiler -Wall,-Werror,-Werror=implicit-fallthrough=0,-Wextra
+#CFLAGS := -O0 -Werror=all-warnings -g -G
+#CFLAGS := $(CFLAGS) -pg
+#CFLAGS := $(CFLAGS) -Xptxas=-w
+#CFLAGS := $(CFLAGS) -Xcompiler -Wall,-O0,-g,-Werror,-Werror=implicit-fallthrough=0,-Wextra,-rdynamic
+CFLAGS := -O4 -Xcompiler -O4
 EXEC := $(OBJ_DIR)/ViolaJones
-DATA := $(DATA_PATH)/X_train.bin $(DATA_PATH)/X_test.bin $(DATA_PATH)/y_train.bin $(DATA_PATH)/y_test.bin
+DATA := ../data/X_train.bin ../data/X_test.bin ../data/y_train.bin ../data/y_test.bin
-SRC := $(shell find $(SRC_DIR) -name '*.cpp' -o -name '*.cu')
+SRC := $(shell find $(SRC_DIR) -name "*.cpp" -o -name "*.cu" )
 OBJ_EXT := o
 ifeq ($(OS), Windows_NT)
-EXEC := $(EXEC).exe
-OBJ_EXT := obj
+EXEC:=$(EXEC).exe
+OBJ_EXT:=obj
 endif
 OBJ := $(SRC:$(SRC_DIR)/%.cpp=$(OBJ_DIR)/%.$(OBJ_EXT))
 OBJ := $(OBJ:$(SRC_DIR)/%.cu=$(OBJ_DIR)/%.$(OBJ_EXT))
-.PHONY: all
+.PHONY: all start reset clean mrproper debug check
-all: $(EXEC)
-$(OBJ_DIR):
-	@mkdir -v $@
+all: $(EXEC) $(DATA)
 # Compiling host code
-$(OBJ_DIR)/%.$(OBJ_EXT): $(SRC_DIR)/%.cpp | $(OBJ_DIR) check-nvcc-works
+$(OBJ_DIR)/%.$(OBJ_EXT): $(SRC_DIR)/%.cpp
 	@echo Compiling $<
 	@$(CC) $(CFLAGS) -c $< -o $@
 # Compiling gpu code
-$(OBJ_DIR)/%.$(OBJ_EXT): $(SRC_DIR)/%.cu | $(OBJ_DIR) check-nvcc-works
+$(OBJ_DIR)/%.$(OBJ_EXT): $(SRC_DIR)/%.cu
 	@echo Compiling $<
 	@$(CC) $(CFLAGS) -c $< -o $@
-$(EXEC): $(OBJ) | check-nvcc-works
+$(EXEC): $(OBJ)
 	@echo Linking objects files to $@
 	@$(CC) $(CFLAGS) $^ -o $@
 $(DATA):
-	@echo 'Missing $(DATA) files, use downloader first' && exit 1
+	@bash ../download_data.sh ..
-.PHONY: start
 start: $(EXEC) $(DATA)
 	@./$(EXEC)
-.PHONY: debug
+profile: start
+	@gprof $(EXEC) gmon.out | gprof2dot | dot -Tpng -o output.png
+	#@gprof $(EXEC) gmon.out > analysis.txt
 debug: $(EXEC) $(DATA)
 	#@cuda-gdb -q $(EXEC)
 	@gdb -q --tui $(EXEC)
-.PHONY: profile
-profile: start | check-gprof-works check-gprof2dot-works check-dot-works
-	@gprof $(EXEC) gmon.out | gprof2dot | dot -T png -o output.png
-.PHONY: check
-check: $(EXEC) $(DATA) | check-valgrind-works
+check: $(EXEC) $(DATA)
 	@valgrind -q -s --leak-check=full --show-leak-kinds=all $(EXEC)
-.PHONY: cudacheck
-cudacheck: $(EXEC) $(DATA) | check-computer-sanitizer-works
-	@compute-sanitizer --destroy-on-device-error kernel --tool memcheck --leak-check full --report-api-errors all --track-stream-ordered-races all --target-processes all $(EXEC)
-	#@compute-sanitizer --destroy-on-device-error kernel --tool racecheck --racecheck-detect-level info --racecheck-report all $(EXEC)
-	#@compute-sanitizer --destroy-on-device-error kernel --tool initcheck --track-unused-memory yes $(EXEC)
-	#@compute-sanitizer --destroy-on-device-error kernel --tool synccheck $(EXEC)
+cudacheck: $(EXEC) $(DATA)
+	@cuda-memcheck --destroy-on-device-error kernel --tool memcheck --leak-check full --report-api-errors all $(EXEC)
+	#@cuda-memcheck --destroy-on-device-error kernel --tool racecheck --racecheck-report all $(EXEC)
+	#@cuda-memcheck --destroy-on-device-error kernel --tool initcheck --track-unused-memory yes $(EXEC)
+	#@cuda-memcheck --destroy-on-device-error kernel --tool synccheck $(EXEC)
+	#@compute-sanitizer --destroy-on-device-error kernel --tool memcheck --leak-check full --report-api-errors all --track-stream-ordered-races all $(EXEC)
+	#@compute-sanitizer --destroy-on-device-error kernel --tool racecheck --racecheck-detect-level info --racecheck-report all $(EXEC)
+	#@compute-sanitizer --destroy-on-device-error kernel --tool initcheck --track-unused-memory yes $(EXEC)
+	#@compute-sanitizer --destroy-on-device-error kernel --tool synccheck $(EXEC)
-.PHONY: log
-log: $(DATA) reset
-	@echo 'Building GPU'
-	@sed -i 's/GPU_BOOSTED false/GPU_BOOSTED true/' config.hpp
-	@make -s -j "$(shell nproc)"
-	@echo 'Logging GPU'
-	@make -s start > log_gpu
-	@echo 'Building CPU'
-	@sed -i 's/GPU_BOOSTED true/GPU_BOOSTED false/' config.hpp
-	@make -s -j "$(shell nproc)"
-	@echo 'Logging CPU'
-	@make -s start > log_cpu
-	@sed -i 's/GPU_BOOSTED false/GPU_BOOSTED true/' config.hpp
-	@echo 'Cleaning up'
-	@make -s reset
-.PHONY: reset
+r2: $(EXEC) $(DATA)
+	@r2 $(EXEC)
 reset:
-	@echo 'Deleting generated states and models'
-	@rm -frv $(OUT_DIR)/* $(MODELS_DIR)/*
+	@echo Deleting generated states and models
+	@rm -rf $(OUT_DIR)/* $(MODELS_DIR)/* | true
+	#@ln -sv /mnt/pierre_stuffs/ViolaJones/cpp/models .
+	#@ln -sv /mnt/pierre_stuffs/ViolaJones/cpp/out .
-.PHONY: clean
 clean:
-	@rm -fv $(EXEC) log_gpu log_cpu
+	@rm $(EXEC)
-.PHONY: mrproper
-mrproper: clean
-	@rm -rfv $(OBJ_DIR) gmon.out
+mrproper:
+	@rm -r $(OBJ_DIR)
-.PHONY: help
-help:
-	@echo "Available targets:"
-	@echo "\tall: alias for start, (default target)"
-	@echo "\tstart: Start the ViolaJones algorithm, require data beforehand downloaded by the downloader."
-	@echo "\tdebug: Debug the ViolaJones algorithm, require data beforehand downloaded by the downloader."
-	@echo "\tprofile: Profile the ViolaJones algorithm functions timestamps, require data beforehand downloaded by the downloader."
-	@echo "\treset: Will delete any saved models and processed data made by ViolaJones."
-	@echo "\tmrproper: Will remove cpp binary files. Will execute reset target beforehand."
-.PHONY: check-nvcc-works
-check-nvcc-works:
-	@nvcc --version >/dev/null 2>&1 || (echo 'Please install NVIDIA Cuda compiler.' && exit 1)
-.PHONY: check-gprof-works
-check-gprof-works:
-	@gprof --version >/dev/null 2>&1 || (echo 'Please install GNU gprof.' && exit 1)
-.PHONY: check-gprof2dot-works
-check-gprof2dot-works:
-	@gprof2dot --help >/dev/null 2>&1 || (echo 'Please install gprof2dot.' && exit 1)
-.PHONY: check-dot-works
-check-dot-works:
-	@dot --version >/dev/null 2>&1 || (echo 'Please install dot from graphviz.' && exit 1)
-.PHONY: check-valgrind-works
-check-valgrind-works:
-	@valgrind --version >/dev/null 2>&1 || (echo 'Please install valgrind.' && exit 1)
-.PHONY: check-computer-sanitizer-works
-check-computer-sanitizer-works:
-	@computer-sanitizer --version >/dev/null 2>&1 || (echo 'Please install Compute Sanitizer from Cuda toolkit.' && exit 1)
--include $(OBJ:.o=.d)
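Two patterns disappear with the deleted side of this Makefile. The check-*-works guard targets fail fast with an install hint when a required tool (nvcc, gprof, gprof2dot, dot, valgrind, Compute Sanitizer) is missing, though note the last deleted guard invokes computer-sanitizer, apparently a misspelling of compute-sanitizer, so that particular check could never pass. And the -MMD -MP flags together with -include $(OBJ:.o=.d) form the usual auto-dependency idiom: each compile emits a .d fragment listing the headers that object actually includes, and re-including those fragments makes a header edit retrigger exactly the recompiles that need it.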

cpp/ViolaJones.cpp (file name inferred from the content below)
@@ -1,61 +1,56 @@
 #include <cmath>
 #include "data.hpp"
-#include "ViolaJones_device.hpp"
+#include "config.hpp"
+#include "ViolaJonesGPU.hpp"
+#include "ViolaJonesCPU.hpp"
-constexpr static inline void add_empty_feature(const np::Array<uint8_t>& feats, size_t& n) noexcept {
+static inline void add_empty_feature(const np::Array<uint8_t>& feats, size_t& n) noexcept {
 	memset(&feats[n], 0, 4 * sizeof(uint8_t));
 	n += 4;
 }
-constexpr static inline void add_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
+static inline void add_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
 	feats[n++] = i + w;
 	feats[n++] = j;
 	feats[n++] = w;
 	feats[n++] = h;
 }
-constexpr static inline void add_immediate_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
+static inline void add_immediate_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
 	feats[n++] = i;
 	feats[n++] = j;
 	feats[n++] = w;
 	feats[n++] = h;
 }
-constexpr static inline void add_bottom_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
+static inline void add_bottom_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
 	feats[n++] = i;
 	feats[n++] = j + h;
 	feats[n++] = w;
 	feats[n++] = h;
 }
-constexpr static inline void add_right2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
+static inline void add_right2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
 	feats[n++] = i + 2 * w;
 	feats[n++] = j;
 	feats[n++] = w;
 	feats[n++] = h;
 }
-constexpr static inline void add_bottom2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
+static inline void add_bottom2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
 	feats[n++] = i;
 	feats[n++] = j + 2 * h;
 	feats[n++] = w;
 	feats[n++] = h;
 }
-constexpr static inline void add_bottom_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
+static inline void add_bottom_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
 	feats[n++] = i + w;
 	feats[n++] = j + h;
 	feats[n++] = w;
 	feats[n++] = h;
 }
-/**
- * @brief Initialize the features based on the input shape.
- *
- * @param width Width of the image
- * @param height Height of the image
- * @return The initialized features
- */
 np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height) noexcept {
 	size_t n = 0;
 	uint16_t w, h, i, j;
@@ -115,11 +110,11 @@ np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height)
 	return feats;
 }
-//np::Array<int32_t> select_percentile(const np::Array<uint8_t> X_feat, const np::Array<uint8_t> y) noexcept {
+//np::Array<int> select_percentile(const np::Array<uint8_t> X_feat, const np::Array<uint8_t> y) noexcept {
 //	std::vector<float64_t> class_0, class_1;
 //
-//	const int32_t im_size = X_feat.shape[0] / y.shape[0];
-//	int32_t idy = 0, n_samples_per_class_0 = 0, n_samples_per_class_1 = 0;
+//	const int im_size = X_feat.shape[0] / y.shape[0];
+//	int idy = 0, n_samples_per_class_0 = 0, n_samples_per_class_1 = 0;
 //	for (size_t i = 0; i < X_feat.shape[0]; i += im_size) {
 //		if (y[idy] == 0) {
 //			++n_samples_per_class_0;
@@ -131,24 +126,24 @@ np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height)
 //		}
 //		++idy;
 //	}
-//	const int32_t n_samples = n_samples_per_class_0 + n_samples_per_class_1;
+//	const int n_samples = n_samples_per_class_0 + n_samples_per_class_1;
 //
 //	float64_t ss_alldata_0 = 0;
-//	for (int32_t i = 0;i < n_samples_per_class_0;++i)
+//	for (int i = 0;i < n_samples_per_class_0;++i)
 //		ss_alldata_0 += (class_0[i] * class_0[i]);
 //
 //	float64_t ss_alldata_1 = 0;
-//	for (int32_t i = 0;i < n_samples_per_class_1;++i)
+//	for (int i = 0;i < n_samples_per_class_1;++i)
 //		ss_alldata_1 += (class_1[i] * class_1[i]);
 //
 //	const float64_t ss_alldata = ss_alldata_0 + ss_alldata_1;
 //
 //	float64_t sums_classes_0 = 0;
-//	for (int32_t i = 0;i < n_samples_per_class_0;++i)
+//	for (int i = 0;i < n_samples_per_class_0;++i)
 //		sums_classes_0 += class_0[i];
 //
 //	float64_t sums_classes_1 = 0;
-//	for (int32_t i = 0;i < n_samples_per_class_1;++i)
+//	for (int i = 0;i < n_samples_per_class_1;++i)
 //		sums_classes_1 += class_1[i];
 //
 //	float64_t sq_of_sums_alldata = sums_classes_0 + sums_classes_1;
@@ -159,21 +154,15 @@ np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height)
 //	const float64_t ss_tot = ss_alldata - sq_of_sums_alldata / n_samples;
 //	const float64_t sqd_sum_bw_n = sq_of_sums_args_0 / n_samples_per_class_0 + sq_of_sums_args_1 / n_samples_per_class_1 - sq_of_sums_alldata / n_samples;
 //	const float64_t ss_wn = ss_tot - sqd_sum_bw_n;
-//	const int32_t df_wn = n_samples - 2;
+//	const int df_wn = n_samples - 2;
 //	const float64_t msw = ss_wn / df_wn;
 //	const float64_t f_values = sqd_sum_bw_n / msw;
 //
-//	const np::Array<int32_t> res = np::empty<int32_t>({ static_cast<size_t>(std::ceil(static_cast<float64_t>(im_size) / 10.0)) });
+//	const np::Array<int> res = np::empty<int>({ static_cast<size_t>(std::ceil(static_cast<float64_t>(im_size) / 10.0)) });
 //	// TODO Complete code
 //	return res;
 //}
-/**
- * @brief Initialize the weights of the weak classifiers based on the training labels.
- *
- * @param y_train Training labels
- * @return The initialized weights
- */
 np::Array<float64_t> init_weights(const np::Array<uint8_t>& y_train) noexcept {
 	np::Array<float64_t> weights = np::empty<float64_t>(y_train.shape);
 	const uint16_t t = np::sum(np::astype<uint16_t>(y_train));
@@ -183,30 +172,13 @@ np::Array<float64_t> init_weights(const np::Array<uint8_t>& y_train) noexcept {
 	}));
 }
-/**
- * @brief Classify the integrated features based on polarity and threshold.
- *
- * @param X_feat_i Integrated features
- * @param j Index of the classifier
- * @param threshold Trained threshold
- * @param polarity Trained polarity
- * @return Classified features
- */
-static np::Array<uint8_t> classify_weak_clf(const np::Array<int32_t>& X_feat_i, const size_t& j, const float64_t& threshold, const float64_t& polarity) noexcept {
+np::Array<uint8_t> classify_weak_clf(const np::Array<int32_t>& X_feat_i, const size_t& j, const float64_t& threshold, const float64_t& polarity) noexcept {
 	np::Array<uint8_t> res = np::empty<uint8_t>({ X_feat_i.shape[1] });
 	for(size_t i = 0; i < res.shape[0]; ++i)
 		res[i] = polarity * X_feat_i[j * X_feat_i.shape[1] + i] < polarity * threshold ? 1 : 0;
 	return res;
 }
-/**
- * @brief Classify the trained classifiers on the given features.
- *
- * @param alphas Trained alphas
- * @param classifiers Trained classifiers
- * @param X_feat integrated features
- * @return Classification results
- */
 np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>& alphas, const np::Array<float64_t>& classifiers, const np::Array<int32_t>& X_feat) noexcept {
 	np::Array<float64_t> total = np::zeros<float64_t>({ X_feat.shape[1] });
@@ -227,15 +199,6 @@ np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>& alphas, cons
 	return y_pred;
 }
-/**
- * @brief Select the best classifer given their predictions.
- *
- * @param classifiers The weak classifiers
- * @param weights Trained weights of each classifiers
- * @param X_feat Integrated features
- * @param y Features labels
- * @return Index of the best classifier, the best error and the best accuracy
- */
 std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array<float64_t>& classifiers, const np::Array<float64_t>& weights, const np::Array<int32_t>& X_feat, const np::Array<uint8_t>& y) noexcept {
 	std::tuple<int32_t, float64_t, np::Array<float64_t>> res = { -1, np::inf, np::empty<float64_t>({ X_feat.shape[0] }) };
@@ -253,15 +216,6 @@ std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array
 	return res;
 }
-/**
- * @brief Train the weak calssifiers.
- *
- * @param T Number of weak classifiers
- * @param X_feat Integrated features
- * @param X_feat_argsort Sorted indexes of the integrated features
- * @param y Features labels
- * @return List of trained alphas and the list of the final classifiers
- */
 std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y) noexcept {
 	np::Array<float64_t> weights = init_weights(y);
 	np::Array<float64_t> alphas = np::empty<float64_t>({ T });
@@ -269,7 +223,11 @@ std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np:
 	for(size_t t = 0; t < T; ++t ){
 		weights /= np::sum(weights);
-		const np::Array<float64_t> classifiers = train_weak_clf(X_feat, X_feat_argsort, y, weights);
+#if GPU_BOOSTED
+		const np::Array<float64_t> classifiers = train_weak_clf_gpu(X_feat, X_feat_argsort, y, weights);
+#else
+		const np::Array<float64_t> classifiers = train_weak_clf_cpu(X_feat, X_feat_argsort, y, weights);
+#endif
 		const auto [ clf, error, accuracy ] = select_best(classifiers, weights, X_feat, y);
 		float64_t beta = error / (1.0 - error);
 		weights *= np::pow(beta, (1.0 - accuracy));
@@ -281,13 +239,6 @@ std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np:
 	return { alphas, final_classifier };
 }
-/**
- * @brief Compute the accuracy score i.e. how a given set of measurements are close to their true value.
- *
- * @param y Ground truth labels
- * @param y_pred Predicted labels
- * @return computed accuracy score
- */
 float64_t accuracy_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
 	float64_t res = 0.0;
 	for(size_t i = 0; i < y.shape[0]; ++i)
@@ -296,13 +247,6 @@ float64_t accuracy_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>&
 	return res / y.shape[0];
 }
-/**
- * @brief Compute the precision score i.e. how a given set of measurements are close to each other.
- *
- * @param y Ground truth labels
- * @param y_pred Predicted labels
- * @return computed precision score
- */
 float64_t precision_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
 	uint16_t true_positive = 0, false_positive = 0;
 	for(size_t i = 0; i < y.shape[0]; ++i)
@@ -315,13 +259,6 @@ float64_t precision_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>&
 	return static_cast<float64_t>(true_positive) / (true_positive + false_positive);
 }
-/**
- * @brief Compute the recall score i.e. the ratio (TP / (TP + FN)) where TP is the number of true positives and FN the number of false negatives.
- *
- * @param y Ground truth labels
- * @param y_pred Predicted labels
- * @return computed recall score
- */
 float64_t recall_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
 	uint16_t true_positive = 0, false_negative = 0;
 	for(size_t i = 0; i < y.shape[0]; ++i)
@@ -335,35 +272,12 @@ float64_t recall_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_
 	return static_cast<float64_t>(true_positive) / (true_positive + false_negative);
 }
-/**
- * @brief Compute the F1 score aka balanced F-score or F-measure.
- *
- * F1 = (2 * TP) / (2 * TP + FP + FN)
- * where TP is the true positives,
- * FP is the false positives,
- * and FN is the false negatives
- *
- * @param y Ground truth labels
- * @param y_pred Predicted labels
- * @return computed F1 score
- */
 float64_t f1_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
 	const float64_t precision = precision_score(y, y_pred);
 	const float64_t recall = recall_score(y, y_pred);
 	return 2 * (precision * recall) / (precision + recall);
 }
-/**
- * @brief Compute the confusion matrix to evaluate a given classification.
- *
- * A confusion matrix of a binary classification consists of a 2x2 matrix containing
- * | True negatives  | False positives |
- * | False negatives | True positives  |
- *
- * @param y Ground truth labels
- * @param y_pred Predicted labels
- * @return computed confusion matrix
- */
 std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
 	uint16_t true_positive = 0, false_positive = 0, true_negative = 0, false_negative = 0;
 	for(size_t i = 0; i < y.shape[0]; ++i)
@@ -379,3 +293,4 @@ std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Ar
 		++false_positive;
 	return std::make_tuple(true_negative, false_positive, false_negative, true_positive);
 }
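The loop in train_viola_jones, identical on both sides apart from the CPU/GPU dispatch, is the standard Viola-Jones AdaBoost round. Writing the selected stump's weighted error as epsilon_t and taking e_i in {0, 1} to be zero when sample i is classified correctly (the role played by the code's accuracy array), the update shown above is, as a reference sketch in the usual notation:

\beta_t = \frac{\epsilon_t}{1 - \epsilon_t}, \qquad w_{t+1,i} = w_{t,i} \, \beta_t^{\,1 - e_i}, \qquad \alpha_t = \log\frac{1}{\beta_t}

so correctly classified samples are down-weighted by beta_t while misses keep their weight, and alpha_t is the conventional round confidence matching the alphas array the function returns.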

cpp/ViolaJones.hpp (file name inferred from the content below)
@@ -2,15 +2,8 @@
 #include <filesystem>
 #include "data.hpp"
 #include "toolbox.hpp"
+//#include "config.hpp"
-/**
- * @brief Test if a array from a CPU computation is equal to a GPU computation equivalent.
- *
- * @tparam T Inner type of the arrays to test
- * @param cpu CPU Array
- * @param gpu GPU Array
- * @return Whether the test was succesful
- */
 template <typename T>
 bool unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noexcept {
 	if (cpu.shape != gpu.shape) {
@@ -34,14 +27,6 @@ bool unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noex
 	return eq == length;
 }
-/**
- * @brief Test if a given 2D array of indices sort a given 2D array
- *
- * @tparam T Inner type of the array to test
- * @param a 2D Array of data
- * @param indices 2D Indices that sort the array
- * @return Whether the test was successful
- */
 template <typename T>
 bool unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indices) noexcept {
 	if (a.shape != indices.shape) {
@@ -66,18 +51,6 @@ bool unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indi
 	return correct == total;
 }
-/**
- * @brief Benchmark a function and display the result in stdout.
- *
- * @tparam T Resulting type of the function to benchmark
- * @tparam F Signature of the function to call
- * @tparam Args Arguments variadic of the function to call
- * @param step_name Name of the function to log
- * @param column_width Width of the column to print during logging
- * @param fnc Function to benchmark
- * @param args Arguments to pass to the function to call
- * @return Result of the benchmarked function
- */
 template <typename T, typename F, typename... Args>
 T benchmark_function(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept {
 #if __DEBUG == false
@@ -91,16 +64,6 @@ T benchmark_function(const char* const step_name, const int32_t& column_width, c
 	return res;
 }
-/**
- * @brief Benchmark a function and display the result in stdout.
- *
- * @tparam F Signature of the function to call
- * @tparam Args Arguments variadic of the function to call
- * @param step_name Name of the function to log
- * @param column_width Width of the column to print during logging
- * @param fnc Function to benchmark
- * @param args Arguments to pass to the function to call
- */
 template <typename F, typename... Args>
 void benchmark_function_void(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept {
 #if __DEBUG == false
@@ -113,22 +76,6 @@ void benchmark_function_void(const char* const step_name, const int32_t& column_
 	formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
 }
-/**
- * @brief Either execute a function then save the result or load the already cached result.
- *
- * @tparam T Inner type of the resulting array
- * @tparam F Signature of the function to call
- * @tparam Args Arguments variadic of the function to call
- * @param step_name Name of the function to log
- * @param column_width Width of the column to print during logging
- * @param filename Name of the filename where the result is saved
- * @param force_redo Recall the function even if the result is already saved, ignored if result is not cached
- * @param save_state Whether the computed result will be saved or not, ignore if loading already cached result
- * @param out_dir Path of the directory to save the result
- * @param fnc Function to call
- * @param args Arguments to pass to the function to call
- * @return The result of the called function
- */
 template <typename T, typename F, typename... Args>
 np::Array<T> state_saver(const char* const step_name, const int32_t& column_width, const char* const filename, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept {
 	char filepath[BUFFER_SIZE] = { 0 };
@@ -136,6 +83,7 @@ np::Array<T> state_saver(const char* const step_name, const int32_t& column_widt
 	np::Array<T> bin;
 	if (!std::filesystem::exists(filepath) || force_redo) {
+		//bin = std::move(benchmark_function<np::Array<T>>(step_name, column_width, fnc, std::forward<Args>(args)...));
 		bin = benchmark_function<np::Array<T>>(step_name, column_width, fnc, std::forward<Args>(args)...);
 		if(save_state){
 #if __DEBUG == false
@@ -153,35 +101,20 @@ np::Array<T> state_saver(const char* const step_name, const int32_t& column_widt
 		fprintf(stderr, "Loading results of %s\r", step_name);
 		fflush(stderr);
 #endif
+		//bin = std::move(load<T>(filepath));
 		bin = load<T>(filepath);
 		formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" });
 	}
 	return bin;
 }
-/**
- * @brief Either execute a function then saves the results or load the already cached result.
- *
- * @tparam T Inner type of the resulting arrays
- * @tparam F Signature of the function to call
- * @tparam Args Arguments variadic of the function to call
- * @param step_name Name of the function to log
- * @param column_width Width of the column to print during logging
- * @param filenames List of names of the filenames where the results are save
- * @param force_redo Recall the function even if the results are already saved, ignored if results are not cached
- * @param save_state Whether the computed results will be saved or not, ignored if loading already cached results
- * @param out_dir Path of the directory to save the results
- * @param fnc Function to call
- * @param args Arguments to pass to the function to call
- * @return The results of the called function
- */
 template <typename T, size_t N, typename F, typename... Args>
 std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32_t& column_width, const std::vector<const char*>& filenames, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept {
 	char filepath[BUFFER_SIZE] = { 0 };
 	bool abs = false;
-	for (const char* const filename : filenames){
-		snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename);
-		if (!std::filesystem::exists(filepath)) {
+	for (const char* filename : filenames){
+		sprintf(filepath, "%s/%s.bin", out_dir, filename);
+		if (!fs::exists(filepath)) {
 			abs = true;
 			break;
 		}
@@ -189,6 +122,7 @@ std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32
 	std::array<np::Array<T>, N> bin;
 	if (abs || force_redo) {
+		//bin = std::move(benchmark_function<std::array<np::Array<T>, N>>(step_name, column_width, fnc, std::forward<Args>(args)...));
 		bin = benchmark_function<std::array<np::Array<T>, N>>(step_name, column_width, fnc, std::forward<Args>(args)...);
 		if (save_state){
 #if __DEBUG == false
@@ -196,8 +130,8 @@ std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32
 		fflush(stderr);
 #endif
 		size_t i = 0;
-		for (const char* const filename : filenames){
-			snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename);
+		for (const char* filename : filenames){
+			sprintf(filepath, "%s/%s.bin", out_dir, filename);
 			save<T>(bin[i++], filepath);
 		}
 #if __DEBUG == false
@@ -211,116 +145,25 @@ std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32
 		fflush(stderr);
 #endif
 		size_t i = 0;
-		for (const char* const filename : filenames){
+		for (const char* filename : filenames){
+			bin[i++] = std::move(load<T>(filepath));
 			snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename);
-			bin[i++] = load<T>(filepath);
 		}
 		formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" });
 	}
 	return bin;
 }
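One behavioral difference buried in this hunk: one side formats paths with snprintf bounded by BUFFER_SIZE, the other with unbounded sprintf, which can overwrite past filepath if out_dir plus filename ever exceeds the buffer. A minimal standalone sketch of the bounded pattern (the constant's value and the names here are illustrative, not taken from the repo):

#include <cstddef>
#include <cstdio>

constexpr size_t BUFFER_SIZE = 256; // illustrative; the repo defines its own value

int main() {
	char filepath[BUFFER_SIZE] = { 0 };
	const char* const out_dir = "out";           // illustrative
	const char* const filename = "X_train_feat"; // illustrative
	// snprintf writes at most BUFFER_SIZE bytes including the terminating NUL,
	// truncating instead of overflowing; its return value (the would-be length)
	// can be checked to detect that truncation.
	const int n = snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename);
	if (n < 0 || static_cast<size_t>(n) >= BUFFER_SIZE)
		fprintf(stderr, "path truncated\n");
	puts(filepath);
	return 0;
}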
-/**
- * @brief Initialize the features based on the input shape.
- *
- * @param width Width of the image
- * @param height Height of the image
- * @return The initialized features
- */
+np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>&) noexcept;
 np::Array<uint8_t> build_features(const uint16_t&, const uint16_t&) noexcept;
-//np::Array<int32_t> select_percentile(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
+np::Array<int> select_percentile(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
-/**
- * @brief Classify the trained classifiers on the given features.
- *
- * @param alphas Trained alphas
- * @param classifiers Trained classifiers
- * @param X_feat integrated features
- * @return Classification results
- */
 np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>&, const np::Array<float64_t>&, const np::Array<int32_t>&) noexcept;
-/**
- * @brief Initialize the weights of the weak classifiers based on the training labels.
- *
- * @param y_train Training labels
- * @return The initialized weights
- */
 np::Array<float64_t> init_weights(const np::Array<uint8_t>&) noexcept;
-/**
- * @brief Select the best classifer given their predictions.
- *
- * @param classifiers The weak classifiers
- * @param weights Trained weights of each classifiers
- * @param X_feat Integrated features
- * @param y Features labels
- * @return Index of the best classifier, the best error and the best accuracy
- */
 std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array<float64_t>&, const np::Array<float64_t>&, const np::Array<int32_t>&,
 		const np::Array<uint8_t>&) noexcept;
-/**
- * @brief Train the weak calssifiers.
- *
- * @param T Number of weak classifiers
- * @param X_feat Integrated features
- * @param X_feat_argsort Sorted indexes of the integrated features
- * @param y Features labels
- * @return List of trained alphas and the list of the final classifiers
- */
 std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t&, const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&) noexcept;
-/**
- * @brief Compute the accuracy score i.e. how a given set of measurements are close to their true value.
- *
- * @param y Ground truth labels
- * @param y_pred Predicted labels
- * @return computed accuracy score
- */
 float64_t accuracy_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
-/**
- * @brief Compute the precision score i.e. how a given set of measurements are close to each other.
- *
- * @param y Ground truth labels
- * @param y_pred Predicted labels
- * @return computed precision score
- */
 float64_t precision_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
-/**
- * @brief Compute the recall score i.e. the ratio (TP / (TP + FN)) where TP is the number of true positives and FN the number of false negatives.
- *
- * @param y Ground truth labels
- * @param y_pred Predicted labels
- * @return computed recall score
- */
 float64_t recall_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
-/**
- * @brief Compute the F1 score aka balanced F-score or F-measure.
- *
- * F1 = (2 * TP) / (2 * TP + FP + FN)
- * where TP is the true positives,
- * FP is the false positives,
- * and FN is the false negatives
- *
- * @param y Ground truth labels
- * @param y_pred Predicted labels
- * @return computed F1 score
- */
 float64_t f1_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
-/**
- * @brief Compute the confusion matrix to evaluate a given classification.
- *
- * A confusion matrix of a binary classification consists of a 2x2 matrix containing
- * | True negatives  | False positives |
- * | False negatives | True positives  |
- *
- * @param y Ground truth labels
- * @param y_pred Predicted labels
- * @return computed confusion matrix
- */
 std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;

cpp/ViolaJonesCPU.cpp (file name inferred from the content below)
@@ -1,15 +1,7 @@
#include "data.hpp" #include "data.hpp"
#include "config.hpp" #include "toolbox.hpp"
#if GPU_BOOSTED == false np::Array<uint32_t> set_integral_image_cpu(const np::Array<uint8_t>& set) noexcept {
/**
* @brief Transform the input images in integrated images (CPU version).
*
* @param X Dataset of images
* @return Dataset of integrated images
*/
np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>& set) noexcept {
np::Array<uint32_t> X_ii = np::empty<uint32_t>(set.shape); np::Array<uint32_t> X_ii = np::empty<uint32_t>(set.shape);
size_t i, y, x, s; size_t i, y, x, s;
@@ -39,14 +31,7 @@ constexpr static inline int16_t __compute_feature__(const np::Array<uint32_t>& X
 	return X_ii[j + _yh + w] + X_ii[j + _y] - X_ii[j + _yh] - X_ii[j + _y + w];
 }
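The context line above is the classic integral-image identity: once ii holds cumulative sums, any w by h rectangle sum costs four lookups, ii(y+h, x+w) + ii(y, x) - ii(y+h, x) - ii(y, x+w). A self-contained sketch with plain vectors and a one-pixel zero border (illustrative types, not the repo's np::Array):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Integral image with a one-pixel zero border: ii[y][x] = sum of img[0..y-1][0..x-1].
static std::vector<std::vector<uint32_t>> integral(const std::vector<std::vector<uint8_t>>& img) {
	const size_t h = img.size(), w = img[0].size();
	std::vector<std::vector<uint32_t>> ii(h + 1, std::vector<uint32_t>(w + 1, 0));
	for (size_t y = 0; y < h; ++y)
		for (size_t x = 0; x < w; ++x)
			ii[y + 1][x + 1] = img[y][x] + ii[y][x + 1] + ii[y + 1][x] - ii[y][x];
	return ii;
}

// Sum over the w x h rectangle with top-left corner (x, y): four lookups,
// the same identity as the diff's __compute_feature__.
static uint32_t rect_sum(const std::vector<std::vector<uint32_t>>& ii,
		size_t x, size_t y, size_t w, size_t h) {
	return ii[y + h][x + w] + ii[y][x] - ii[y + h][x] - ii[y][x + w];
}

int main() {
	const std::vector<std::vector<uint8_t>> img = { { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } };
	const auto ii = integral(img);
	printf("%u\n", rect_sum(ii, 1, 1, 2, 2)); // 5 + 6 + 8 + 9 = 28
	return 0;
}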
-/**
- * @brief Apply the features on a integrated image dataset (CPU version).
- *
- * @param feats Features to apply
- * @param X_ii Integrated image dataset
- * @return Applied features
- */
-np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
+np::Array<int32_t> apply_features_cpu(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
 	np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
 	size_t j, feat_idx = 0;
@@ -66,7 +51,7 @@ np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Arr
 	return X_feat;
 }
-np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y, const np::Array<float64_t>& weights) noexcept {
+np::Array<float64_t> train_weak_clf_cpu(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y, const np::Array<float64_t>& weights) noexcept {
 	float64_t total_pos = 0.0, total_neg = 0.0;
 	for(size_t i = 0; i < y.shape[0]; ++i)
 		(y[i] == static_cast<uint8_t>(1) ? total_pos : total_neg) += weights[i];
@@ -96,69 +81,7 @@ np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::
 	return classifiers;
 }
-/**
- * @brief Perform an indirect sort of a given array within a given bound.
- *
- * @tparam T Inner type of the array
- * @param a Array to sort
- * @param indices Array of indices to write to
- * @param low lower bound to sort
- * @param high higher bound to sort
- */
-template<typename T>
-static void argsort(const T* const a, uint16_t* const indices, size_t low, size_t high) noexcept {
-	const size_t total = high - low + 1;
-	size_t* const stack = new size_t[total]{low, high};
-	//size_t stack[total];
-	//stack[0] = l;
-	//stack[1] = h;
-	size_t top = 1;
-	while (top <= total) {
-		high = stack[top--];
-		low = stack[top--];
-		if(low >= high)
-			break;
-		const size_t p = as_partition(a, indices, low, high);
-		if (p - 1 > low && p - 1 < total) {
-			stack[++top] = low;
-			stack[++top] = p - 1;
-		}
-		if (p + 1 < high) {
-			stack[++top] = p + 1;
-			stack[++top] = high;
-		}
-	}
-	delete[] stack;
-}
-/**
- * @brief Apply argsort to every column of a given 2D array.
- *
- * @tparam T Inner type of the array
- * @param a 2D Array to sort
- * @return 2D Array of indices that sort the array
- */
-template<typename T>
-static np::Array<uint16_t> argsort_bounded(const np::Array<T>& a, const size_t& low, const size_t& high) noexcept {
-	np::Array<uint16_t> indices = np::empty(a.shape);
-	map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; });
-	argsort_bounded(a, indices, low, high);
-	return indices;
-}
-/**
- * @brief Perform an indirect sort on each column of a given 2D array (CPU version).
- *
- * @param a 2D Array to sort
- * @return 2D Array of indices that sort the array
- */
-np::Array<uint16_t> argsort_2d(const np::Array<int32_t>& X_feat) noexcept {
+np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>& X_feat) noexcept {
 	const np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);
 	const size_t length = np::prod(X_feat.shape);
 	for (size_t i = 0; i < length; i += X_feat.shape[1]) {
@@ -168,4 +91,3 @@ np::Array<uint16_t> argsort_2d(const np::Array<int32_t>& X_feat) noexcept {
 	return indices;
 }
-#endif // GPU_BOOSTED == false

cpp/ViolaJonesCPU.hpp (new file, 8 additions)
@@ -0,0 +1,8 @@
+#pragma once
+#include "data.hpp"
+np::Array<uint32_t> set_integral_image_cpu(const np::Array<uint8_t>&) noexcept;
+np::Array<int32_t> apply_features_cpu(const np::Array<uint8_t>&, const np::Array<uint32_t>&) noexcept;
+np::Array<float64_t> train_weak_clf_cpu(const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&,
+		const np::Array<float64_t>&) noexcept;
+np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>&) noexcept;

cpp/ViolaJonesGPU.cu (file name inferred from the content below)
@@ -1,14 +1,5 @@
#include "data.hpp" #include "data.hpp"
#include "config.hpp"
#if GPU_BOOSTED
/**
* @brief Prefix Sum (scan) of a given dataset.
*
* @param X Dataset of images to apply sum
* @return Scanned dataset of images
*/
static np::Array<uint32_t> __scanCPU_3d__(const np::Array<uint32_t>& X) noexcept { static np::Array<uint32_t> __scanCPU_3d__(const np::Array<uint32_t>& X) noexcept {
np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape); np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape);
const size_t total = np::prod(X_scan.shape); const size_t total = np::prod(X_scan.shape);
@@ -25,14 +16,6 @@ static np::Array<uint32_t> __scanCPU_3d__(const np::Array<uint32_t>& X) noexcept
 	return X_scan;
 }
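__scanCPU_3d__ is the sequential reference the GPU scan is checked against: each row of each image is replaced by its running total, an inclusive prefix sum. The same per-row operation in standard C++17, as a sketch with an illustrative container rather than the repo's np::Array:

#include <cstdint>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
	// One image row; __scanCPU_3d__ does this for every row of every image.
	std::vector<uint32_t> row = { 1, 2, 3, 4 };
	// In-place inclusive prefix sum: {1, 3, 6, 10}.
	std::inclusive_scan(row.begin(), row.end(), row.begin());
	for (const uint32_t v : row)
		printf("%u ", v);
	printf("\n");
	return 0;
}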
-/**
- * @brief GPU kernel used to do a parallel prefix sum (scan).
- *
- * @param n Number of width blocks
- * @param j Temporary sum index
- * @param d_inter Temporary sums on device to add
- * @param d_X Dataset of images on device to apply sum
- */
 static __global__ void __kernel_scan_3d__(const uint16_t n, const uint16_t j, np::Array<uint32_t> d_inter, np::Array<uint32_t> d_X) {
 	const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x;
 	const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y;
@@ -77,14 +60,6 @@ static __global__ void __kernel_scan_3d__(const uint16_t n, const uint16_t j, np
 	d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] = sA[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y];
 }
-/**
- * @brief GPU kernel for parallel sum.
- *
- * @param d_X Dataset of images on device
- * @param d_s Temporary sums to add on device
- * @param n Number of width blocks
- * @param m Height of a block
- */
 static __global__ void __add_3d__(np::Array<uint32_t> d_X, const np::Array<uint32_t> d_s, const uint16_t n, const uint16_t m) {
 	const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x;
 	const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y;
@@ -92,14 +67,6 @@ static __global__ void __add_3d__(np::Array<uint32_t> d_X, const np::Array<uint3
 	d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] += d_s[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + blockIdx.x];
 }
-/**
- * @brief Parallel Prefix Sum (scan) of a given dataset.
- *
- * Read more: https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda
- *
- * @param X Dataset of images
- * @return Scanned dataset of images
- */
 static np::Array<uint32_t> __scanGPU_3d__(const np::Array<uint32_t>& X) noexcept {
 	np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape);
@@ -145,12 +112,6 @@ static np::Array<uint32_t> __scanGPU_3d__(const np::Array<uint32_t>& X) noexcept
 	return X_scan;
 }
-/**
- * @brief GPU kernel of the function __transpose_3d__.
- *
- * @param d_X Dataset of images on device
- * @param d_Xt Transposed dataset of images on device
- */
 static __global__ void __transpose_kernel__(const np::Array<uint32_t> d_X, np::Array<uint32_t> d_Xt) {
 	__shared__ uint32_t temp[NB_THREADS_2D_X * NB_THREADS_2D_Y];
@@ -167,12 +128,6 @@ static __global__ void __transpose_kernel__(const np::Array<uint32_t> d_X, np::A
 	d_Xt[blockIdx.z * d_Xt.shape[1] * d_Xt.shape[2] + x * d_X.shape[2] + y] = temp[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y];
 }
-/**
- * @brief Transpose every images in the given dataset.
- *
- * @param X Dataset of images
- * @return Transposed dataset of images
- */
 static np::Array<uint32_t> __transpose_3d__(const np::Array<uint32_t>& X) noexcept {
 	np::Array<uint32_t> Xt = np::empty<uint32_t>({ X.shape[0], X.shape[2], X.shape[1] });
@@ -192,13 +147,7 @@ static np::Array<uint32_t> __transpose_3d__(const np::Array<uint32_t>& X) noexce
 	return Xt;
 }
-/**
- * @brief Transform the input images in integrated images (GPU version).
- *
- * @param X Dataset of images
- * @return Dataset of integrated images
- */
-np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>& X) noexcept {
+np::Array<uint32_t> set_integral_image_gpu(const np::Array<uint8_t>& X) noexcept {
 	np::Array<uint32_t> X_ii = np::astype<uint32_t>(X);
 	X_ii = __scanCPU_3d__(X_ii);
 	X_ii = __transpose_3d__(X_ii);
@@ -206,17 +155,53 @@ np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>& X) noexcept {
 	return __transpose_3d__(X_ii);
 }
-/**
- * @brief GPU kernel of the function train_weak_clf.
- *
- * @param d_classifiers Weak classifiers on device to train
- * @param d_y Labels of the features on device
- * @param d_X_feat Feature images dataset on device
- * @param d_X_feat_argsort Sorted indexes of the integrated features on device
- * @param d_weights Weights of the features on device
- * @param total_pos Total of positive labels in the dataset
- * @param total_neg Total of negative labels in the dataset
- */
+static inline __device__ int16_t __compute_feature__(const np::Array<uint32_t>& d_X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept {
+	const size_t _y = y * d_X_ii.shape[1] + x;
+	const size_t _yh = _y + h * d_X_ii.shape[1];
+	return d_X_ii[j + _yh + w] + d_X_ii[j + _y] - d_X_ii[j + _yh] - d_X_ii[j + _y + w];
+}
+static __global__ void __apply_feature_kernel__(int32_t* d_X_feat, const np::Array<uint8_t> d_feats, const np::Array<uint32_t> d_X_ii) {
+	size_t i = blockIdx.x * blockDim.x + threadIdx.x;
+	size_t j = blockIdx.y * blockDim.y + threadIdx.y;
+	if (i >= d_feats.shape[0] || j >= d_X_ii.shape[0])
+		return;
+	const size_t k = i * d_X_ii.shape[0] + j;
+	i *= np::prod(d_feats.shape, 1);
+	j *= np::prod(d_X_ii.shape, 1);
+	const int16_t p1 = __compute_feature__(d_X_ii, j, d_feats[i + 0], d_feats[i + 1], d_feats[i + 2], d_feats[i + 3]);
+	const int16_t p2 = __compute_feature__(d_X_ii, j, d_feats[i + 4], d_feats[i + 5], d_feats[i + 6], d_feats[i + 7]);
+	const int16_t n1 = __compute_feature__(d_X_ii, j, d_feats[i + 8], d_feats[i + 9], d_feats[i + 10], d_feats[i + 11]);
+	const int16_t n2 = __compute_feature__(d_X_ii, j, d_feats[i + 12], d_feats[i + 13], d_feats[i + 14], d_feats[i + 15]);
+	d_X_feat[k] = static_cast<int32_t>(p1 + p2) - static_cast<int32_t>(n1 + n2);
+}
+np::Array<int32_t> apply_features_gpu(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
+	const np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
+	int32_t* d_X_feat;
+	_print_cuda_error_("malloc d_X_feat", cudaMalloc(&d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t)));
+	np::Array<uint32_t> d_X_ii = copyToDevice<uint32_t>("X_ii", X_ii);
+	np::Array<uint8_t> d_feats = copyToDevice<uint8_t>("feats", feats);
+	const size_t dimX = static_cast<size_t>(std::ceil(static_cast<float64_t>(feats.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_X)));
+	const size_t dimY = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_ii.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_Y)));
+	const dim3 dimGrid(dimX, dimY);
+	constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
+	__apply_feature_kernel__<<<dimGrid, dimBlock>>>(d_X_feat, d_feats, d_X_ii);
+	_print_cuda_error_("synchronize", cudaDeviceSynchronize());
+	_print_cuda_error_("memcpy X_feat", cudaMemcpy(X_feat.data, d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t), cudaMemcpyDeviceToHost));
+	_print_cuda_error_("free d_X_feat", cudaFree(d_X_feat));
+	cudaFree("free d_feats", d_feats);
+	cudaFree("free d_X_11", d_X_ii);
+	return X_feat;
+}
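The grid sizing in apply_features_gpu is the standard ceil-division idiom: launching ceil(n_features / NB_THREADS_2D_X) by ceil(n_images / NB_THREADS_2D_Y) blocks guarantees every (feature, image) pair is covered, and the early bounds check at the top of the kernel turns the overhanging threads of the last blocks into no-ops.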
 static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifiers, const np::Array<uint8_t> d_y,
 		const np::Array<int32_t> d_X_feat, const np::Array<uint16_t> d_X_feat_argsort,
 		const np::Array<float64_t> d_weights, const float64_t total_pos, const float64_t total_neg) {
@@ -225,7 +210,7 @@ static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifi
 	i += threadIdx.x * blockDim.y * blockDim.z;
 	i += threadIdx.y * blockDim.z;
 	i += threadIdx.z;
+	// const size_t i = blockIdx.x * blockDim.x + threadIdx.x;
 	if(i >= d_classifiers.shape[0])
 		return;
@@ -250,16 +235,7 @@ static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifi
 	d_classifiers[i * 2] = best_threshold; d_classifiers[i * 2 + 1] = best_polarity;
 }
/** np::Array<float64_t> train_weak_clf_gpu(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y,
* @brief Train the weak classifiers on a given dataset (GPU version).
*
* @param X_feat Feature images dataset
* @param X_feat_argsort Sorted indexes of the integrated features
* @param y Labels of the features
* @param weights Weights of the features
* @return Trained weak classifiers
*/
np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y,
const np::Array<float64_t>& weights) noexcept { const np::Array<float64_t>& weights) noexcept {
float64_t total_pos = 0.0, total_neg = 0.0; float64_t total_pos = 0.0, total_neg = 0.0;
for(size_t i = 0; i < y.shape[0]; ++i) for(size_t i = 0; i < y.shape[0]; ++i)
@ -275,6 +251,8 @@ np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::
const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS_3D_X * NB_THREADS_3D_Y * NB_THREADS_3D_Z))); const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS_3D_X * NB_THREADS_3D_Y * NB_THREADS_3D_Z)));
constexpr const dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z); constexpr const dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z);
// const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS)));
// constexpr const dim3 dimBlock(NB_THREADS);
__train_weak_clf_kernel__<<<n_blocks, dimBlock>>>(d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg); __train_weak_clf_kernel__<<<n_blocks, dimBlock>>>(d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg);
_print_cuda_error_("synchronize", cudaDeviceSynchronize()); _print_cuda_error_("synchronize", cudaDeviceSynchronize());
@@ -289,118 +267,28 @@ np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::
return classifiers; return classifiers;
} }
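A hypothetical call site for the wrapper above; the uniform weight initialisation and the caller-side variable names are assumptions for illustration, not taken from this diff:

	np::Array<float64_t> weights = np::empty<float64_t>({ y_train.shape[0] });
	for (size_t i = 0; i < y_train.shape[0]; ++i)
		weights[i] = 1.0 / static_cast<float64_t>(y_train.shape[0]);
	// Each classifier is stored as a (threshold, polarity) pair, per the kernel above.
	const np::Array<float64_t> classifiers = train_weak_clf(X_train_feat, X_train_feat_argsort, y_train, weights);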
/**
 * @brief Compute a feature on an integrated image at a specific coordinate (GPU version).
 *
 * @param d_X_ii Dataset of integrated images on device
 * @param j Image index in the dataset
 * @param x X coordinate
 * @param y Y coordinate
 * @param w Width of the feature
 * @param h Height of the feature
 * @return Computed feature value
 */
static inline __device__ int16_t __compute_feature__(const np::Array<uint32_t>& d_X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept {
const size_t _y = y * d_X_ii.shape[1] + x;
const size_t _yh = _y + h * d_X_ii.shape[1];
return d_X_ii[j + _yh + w] + d_X_ii[j + _y] - d_X_ii[j + _yh] - d_X_ii[j + _y + w];
}
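The four-corner access above is the standard integral-image identity: the sum over a w×h rectangle anchored at (x, y) needs only four lookups. A host-side equivalent, as a sketch assuming the same row-stride convention as d_X_ii.shape[1]:

	static inline int16_t rect_sum(const uint32_t* const ii, const size_t stride,
			const int16_t x, const int16_t y, const int16_t w, const int16_t h) noexcept {
		const size_t _y = y * stride + x;	// top-left corner
		const size_t _yh = _y + h * stride;	// bottom-left corner
		// bottom-right + top-left - bottom-left - top-right
		return ii[_yh + w] + ii[_y] - ii[_yh] - ii[_y + w];
	}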
/**
* @brief GPU kernel of the function apply_features.
*
* @param d_X_feat Dataset of image features on device
* @param d_feats Features on device to apply
* @param d_X_ii Integrated image dataset on device
*/
static __global__ void __apply_feature_kernel__(int32_t* d_X_feat, const np::Array<uint8_t> d_feats, const np::Array<uint32_t> d_X_ii) {
size_t i = blockIdx.x * blockDim.x + threadIdx.x;
size_t j = blockIdx.y * blockDim.y + threadIdx.y;
if (i >= d_feats.shape[0] || j >= d_X_ii.shape[0])
return;
const size_t k = i * d_X_ii.shape[0] + j;
i *= np::prod(d_feats.shape, 1);
j *= np::prod(d_X_ii.shape, 1);
const int16_t p1 = __compute_feature__(d_X_ii, j, d_feats[i + 0], d_feats[i + 1], d_feats[i + 2], d_feats[i + 3]);
const int16_t p2 = __compute_feature__(d_X_ii, j, d_feats[i + 4], d_feats[i + 5], d_feats[i + 6], d_feats[i + 7]);
const int16_t n1 = __compute_feature__(d_X_ii, j, d_feats[i + 8], d_feats[i + 9], d_feats[i + 10], d_feats[i + 11]);
const int16_t n2 = __compute_feature__(d_X_ii, j, d_feats[i + 12], d_feats[i + 13], d_feats[i + 14], d_feats[i + 15]);
d_X_feat[k] = static_cast<int32_t>(p1 + p2) - static_cast<int32_t>(n1 + n2);
}
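For reference, the index arithmetic above amounts to the following layout (a descriptive note, not code from this diff):

	// X_feat is (n_feats, n_images) row-major, so feature i of image j lands at
	//	k = i * n_images + j	// n_images == d_X_ii.shape[0]
	// while i and j are then rescaled to element offsets into the flat d_feats
	// (16 uint8_t coordinates per feature) and d_X_ii (one integrated image) buffers.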
/**
 * @brief Apply the features on an integrated image dataset (GPU version).
*
* @param feats Features to apply
* @param X_ii Integrated image dataset
* @return Applied features
*/
np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
const np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
int32_t* d_X_feat = nullptr;
_print_cuda_error_("malloc d_X_feat", cudaMalloc(&d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t)));
np::Array<uint32_t> d_X_ii = copyToDevice<uint32_t>("X_ii", X_ii);
np::Array<uint8_t> d_feats = copyToDevice<uint8_t>("feats", feats);
const size_t dimX = static_cast<size_t>(std::ceil(static_cast<float64_t>(feats.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_X)));
const size_t dimY = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_ii.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_Y)));
const dim3 dimGrid(dimX, dimY);
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
__apply_feature_kernel__<<<dimGrid, dimBlock>>>(d_X_feat, d_feats, d_X_ii);
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
_print_cuda_error_("memcpy X_feat", cudaMemcpy(X_feat.data, d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t), cudaMemcpyDeviceToHost));
_print_cuda_error_("free d_X_feat", cudaFree(d_X_feat));
cudaFree("free d_feats", d_feats);
cudaFree("free d_X_11", d_X_ii);
return X_feat;
}
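Grid sizing here round-trips through float64_t and std::ceil; an equivalent integer form (a sketch, not part of this diff) would be:

	constexpr inline size_t ceil_div(const size_t a, const size_t b) noexcept {
		return (a + b - 1) / b;	// ceil(a / b) without floating point
	}
	// const dim3 dimGrid(ceil_div(feats.shape[0], NB_THREADS_2D_X),
	//                    ceil_div(X_ii.shape[0], NB_THREADS_2D_Y));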
/**
* @brief Partition of the argsort algorithm.
*
* @tparam T Inner type of the array
* @param d_a Array on device to sort
* @param d_indices Array of indices on device to write to
 * @param low Lower bound to sort
 * @param high Upper bound to sort
 * @return Final index of the pivot
*/
template<typename T> template<typename T>
__device__ inline static int32_t _as_partition_(const T* d_a, uint16_t* const d_indices, const size_t low, const size_t high) noexcept { __device__ inline static int32_t as_partition_gpu(const T* a, uint16_t* indices, const size_t l, const size_t h) noexcept {
int32_t i = low - 1; int32_t i = l - 1;
for (int32_t j = low; j <= high; ++j) for (int32_t j = l; j <= h; ++j)
if (d_a[d_indices[j]] < d_a[d_indices[high]]) if (a[indices[j]] < a[indices[h]])
swap(&d_indices[++i], &d_indices[j]); swap(&indices[++i], &indices[j]);
swap(&d_indices[++i], &d_indices[high]); swap(&indices[++i], &indices[h]);
return i; return i;
} }
/**
 * @brief CUDA device function performing an indirect sort of a given array within given bounds.
*
* @tparam T Inner type of the array
* @param d_a Array on device to sort
* @param d_indices Array of indices on device to write to
 * @param low Lower bound to sort
 * @param high Upper bound to sort
*/
template<typename T> template<typename T>
__device__ void argsort_kernel(const T* d_a, uint16_t* const d_indices, size_t low, size_t high) noexcept { __device__ void argsort_gpu(const T* a, uint16_t* indices, const size_t l, const size_t h) noexcept {
const size_t total = high - low + 1; const size_t total = h - l + 1;
//int32_t* stack = new int32_t[total]{low, high}; //int32_t* stack = new int32_t[total]{l, h};
//int32_t stack[total]; //int32_t stack[total];
int32_t stack[6977]; int32_t stack[6977];
//int32_t stack[1<<16]; //int32_t stack[1<<16];
stack[0] = low; stack[0] = l;
stack[1] = high; stack[1] = h;
size_t top = 1; size_t top = 1, low = l, high = h;
while (top <= total) { while (top <= total) {
high = stack[top--]; high = stack[top--];
@@ -408,7 +296,7 @@ __device__ void argsort_kernel(const T* d_a, uint16_t* const d_indices, size_t l
if(low >= high) if(low >= high)
break; break;
const int32_t p = _as_partition_(d_a, d_indices, low, high); const int32_t p = as_partition_gpu(a, indices, low, high);
if (p - 1 > low && p - 1 < total) { if (p - 1 > low && p - 1 < total) {
stack[++top] = low; stack[++top] = low;
@@ -423,49 +311,42 @@ __device__ void argsort_kernel(const T* d_a, uint16_t* const d_indices, size_t l
//delete[] stack; //delete[] stack;
} }
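A host-side reference for cross-checking the kernel's output, as a sketch (assumes <algorithm> and <numeric> are available; host-only, not from this diff):

	#include <algorithm>
	#include <numeric>

	template<typename T>
	void argsort_host(const T* const a, uint16_t* const indices, const size_t n) noexcept {
		std::iota(indices, indices + n, static_cast<uint16_t>(0));	// identity permutation
		std::sort(indices, indices + n, [a](const uint16_t i, const uint16_t j) noexcept {
			return a[i] < a[j];	// order indices by the values they point to
		});
	}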
/**
 * @brief CUDA kernel where argsort is applied to every column of a given 2D array.
*
* @tparam T Inner type of the array
* @param d_a 2D Array on device to sort
* @param d_indices 2D Array of indices on device to write to
*/
template<typename T> template<typename T>
__global__ void argsort_bounded(const np::Array<T> d_a, uint16_t* const d_indices){ __global__ void argsort_bounded_gpu(const np::Array<T> a, uint16_t* indices){
const size_t idx = blockIdx.x * blockDim.x + threadIdx.x; const size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx >= d_a.shape[0]) if (idx >= a.shape[0])
return; return;
for(size_t y = 0; y < d_a.shape[1]; ++y) d_indices[idx * d_a.shape[1] + y] = y; for(size_t y = 0; y < a.shape[1]; ++y) indices[idx * a.shape[1] + y] = y;
argsort_kernel(&d_a[idx * d_a.shape[1]], &d_indices[idx * d_a.shape[1]], 0, d_a.shape[1] - 1); argsort_gpu(&a[idx * a.shape[1]], &indices[idx * a.shape[1]], 0, a.shape[1] - 1);
} }
/** np::Array<uint16_t> argsort_2d_gpu(const np::Array<int32_t>& X_feat) noexcept {
* @brief Perform an indirect sort on each column of a given 2D array (GPU version). const np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);
*
* @param a 2D Array to sort
* @return 2D Array of indices that sort the array
*/
np::Array<uint16_t> argsort_2d(const np::Array<int32_t>& a) noexcept {
const np::Array<uint16_t> indices = np::empty<uint16_t>(a.shape);
uint16_t* d_indices = nullptr; uint16_t* d_indices;
const size_t indices_size = np::prod(indices.shape) * sizeof(uint16_t); const size_t indices_size = np::prod(indices.shape) * sizeof(uint16_t);
np::Array<int32_t> d_a = copyToDevice<int32_t>("X_feat", a); np::Array<int32_t> d_X_feat = copyToDevice<int32_t>("X_feat", X_feat);
_print_cuda_error_("malloc d_indices", cudaMalloc(&d_indices, indices_size)); _print_cuda_error_("malloc d_indices", cudaMalloc(&d_indices, indices_size));
const size_t dimGrid = static_cast<size_t>(std::ceil(static_cast<float64_t>(a.shape[0]) / static_cast<float64_t>(NB_THREADS))); const size_t dimGrid = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS)));
const dim3 dimBlock(NB_THREADS); const dim3 dimBlock(NB_THREADS);
argsort_bounded<<<dimGrid, dimBlock>>>(d_a, d_indices); argsort_bounded_gpu<<<dimGrid, dimBlock>>>(d_X_feat, d_indices);
_print_cuda_error_("synchronize", cudaDeviceSynchronize()); _print_cuda_error_("synchronize", cudaDeviceSynchronize());
_print_cuda_error_("memcpy d_indices", cudaMemcpy(indices.data, d_indices, indices_size, cudaMemcpyDeviceToHost)); _print_cuda_error_("memcpy d_indices", cudaMemcpy(indices.data, d_indices, indices_size, cudaMemcpyDeviceToHost));
cudaFree("free d_a", d_a); cudaFree("free d_X_feat", d_X_feat);
_print_cuda_error_("free d_indices", cudaFree(d_indices)); _print_cuda_error_("free d_indices", cudaFree(d_indices));
return indices; return indices;
} }
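A verification sketch for the wrapper above (hypothetical check, using the new argsort_2d name; assumes row-major X_train_feat as elsewhere in this codebase):

	const np::Array<uint16_t> indices = argsort_2d(X_train_feat);
	const size_t n = X_train_feat.shape[1];
	for (size_t r = 0; r < X_train_feat.shape[0]; ++r)	// each row is sorted independently
		for (size_t k = 1; k < n; ++k)
			assert(X_train_feat[r * n + indices[r * n + k - 1]]
			    <= X_train_feat[r * n + indices[r * n + k]]);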
#endif // GPU_BOOSTED __host__ __device__
size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept {
size_t result = shape[offset];
for(size_t i = 1 + offset; i < shape.length; ++i)
result *= shape[i];
return result;
}
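For example, with a dataset shape of (n, h, w):

	// np::prod(shape)    == n * h * w	// offset 0 (default): total element count
	// np::prod(shape, 1) == h * w		// size of a single image, as used above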
cpp/ViolaJonesGPU.hpp Normal file
@@ -0,0 +1,8 @@
#pragma once
#include "data.hpp"
np::Array<uint32_t> set_integral_image_gpu(const np::Array<uint8_t>&) noexcept;
np::Array<int32_t> apply_features_gpu(const np::Array<uint8_t>&, const np::Array<uint32_t>&) noexcept;
np::Array<float64_t> train_weak_clf_gpu(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y,
const np::Array<float64_t>& weights) noexcept;
np::Array<uint16_t> argsort_2d_gpu(const np::Array<int32_t>& X_feat) noexcept;
@@ -1,39 +0,0 @@
#pragma once
#include "data.hpp"
/**
 * @brief Transform the input images into integrated images.
*
* @param X Dataset of images
* @return Dataset of integrated images
*/
np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>&) noexcept;
/**
 * @brief Apply the features on an integrated image dataset.
*
* @param feats Features to apply
* @param X_ii Integrated image dataset
* @return Applied features
*/
np::Array<int32_t> apply_features(const np::Array<uint8_t>&, const np::Array<uint32_t>&) noexcept;
/**
* @brief Train the weak classifiers on a given dataset.
*
* @param X_feat Feature images dataset
* @param X_feat_argsort Sorted indexes of the integrated features
* @param y Labels of the features
* @param weights Weights of the features
* @return Trained weak classifiers
*/
np::Array<float64_t> train_weak_clf(const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&,
const np::Array<float64_t>&) noexcept;
/**
* @brief Perform an indirect sort on each column of a given 2D array
*
* @param a 2D Array to sort
* @return 2D Array of indices that sort the array
*/
np::Array<uint16_t> argsort_2d(const np::Array<int32_t>&) noexcept;
@@ -1,7 +1,9 @@ #include "data.hpp" #include "data.hpp"
#include "data.hpp" #include "data.hpp"
//#include "toolbox.hpp"
//#include <cstring>
int32_t print(const np::Shape& shape) noexcept { int print(const np::Shape& shape) noexcept {
int32_t num_written = 0; int num_written = 0;
num_written += printf("("); num_written += printf("(");
if (shape.length > 1) { if (shape.length > 1) {
const size_t length = shape.length - 1; const size_t length = shape.length - 1;
@@ -15,12 +17,20 @@ int32_t print(const np::Shape& shape) noexcept {
} }
template<typename T> template<typename T>
int32_t print(const np::Array<T>& array, const char* const format) noexcept { int print(const np::Array<T>& array, const char* format) noexcept {
//printf("[");
//const size_t length = np::prod(array.shape);
//for(size_t i = 0; i < length - 1; ++i)
// //std::cout << array[i] << " ";
// printf("%f ", array[i]);
////std::cout << array[array.shape[0] - 1] << "]\n";
//printf("%f]\n", array[length - 1]);
char format_space[BUFFER_SIZE] = { 0 }; char format_space[BUFFER_SIZE] = { 0 };
snprintf(format_space, BUFFER_SIZE,"%s ", format); sprintf(format_space, "%s ", format);
char format_close[BUFFER_SIZE] = { 0 }; char format_close[BUFFER_SIZE] = { 0 };
snprintf(format_close, BUFFER_SIZE,"%s]\n", format); sprintf(format_close, "%s]\n", format);
int32_t num_written = 0; int num_written = 0;
if (array.shape.length == 1) { if (array.shape.length == 1) {
const size_t max = array.shape[0] - 1; const size_t max = array.shape[0] - 1;
@@ -43,16 +53,16 @@ int32_t print(const np::Array<T>& array, const char* const format) noexcept {
return num_written; return num_written;
} }
int32_t print(const np::Array<uint8_t>& array) noexcept { int print(const np::Array<uint8_t>& array) noexcept {
return print(array, "%hu"); return print(array, "%hu");
} }
int32_t print(const np::Array<float64_t>& array) noexcept { int print(const np::Array<float64_t>& array) noexcept {
return print(array, "%f"); return print(array, "%f");
} }
int32_t print_feat(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept { int print_feat(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept {
int32_t num_written = 0; int num_written = 0;
num_written += printf("["); num_written += printf("[");
const size_t feat_size = np::prod(array.shape, 1); const size_t feat_size = np::prod(array.shape, 1);
const size_t offset = slice.x * feat_size; const size_t offset = slice.x * feat_size;
@@ -64,10 +74,10 @@ int32_t print_feat(const np::Array<uint8_t>& array, const np::Slice& slice) noex
return num_written; return num_written;
} }
int32_t print(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept { int print(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept {
int32_t num_written = 0; int num_written = 0;
if (array.shape.length == 1) { if (array.shape.length == 1) {
const size_t max = slice.y - 1; const size_t max = slice.y - 1; //std::min(slice.y, array.shape[0] - 1);
num_written += printf("["); num_written += printf("[");
for (size_t i = slice.x; i < max; ++i) for (size_t i = slice.x; i < max; ++i)
num_written += printf("%hu ", array[i]); num_written += printf("%hu ", array[i]);
@@ -87,10 +97,10 @@ int32_t print(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept
return num_written; return num_written;
} }
int32_t print(const np::Array<uint32_t>& array, const np::Slice& slice) noexcept { int print(const np::Array<uint32_t>& array, const np::Slice& slice) noexcept {
int32_t num_written = 0; int num_written = 0;
if (array.shape.length == 1) { if (array.shape.length == 1) {
const size_t max = slice.y - 1; const size_t max = slice.y - 1; //std::min(slice.y, array.shape[0] - 1);
num_written += printf("["); num_written += printf("[");
for (size_t i = slice.x; i < max; ++i) for (size_t i = slice.x; i < max; ++i)
num_written += printf("%iu ", array[i]); num_written += printf("%iu ", array[i]);
@@ -105,35 +115,37 @@ int32_t print(const np::Array<uint32_t>& array, const np::Slice& slice) noexcept
num_written += printf("%5i ", array[k + i * array.shape[1] + j]); num_written += printf("%5i ", array[k + i * array.shape[1] + j]);
num_written += printf("]\n"); num_written += printf("]\n");
} }
num_written += printf("]"); num_written += print("]");
} }
return num_written; return num_written;
} }
int32_t print(const np::Array<int32_t>& array, const np::Slice& slice) noexcept { int print(const np::Array<int32_t>& array, const np::Slice& slice) noexcept {
int32_t num_written = 0; int num_written = 0;
num_written += printf("["); num_written += printf("[");
//size_t k = slice.x * array.shape[1] * array.shape[2] + slice.y * array.shape[2] + slice.z;
size_t k = slice.x * array.shape[1]; size_t k = slice.x * array.shape[1];
for (size_t i = k; i < k + (slice.y - slice.x); ++i) { for (size_t i = k; i < k + (slice.y - slice.x); ++i) {
num_written += printf("%5i ", array[i]); num_written += printf("%5i ", array[i]);
} }
num_written += printf("]"); num_written += print("]");
return num_written; return num_written;
} }
int32_t print(const np::Array<uint16_t>& array, const np::Slice& slice) noexcept { int print(const np::Array<uint16_t>& array, const np::Slice& slice) noexcept {
int32_t num_written = 0; int num_written = 0;
num_written += printf("["); num_written += printf("[");
//size_t k = slice.x * array.shape[1] * array.shape[2] + slice.y * array.shape[2] + slice.z;
size_t k = slice.x * array.shape[1]; size_t k = slice.x * array.shape[1];
for (size_t i = k; i < k + (slice.y - slice.x); ++i) { for (size_t i = k; i < k + (slice.y - slice.x); ++i) {
num_written += printf("%5hu ", array[i]); num_written += printf("%5hu ", array[i]);
} }
num_written += printf("]"); num_written += print("]");
return num_written; return num_written;
} }
static inline np::Array<uint8_t> load_set(const char* const set_name) { static inline np::Array<uint8_t> load_set(const char* set_name) {
FILE* const file = fopen(set_name, "rb"); FILE* file = fopen(set_name, "rb");
if (file == NULL) { if (file == NULL) {
print_error_file(set_name); print_error_file(set_name);
throw; throw;
@@ -144,7 +156,7 @@ static inline np::Array<uint8_t> load_set(const char* const set_name) {
fclose(file); fclose(file);
throw; throw;
} }
size_t* const dims = new size_t[3](); size_t* dims = new size_t[3]();
if (!sscanf(meta, "%lu %lu %lu", &dims[0], &dims[1], &dims[2])) { if (!sscanf(meta, "%lu %lu %lu", &dims[0], &dims[1], &dims[2])) {
print_error_file(set_name); print_error_file(set_name);
fclose(file); fclose(file);
@@ -155,12 +167,13 @@ static inline np::Array<uint8_t> load_set(const char* const set_name) {
const size_t size = np::prod(a.shape); const size_t size = np::prod(a.shape);
size_t i = 0, j = 0; size_t i = 0, j = 0;
int32_t c; int c;
char buff[STRING_INT_SIZE] = { 0 }; char buff[STRING_INT_SIZE] = { 0 };
while ((c = fgetc(file)) != EOF && i < size) { while ((c = fgetc(file)) != EOF && i < size) {
if (c == ' ' || c == '\n') { if (c == ' ' || c == '\n') {
buff[j] = '\0'; buff[j] = '\0';
a[i++] = static_cast<uint8_t>(atoi(buff)); a[i++] = static_cast<uint8_t>(atoi(buff));
//memset(buff, 0, STRING_INT_SIZE);
j = 0; j = 0;
} }
else else
@@ -178,20 +191,22 @@ static inline np::Array<uint8_t> load_set(const char* const set_name) {
return a; return a;
} }
/** std::array<np::Array<uint8_t>, 4> load_datasets() {
* @brief Load the datasets.
*
 * @return Array containing X_train, y_train, X_test, y_test
*/
std::array<np::Array<uint8_t>, 4> load_datasets(void) {
return { return {
load_set(DATA_DIR "/X_train.bin"), load_set(DATA_DIR "/y_train.bin"), load_set(DATA_DIR "/X_train.bin"), load_set(DATA_DIR "/y_train.bin"),
load_set(DATA_DIR "/X_test.bin"), load_set(DATA_DIR "/y_test.bin") load_set(DATA_DIR "/X_test.bin"), load_set(DATA_DIR "/y_test.bin")
}; };
} }
void print_error_file(const char* const file_dir) noexcept { void print_error_file(const char* file_dir) noexcept {
const char* const buff = strerror(errno); const char* buff = strerror(errno);
fprintf(stderr, "Can't open %s, error code = %d : %s\n", file_dir, errno, buff); fprintf(stderr, "Can't open %s, error code = %d : %s\n", file_dir, errno, buff);
// delete buff; // delete buff;
} }
//size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept {
// size_t result = shape[offset];
// for(size_t i = 1 + offset; i < shape.length; ++i)
// result *= shape[i];
// return result;
//}
@@ -4,7 +4,7 @@
#include <cmath> #include <cmath>
#include <cassert> #include <cassert>
#include <functional> #include <functional>
#include <stdint.h> #include <memory>
#include "config.hpp" #include "config.hpp"
#define BUFFER_SIZE 256 #define BUFFER_SIZE 256
@@ -19,6 +19,15 @@ typedef float float32_t;
typedef double float64_t; typedef double float64_t;
typedef long double float128_t; typedef long double float128_t;
__host__ __device__
constexpr inline int print(const char* str) noexcept {
return printf("%s\n", str);
}
inline int print(const std::string& s) noexcept {
return printf("%s\n", s.c_str());
}
namespace np { namespace np {
constexpr const float64_t inf = std::numeric_limits<float64_t>::infinity(); constexpr const float64_t inf = std::numeric_limits<float64_t>::infinity();
@@ -35,16 +44,16 @@ namespace np {
#endif #endif
__host__ __device__ __host__ __device__
Shape(void) noexcept { Shape() noexcept {
#if __DEBUG // #if __DEBUG
printf("Shape created (default)\n"); // print("Shape created (default)");
#endif // #endif
} }
__host__ __device__ __host__ __device__
Shape(const size_t& length, size_t* const data) noexcept : length(length), data(data), refcount(new size_t(1)) { Shape(const size_t& length, size_t* data) noexcept : length(length), data(data), refcount(new size_t(1)) {
#if __DEBUG #if __DEBUG
printf("Shape created (raw)\n"); //print("Shape created (raw)");
for(size_t i = 0; i < length; ++i) for(size_t i = 0; i < length; ++i)
total *= data[i]; total *= data[i];
#endif #endif
@@ -52,10 +61,10 @@ namespace np {
__host__ __device__ __host__ __device__
Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) { Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
#if __DEBUG // #if __DEBUG
printf("Shape created (initializer)\n"); // print("Shape created (initializer)");
#endif // #endif
const size_t* const begin = dims.begin(); const size_t* begin = dims.begin();
for(size_t i = 0; i < length; ++i){ for(size_t i = 0; i < length; ++i){
data[i] = begin[i]; data[i] = begin[i];
#if __DEBUG #if __DEBUG
@@ -67,49 +76,52 @@ namespace np {
__host__ __device__ __host__ __device__
Shape(const Shape& shape) noexcept { Shape(const Shape& shape) noexcept {
#if __DEBUG #if __DEBUG
printf("Shape created (copy)\n"); print("Shape created (copy)");
#endif #endif
if (data != nullptr && data != shape.data){ if (data != nullptr && data != shape.data){
#if __DEBUG #if __DEBUG
printf("Former shape deleted (copy)\n"); print("Former shape deleted (copy)");
#endif #endif
delete[] data; delete[] data;
} }
if (refcount != nullptr && refcount != shape.refcount){ if (refcount != nullptr && refcount != shape.refcount){
#if __DEBUG #if __DEBUG
printf("Former shape refcount freed (copy)\n"); print("Former shape refcount freed (copy)");
#endif #endif
delete refcount; delete refcount;
} }
length = shape.length; length = shape.length;
//data = new size_t[length];
//memcpy(data, shape.data, length * sizeof(size_t));
//refcount = new size_t;
//memcpy(refcount, shape.refcount, sizeof(size_t));
data = shape.data; data = shape.data;
refcount = shape.refcount; refcount = shape.refcount;
if (refcount != nullptr) if (refcount != nullptr)
(*refcount)++; (*refcount)++;
#if __DEBUG #if __DEBUG
else else
printf("Moved shape has null refcount\n"); print("Moved shape has null refcount");
#endif
#if __DEBUG
total = shape.total; total = shape.total;
#endif #endif
} }
__host__ __device__ __host__ __device__
Shape(Shape&& shape) noexcept { Shape(Shape&& shape) noexcept {
#if __DEBUG // #if __DEBUG
printf("Shape created (move)\n"); // print("Shape created (move));
#endif // #endif
if (data != nullptr && data != shape.data){ if (data != nullptr && data != shape.data){
#if __DEBUG #if __DEBUG
printf("Former shape deleted (move)\n"); print("Former shape deleted (move)");
#endif #endif
delete[] data; delete[] data;
} }
if (refcount != nullptr && refcount != shape.refcount){ if (refcount != nullptr && refcount != shape.refcount){
#if __DEBUG #if __DEBUG
printf("Former shape refcount freed (move)\n"); print("Former shape refcount freed (move)");
#endif #endif
delete refcount; delete refcount;
} }
@@ -127,29 +139,29 @@
} }
__host__ __device__ __host__ __device__
~Shape(void) noexcept { ~Shape() noexcept {
if(refcount == nullptr){ if(refcount == nullptr){
#if __DEBUG // #if __DEBUG
printf("Shape refcount freed more than once\n"); // print("Shape refcount freed more than once");
#endif // #endif
return; return;
} }
--(*refcount); --(*refcount);
#if __DEBUG // #if __DEBUG
printf("Shape destructed : %lu\n", *refcount); // printf("Shape destructed : %lu\n", *refcount);
#endif // #endif
if(*refcount == 0){ if(*refcount == 0){
if (data != nullptr){ if (data != nullptr){
delete[] data; delete[] data;
data = nullptr; data = nullptr;
#if __DEBUG // #if __DEBUG
printf("Shape freeing ...\n"); // print("Shape freeing ...");
#endif // #endif
} }
#if __DEBUG //#if __DEBUG
else else
printf("Shape freed more than once : %lu\n", *refcount); printf("Shape freed more than once : %lu\n", *refcount);
#endif //#endif
delete refcount; delete refcount;
refcount = nullptr; refcount = nullptr;
#if __DEBUG #if __DEBUG
@@ -161,29 +173,34 @@
__host__ __device__ __host__ __device__
Shape& operator=(const Shape& shape) noexcept { Shape& operator=(const Shape& shape) noexcept {
#if __DEBUG #if __DEBUG
printf("Shape created (assign copy)\n"); print("Shape created (assign copy)");
#endif #endif
if (data != nullptr && data != shape.data){ if (data != nullptr && data != shape.data){
#if __DEBUG #if __DEBUG
printf("Former shape deleted (assign copy)\n"); print("Former shape deleted (assign copy)");
#endif #endif
delete[] data; delete[] data;
} }
if (refcount != nullptr && refcount != shape.refcount){ if (refcount != nullptr && refcount != shape.refcount){
#if __DEBUG #if __DEBUG
printf("Former shape refcount freed (assign copy)\n"); print("Former shape refcount freed (assign copy)");
#endif #endif
delete refcount; delete refcount;
} }
length = shape.length; length = shape.length;
// data = new size_t[length];
// memcpy(data, shape.data, length * sizeof(size_t));
// refcount = new size_t;
// memcpy(refcount, shape.refcount, sizeof(size_t));
data = shape.data; data = shape.data;
refcount = shape.refcount; refcount = shape.refcount;
if (refcount != nullptr) if (refcount != nullptr)
(*refcount)++; (*refcount)++;
#if __DEBUG #if __DEBUG
else else
printf("Assigned copy shape has null refcount\n"); printf("Assigned copy shape has null refcount");
total = shape.total; total = shape.total;
#endif #endif
return *this; return *this;
@@ -191,18 +208,18 @@
__host__ __device__ __host__ __device__
Shape& operator=(Shape&& shape) noexcept { Shape& operator=(Shape&& shape) noexcept {
#if __DEBUG // #if __DEBUG
printf("Shape created (assign move)\n"); // print("Shape created (assign move)");
#endif // #endif
if (data != nullptr && data != shape.data){ if (data != nullptr && data != shape.data){
#if __DEBUG #if __DEBUG
printf("Former shape deleted (assign move)\n"); print("Former shape deleted (assign move)");
#endif #endif
delete[] data; delete[] data;
} }
if (refcount != nullptr && refcount != shape.refcount){ if (refcount != nullptr && refcount != shape.refcount){
#if __DEBUG #if __DEBUG
printf("Former shape refcount freed (assign move)\n"); print("Former shape refcount freed (assign move)");
#endif #endif
delete refcount; delete refcount;
} }
@@ -210,9 +227,9 @@
data = shape.data; data = shape.data;
refcount = shape.refcount; refcount = shape.refcount;
#if __DEBUG #if __DEBUG
if (refcount == nullptr)
printf("Assigned copy shape has null refcount\n");
total = shape.total; total = shape.total;
if (refcount == nullptr)
print("Assigned copy shape has null refcount");
shape.total = 1; shape.total = 1;
#endif #endif
shape.length = 0; shape.length = 0;
@@ -263,57 +280,62 @@
size_t* refcount = nullptr; size_t* refcount = nullptr;
__host__ __device__ __host__ __device__
Array(void) noexcept { Array() noexcept {
#if __DEBUG // #if __DEBUG
printf("Array created (default)\n"); // print("Array created (default)");
#endif // #endif
} }
__host__ __device__ __host__ __device__
Array(const Shape& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) { Array(const Shape& shape, T* data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
#if __DEBUG // #if __DEBUG
printf("Array created (raw, copy shape)\n"); // print("Array created (raw, copy shape)");
#endif // #endif
} }
__host__ __device__ __host__ __device__
Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) { Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
#if __DEBUG // #if __DEBUG
printf("Array created (raw empty, copy shape)\n"); // print("Array created (raw empty, copy shape)");
#endif // #endif
} }
__host__ __device__ __host__ __device__
Array(Shape&& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) { Array(Shape&& shape, T* data) noexcept : shape(std::move(shape)), data(data), refcount(new size_t(1)) {
#if __DEBUG // #if __DEBUG
printf("Array created (raw, move shape)\n"); // print("Array created (raw, move shape)");
#endif // #endif
} }
__host__ __device__ __host__ __device__
Array(Shape&& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) { Array(Shape&& shape) noexcept : shape(std::move(shape)), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
#if __DEBUG // #if __DEBUG
printf("Array created (raw empty, move shape)\n"); // print("Array created (raw empty, move shape)");
#endif // #endif
} }
__host__ __device__ __host__ __device__
Array(const Array& array) noexcept : shape(array.shape) { Array(const Array& array) noexcept : shape(array.shape) {
#if __DEBUG #if __DEBUG
printf("Array created (copy)\n"); print("Array created (copy)");
#endif #endif
if (data != nullptr && data != array.data){ if (data != nullptr && data != array.data){
#if __DEBUG #ifdef __debug
printf("Former array deleted (copy)\n"); print("Former array deleted (move)");
#endif #endif
delete[] data; delete[] data;
} }
if (refcount != nullptr && refcount != array.refcount){ if (refcount != nullptr && refcount != array.refcount){
#if __DEBUG #if __DEBUG
printf("Former array refcount freed (copy)\n"); print("Former array refcount freed (move)");
#endif #endif
delete refcount; delete refcount;
} }
// const size_t size = np::prod(shape);
// data = new T[size];
// memcpy(data, array.data, size);
// refcount = new size_t;
// memcpy(refcount, array.refcount, sizeof(size_t));
data = array.data; data = array.data;
refcount = array.refcount; refcount = array.refcount;
@@ -321,27 +343,28 @@
(*refcount)++; (*refcount)++;
#if __DEBUG #if __DEBUG
else else
printf("Moved array has null refcount\n"); print("Moved array has null refcount");
#endif #endif
} }
__host__ __device__ __host__ __device__
Array(Array&& array) noexcept : shape(std::move(array.shape)) { Array(Array&& array) noexcept {
#if __DEBUG // #if __DEBUG
printf("Array created (move)\n"); // print("Array created (move)");
#endif // #endif
if (data != nullptr && data != array.data){ if (data != nullptr && data != array.data){
#if __DEBUG #if __DEBUG
printf("Former array deleted (move)\n"); print("Former array deleted (move)");
#endif #endif
delete[] data; delete[] data;
} }
if (refcount != nullptr && refcount != array.refcount){ if (refcount != nullptr && refcount != array.refcount){
#if __DEBUG #if __DEBUG
printf("Former array refcount freed (move)\n"); print("Former array refcount freed (move)");
#endif #endif
delete refcount; delete refcount;
} }
shape = std::move(array.shape);
data = array.data; data = array.data;
refcount = array.refcount; refcount = array.refcount;
@@ -350,24 +373,24 @@
} }
__host__ __device__ __host__ __device__
~Array(void) noexcept { ~Array() noexcept {
if(refcount == nullptr){ if(refcount == nullptr){
#if __DEBUG // #if __DEBUG
printf("Array refcount freed more than once\n"); // print("Array refcount freed more than once");
#endif // #endif
return; return;
} }
--(*refcount); --(*refcount);
#if __DEBUG // #if __DEBUG
printf("Array destructed : %lu\n", *refcount); // printf("Array destructed : %lu\n", *refcount);
#endif // #endif
if(*refcount == 0){ if(*refcount == 0){
if (data != nullptr){ if (data != nullptr){
delete[] data; delete[] data;
data = nullptr; data = nullptr;
#if __DEBUG // #if __DEBUG
printf("Array freeing ...\n"); // print("Array freeing ...");
#endif // #endif
} }
#if __DEBUG #if __DEBUG
else else
@@ -381,47 +404,53 @@
__host__ __device__ __host__ __device__
Array& operator=(const Array& array) noexcept { Array& operator=(const Array& array) noexcept {
#if __DEBUG #if __DEBUG
printf("Array created (assign copy)\n"); print("Array created (assign copy)");
#endif #endif
if (data != nullptr && data != array.data){ if (data != nullptr && data != array.data){
#if __DEBUG #if __DEBUG
printf("Former array deleted (assign copy)\n"); print("Former array deleted (assign copy)");
#endif #endif
delete[] data; delete[] data;
} }
if (refcount != nullptr && refcount != array.refcount){ if (refcount != nullptr && refcount != array.refcount){
#if __DEBUG #if __DEBUG
printf("Former array refcount freed (assign copy)\n"); print("Former array refcount freed (assign copy)");
#endif #endif
delete refcount; delete refcount;
} }
shape = array.shape; shape = array.shape;
// const size_t size = np::prod(shape) * sizeof(T);
// data = new T[size];
// memcpy(data, array.data, size);
// refcount = new size_t;
// memcpy(refcount, array.refcount, sizeof(size_t));
data = array.data; data = array.data;
refcount = array.refcount; refcount = array.refcount;
if (refcount != nullptr) if (refcount != nullptr)
(*refcount)++; (*refcount)++;
#if __DEBUG #if __DEBUG
else else
printf("Assigned array has null refcount\n"); print("Assigned array has null refcount");
#endif #endif
return *this; return *this;
} }
__host__ __device__ __host__ __device__
Array& operator=(Array&& array) noexcept { Array& operator=(Array&& array) noexcept {
#if __DEBUG // #if __DEBUG
printf("Array created (assign move)\n"); // print("Array created (assign move)");
#endif // #endif
if (data != nullptr && data != array.data){ if (data != nullptr && data != array.data){
#if __DEBUG #if __DEBUG
printf("Former array deleted (assign move)\n"); print("Former array deleted (assign move)");
#endif #endif
delete[] data; delete[] data;
} }
if (refcount != nullptr && refcount != array.refcount){ if (refcount != nullptr && refcount != array.refcount){
#if __DEBUG #if __DEBUG
printf("Former array refcount freed (assign move)\n"); print("Former array refcount freed (assign move)");
#endif #endif
delete refcount; delete refcount;
} }
@@ -456,39 +485,35 @@
}; };
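Both Shape and Array share their buffers through this manual reference count; a small demonstration sketch (an assumption-laden example, not from this diff: it relies on refcount being a public member and __DEBUG being off):

	void refcount_demo(void) noexcept {
		np::Array<int32_t> a = np::empty<int32_t>({ 2, 2 });	// fresh buffer, count == 1
		{
			const np::Array<int32_t> b = a;			// copy shares data, count == 2
			assert(*b.refcount == 2);
		}							// b destructed, count back to 1
		assert(*a.refcount == 1);				// last owner frees on destruction
	}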
template<typename T> template<typename T>
inline Array<T> empty(Shape&& shape) noexcept { Array<T> empty(Shape&& shape) noexcept {
return Array<T>(shape); return { std::move(shape), new T[np::prod(shape)] };
} }
template<typename T> template<typename T>
inline Array<T> empty(const Shape& shape) noexcept { Array<T> empty(const Shape& shape) noexcept {
return Array<T>(shape); return { std::move(shape), new T[np::prod(shape)] };
} }
template<typename T> template<typename T>
inline Array<T> empty(const std::initializer_list<size_t>& dims) noexcept { Array<T> empty(const std::initializer_list<size_t>& dims) noexcept {
return Array<T>(dims); const Shape shape(dims);
return { std::move(shape), new T[np::prod(shape)] };
} }
template<typename T> template<typename T>
Array<T> zeros(Shape&& shape) noexcept { Array<T> zeros(Shape&& shape) noexcept {
Array<T> res(shape); return { std::move(shape), new T[np::prod(shape)]{0} };
memset(res.data, 0, sizeof(T) * np::prod(res.shape));
return res;
} }
template<typename T> template<typename T>
Array<T> zeros(const Shape& shape) noexcept { Array<T> zeros(const Shape& shape) noexcept {
Array<T> res(shape); return { std::move(shape), new T[np::prod(shape)]{0} };
memset(res.data, 0, sizeof(T) * np::prod(res.shape));
return res;
} }
template<typename T> template<typename T>
Array<T> zeros(const std::initializer_list<size_t>& dims) noexcept { Array<T> zeros(const std::initializer_list<size_t>& dims) noexcept {
Array<T> res(dims); const Shape shape(dims);
memset(res.data, 0, sizeof(T) * np::prod(res.shape)); return { std::move(shape), new T[np::prod(shape)]{0} };
return res;
} }
template<typename T> template<typename T>
@@ -748,7 +773,7 @@ constexpr np::Array<T>& map(np::Array<T>& a, const std::function<T(const size_t&
template<typename T> template<typename T>
__host__ __device__ __host__ __device__
constexpr inline static void swap(T* const a, T* const b) noexcept { constexpr inline static void swap(T* a, T* b) noexcept {
if (a == b) return; if (a == b) return;
const T temp = *a; const T temp = *a;
*a = *b; *a = *b;
@@ -780,7 +805,7 @@ void quicksort(const np::Array<T>& a) noexcept {
} }
template<typename T> template<typename T>
static size_t as_partition(const T* const a, uint16_t* const indices, const size_t& l, const size_t& h) noexcept { static size_t as_partition(const T* a, uint16_t* indices, const size_t& l, const size_t& h) noexcept {
size_t i = l - 1; size_t i = l - 1;
for (size_t j = l; j <= h; ++j) for (size_t j = l; j <= h; ++j)
if (a[indices[j]] < a[indices[h]]) if (a[indices[j]] < a[indices[h]])
@@ -789,27 +814,69 @@ static size_t as_partition(const T* const a, uint16_t* const indices, const size
return i; return i;
} }
std::array<np::Array<uint8_t>, 4> load_datasets(void); template<typename T>
void print_error_file(const char* const) noexcept; void argsort(const T* a, uint16_t* indices, const size_t& l, const size_t& h) noexcept {
const size_t total = h - l + 1;
size_t* stack = new size_t[total]{l, h};
size_t top = 1, low = l, high = h;
while (top <= total) {
high = stack[top--];
low = stack[top--];
if(low >= high)
break;
const size_t p = as_partition(a, indices, low, high);
if (p - 1 > low && p - 1 < total) {
stack[++top] = low;
stack[++top] = p - 1;
}
if (p + 1 < high) {
stack[++top] = p + 1;
stack[++top] = high;
}
}
delete[] stack;
}
template<typename T> template<typename T>
void save(const np::Array<T>& d, const char* const filename) { np::Array<uint16_t> argsort(const np::Array<T>& other, const size_t& l, const size_t& h) noexcept {
FILE* const output = fopen(filename, "wb"); np::Array<uint16_t> indices = np::empty(other.shape);
map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; });
argsort(other, indices, l, h);
return indices;
}
template<typename T>
np::Array<uint16_t> argsort(const np::Array<T>* other, const size_t& length) noexcept {
return argsort(other, 0, length - 1);
}
std::array<np::Array<uint8_t>, 4> load_datasets(void);
void print_error_file(const char*) noexcept;
template<typename T>
void save(const np::Array<T>& d, const char* filename) {
FILE* output = fopen(filename, "wb");
if (output == NULL) { if (output == NULL) {
print_error_file(filename); print_error_file(filename);
throw; throw;
} }
assert(d.shape.refcount != 0); assert(d.shape.refcount != 0);//, "Refcount shape is zero !!");
fwrite(&d.shape.length, sizeof(size_t), 1, output); fwrite(&d.shape.length, sizeof(size_t), 1, output);
fwrite(d.shape.data, sizeof(size_t), d.shape.length, output); fwrite(d.shape.data, sizeof(size_t), d.shape.length, output);
assert(d.refcount != 0); assert(d.refcount != 0);//, "Refcount array is zero !!");
fwrite(d.data, sizeof(T), np::prod(d.shape), output); fwrite(d.data, sizeof(T), np::prod(d.shape), output);
fclose(output); fclose(output);
} }
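A round-trip sketch for save/load (hypothetical path; assumes the out directory already exists):

	void save_load_demo(void) {
		const np::Array<float64_t> alphas = np::zeros<float64_t>({ 8 });
		save<float64_t>(alphas, "out/alphas_demo.bin");
		const np::Array<float64_t> restored = load<float64_t>("out/alphas_demo.bin");
		assert(np::prod(restored.shape) == 8);	// same shape and contents as written
	}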
template<typename T> template<typename T>
np::Array<T> load(const char* const filename) { np::Array<T> load(const char* filename) {
FILE* const input = fopen(filename, "rb"); FILE* input = fopen(filename, "rb");
if (input == NULL) { if (input == NULL) {
print_error_file(filename); print_error_file(filename);
throw; throw;
@@ -820,7 +887,7 @@ np::Array<T> load(const char* const filename) {
fclose(input); fclose(input);
throw; throw;
} }
size_t* const data = new size_t[length]; size_t* data = new size_t[length];
if(!fread(data, sizeof(size_t), length, input)){ if(!fread(data, sizeof(size_t), length, input)){
print_error_file(filename); print_error_file(filename);
fclose(input); fclose(input);
@@ -838,7 +905,7 @@ np::Array<T> load(const char* const filename) {
#ifdef __CUDACC__ #ifdef __CUDACC__
template<typename T> template<typename T>
np::Array<T> copyToDevice(const char* const name, const np::Array<T>& array) noexcept { np::Array<T> copyToDevice(const char* name, const np::Array<T>& array) noexcept {
const size_t array_size = np::prod(array.shape) * sizeof(T); const size_t array_size = np::prod(array.shape) * sizeof(T);
const size_t shape_size = array.shape.length * sizeof(size_t); const size_t shape_size = array.shape.length * sizeof(size_t);
np::Array<T> d_array; np::Array<T> d_array;
@@ -858,7 +925,7 @@ np::Array<T> copyToDevice(const char* const name, const np::Array<T>& array) noe
} }
template<typename T> template<typename T>
constexpr void cudaFree(const char* const name, np::Array<T>& array) noexcept { constexpr void cudaFree(const char* name, np::Array<T>& array) noexcept {
//_print_cuda_error_(name, cudaFree(array.refcount)); //_print_cuda_error_(name, cudaFree(array.refcount));
//array.refcount = nullptr; //array.refcount = nullptr;
_print_cuda_error_(name, cudaFree(array.data)); _print_cuda_error_(name, cudaFree(array.data));
@@ -869,16 +936,16 @@ constexpr void cudaFree(const char* const name, np::Array<T>& array) noexcept {
array.shape.data = nullptr; array.shape.data = nullptr;
} }
constexpr inline void _print_cuda_error_(const char* const name, const cudaError_t& err) noexcept { constexpr inline void _print_cuda_error_(const char* name, const cudaError_t& err) noexcept {
if (err != cudaSuccess) fprintf(stderr, "Error: %s = %d : %s\n", name, err, cudaGetErrorString(err)); if (err != cudaSuccess) fprintf(stderr, "Error: %s = %d : %s\n", name, err, cudaGetErrorString(err));
} }
#endif #endif
int32_t print(const np::Shape&) noexcept; int print(const np::Shape&) noexcept;
int32_t print(const np::Array<uint8_t>&) noexcept; int print(const np::Array<uint8_t>&) noexcept;
int32_t print(const np::Array<float64_t>&) noexcept; int print(const np::Array<float64_t>&) noexcept;
int32_t print(const np::Array<uint8_t>&, const np::Slice&) noexcept; int print(const np::Array<uint8_t>&, const np::Slice&) noexcept;
int32_t print(const np::Array<uint32_t>&, const np::Slice&) noexcept; int print(const np::Array<uint32_t>&, const np::Slice&) noexcept;
int32_t print(const np::Array<int32_t>&, const np::Slice&) noexcept; int print(const np::Array<int32_t>&, const np::Slice&) noexcept;
int32_t print(const np::Array<uint16_t>&, const np::Slice&) noexcept; int print(const np::Array<uint16_t>&, const np::Slice&) noexcept;
int32_t print_feat(const np::Array<uint8_t>&, const np::Slice&) noexcept; int print_feat(const np::Array<uint8_t>&, const np::Slice&) noexcept;
@@ -1,16 +0,0 @@
#include "data.hpp"
/**
 * @brief Product of every element in a given shape after a given offset.
 *
 * @param shape Shape to take the product over
* @param offset Skip offset
* @return Scalar product
*/
__host__ __device__
size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept {
size_t result = shape[offset];
for(size_t i = 1 + offset; i < shape.length; ++i)
result *= shape[i];
return result;
}
@@ -1,15 +0,0 @@
services:
violajones-cpp:
image: saundersp/violajones-cpp
build: .
volumes:
- ./models:/home/ViolaJones/cpp/models
- ./out:/home/ViolaJones/cpp/out
- ../data:/home/ViolaJones/data
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
@@ -11,7 +11,7 @@ void test_working(const size_t& length) noexcept {
const size_t size = length * sizeof(size_t); const size_t size = length * sizeof(size_t);
#if __DEBUG #if __DEBUG
printf("Estimating memory footprint at : %s\n", format_byte_size(2 * size).c_str()); print("Estimating memory footprint at : " + format_byte_size(2 * size));
#endif #endif
np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length }); np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length });
@@ -53,7 +53,7 @@ void test_working_2d(const size_t& N1, const size_t& N2) noexcept {
const size_t size = length * sizeof(size_t); const size_t size = length * sizeof(size_t);
#if __DEBUG #if __DEBUG
printf("Estimating memory footprint at : %s\n", format_byte_size(2 * size).c_str()); print("Estimating memory footprint at : " + format_byte_size(2 * size));
#endif #endif
np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length }); np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length });
@@ -96,7 +96,7 @@ void test_working_3d(const size_t& N1, const size_t& N2, const size_t& N3) noexc
const size_t size = length * sizeof(size_t); const size_t size = length * sizeof(size_t);
#if __DEBUG #if __DEBUG
printf("Estimating memory footprint at : %s\n", format_byte_size(2 * size).c_str()); print("Estimating memory footprint at : " + format_byte_size(2 * size));
#endif #endif
np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length }); np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length });
@@ -1,54 +1,50 @@ #include <filesystem> #include <filesystem>
#include <filesystem> #include <filesystem>
namespace fs = std::filesystem;
#include "data.hpp" #include "data.hpp"
#include "toolbox.hpp" #include "toolbox.hpp"
#include "config.hpp" #include "config.hpp"
#include "gpu_unit_test.hpp"
#include "toolbox_unit_test.hpp" #include "toolbox_unit_test.hpp"
#include "ViolaJones.hpp" #include "ViolaJones.hpp"
#include "ViolaJones_device.hpp" #include "ViolaJonesGPU.hpp"
#include "ViolaJonesCPU.hpp"
#if GPU_BOOSTED #if GPU_BOOSTED
#include "gpu_unit_test.hpp"
#define LABEL "GPU" #define LABEL "GPU"
#define apply_features apply_features_gpu
#define set_integral_image set_integral_image_gpu
#define argsort_2d argsort_2d_gpu
#else #else
#define LABEL "CPU" #define LABEL "CPU"
#define apply_features apply_features_cpu
#define set_integral_image set_integral_image_cpu
#define argsort_2d argsort_2d_cpu
#endif #endif
/** std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Array<int32_t>, np::Array<uint8_t>> preprocessing() {
* @brief Execute the preprocessing phase
*
 * The preprocessing phase consists of the following steps:
* - Load the dataset
* - Calculate features
* - Calculate integral images
* - Apply features to images
* - Calculate argsort of the featured images
*
 * @return std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Array<int32_t>, np::Array<uint8_t>> Tuple containing, in order: training features, training features sorted indexes, training labels, testing features, testing labels
*/
std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Array<int32_t>, np::Array<uint8_t>> preprocessing(void) {
// Creating state saver folders if they don't exist already // Creating state saver folders if they don't exist already
if (SAVE_STATE) if (SAVE_STATE)
for (const char* const folder_name : { "models", "out" }) for (const char* const folder_name : { "models", "out" })
std::filesystem::create_directory(folder_name); fs::create_directory(folder_name);
const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns(); const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns();
const std::array<int32_t, 3> preproc_gaps = { 49, -18, 29 }; const std::array<int32_t, 3> preproc_gaps = { 49, -18, 29 };
header(preproc_gaps, { "Preprocessing", "Time spent (ns)", "Formatted time spent" }); header({ "Preprocessing", "Time spent (ns)", "Formatted time spent" }, preproc_gaps);
const auto [ X_train, y_train, X_test, y_test ] = state_saver<uint8_t, 4>("Loading sets", preproc_gaps[0], { "X_train", "y_train", "X_test", "y_test" }, const auto [ X_train, y_train, X_test, y_test ] = state_saver<uint8_t, 4>("Loading sets", preproc_gaps[0], {"X_train", "y_train", "X_test", "y_test"},
FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets); FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets);
#if __DEBUG #if __DEBUG
printf("X_train\n"); print("X_train");
print(X_train.shape); print(X_train.shape);
print(X_train, { IDX_INSPECT }); print(X_train, { IDX_INSPECT });
printf("X_test\n"); print("X_test");
print(X_test.shape); print(X_test.shape);
print(X_test, { IDX_INSPECT }); print(X_test, { IDX_INSPECT });
printf("y_train\n"); print("y_train");
print(y_train.shape); print(y_train.shape);
print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
printf("y_test\n"); print("y_test");
print(y_test.shape); print(y_test.shape);
print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
#endif #endif
@@ -57,7 +53,7 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
FORCE_REDO, SAVE_STATE, OUT_DIR, build_features, X_train.shape[1], X_train.shape[2]); FORCE_REDO, SAVE_STATE, OUT_DIR, build_features, X_train.shape[1], X_train.shape[2]);
#if __DEBUG #if __DEBUG
printf("feats\n"); print("feats");
print(feats.shape); print(feats.shape);
print_feat(feats, { IDX_INSPECT }); print_feat(feats, { IDX_INSPECT });
#endif #endif
@@ -68,10 +64,10 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_test); FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_test);
#if __DEBUG #if __DEBUG
printf("X_train_ii\n"); print("X_train_ii");
print(X_train_ii.shape); print(X_train_ii.shape);
print(X_train_ii, { IDX_INSPECT }); print(X_train_ii, { IDX_INSPECT });
printf("X_test_ii\n"); print("X_test_ii");
print(X_test_ii.shape); print(X_test_ii.shape);
print(X_test_ii, { IDX_INSPECT }); print(X_test_ii, { IDX_INSPECT });
#endif #endif
@@ -82,15 +78,16 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_test_ii); FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_test_ii);
#if __DEBUG #if __DEBUG
printf("X_train_feat\n"); print("X_train_feat");
print(X_train_feat.shape); print(X_train_feat.shape);
print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
printf("X_test_feat\n"); print("X_test_feat");
print(X_test_feat.shape); print(X_test_feat.shape);
print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
#endif #endif
// const np::Array<int32_t> indices = state_saver<int32_t>("Selecting best features", preproc_gaps[0], "indices", select_percentile, X_train_feat, d.y_train); // const Array<int> indices = measure_time_save<Array<int>>("Selecting best features", "indices", select_percentile, X_train_feat, d.y_train);
// const Array<int> indices = measure_time<Array<int>>("Selecting best features", select_percentile, X_train_feat, d.y_train);
#if __DEBUG #if __DEBUG
// print_feature(indices); // print_feature(indices);
@@ -100,18 +97,18 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_train_feat); FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_train_feat);
#if __DEBUG #if __DEBUG
printf("X_train_feat_argsort\n"); print("X_train_feat_argsort");
print(X_train_feat_argsort.shape); print(X_train_feat_argsort.shape);
print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
#endif #endif
const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", preproc_gaps[0], "X_test_feat_argsort_" LABEL, // const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", preproc_gaps[0], "X_test_feat_argsort_" LABEL,
FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat); // FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);
#if __DEBUG #if __DEBUG
printf("X_test_feat_argsort\n"); // printf("X_test_feat_argsort\n");
print(X_test_feat_argsort.shape); // print(X_test_feat_argsort.shape);
print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET }); // print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
#endif #endif
const long long time_spent = duration_ns(perf_counter_ns() - preproc_timestamp); const long long time_spent = duration_ns(perf_counter_ns() - preproc_timestamp);
formatted_line(preproc_gaps, "", "", "", ""); formatted_line(preproc_gaps, "", "", "", "");
@@ -120,18 +117,10 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
return { X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test }; return { X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test };
} }
/**
* @brief Train the weak classifiers.
*
* @param X_train_feat Training images
 * @param X_train_feat_argsort Sorted indexes of the training image features
* @param y_train Training labels
* @return List of trained models
*/
std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_train_feat_argsort, const np::Array<uint8_t>& y_train) noexcept { std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_train_feat_argsort, const np::Array<uint8_t>& y_train) noexcept {
const std::chrono::system_clock::time_point training_timestamp = perf_counter_ns(); const std::chrono::system_clock::time_point training_timestamp = perf_counter_ns();
const std::array<int32_t, 3> training_gaps = { 26, -18, 29 }; const std::array<int32_t, 3> training_gaps = { 26, -18, 29 };
header(training_gaps, { "Training", "Time spent (ns)", "Formatted time spent" }); header({ "Training", "Time spent (ns)", "Formatted time spent" }, training_gaps);
std::array<std::array<np::Array<float64_t>, 2>, TS.size()> models; std::array<std::array<np::Array<float64_t>, 2>, TS.size()> models;
@@ -147,9 +136,9 @@ std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array
const auto [ alphas, final_classifiers ] = state_saver<float64_t, 2>(title, training_gaps[0], { alphas_title, final_classifiers_title }, const auto [ alphas, final_classifiers ] = state_saver<float64_t, 2>(title, training_gaps[0], { alphas_title, final_classifiers_title },
FORCE_REDO, SAVE_STATE, MODEL_DIR, train_viola_jones, T, X_train_feat, X_train_feat_argsort, y_train); FORCE_REDO, SAVE_STATE, MODEL_DIR, train_viola_jones, T, X_train_feat, X_train_feat_argsort, y_train);
#if __DEBUG #if __DEBUG
printf("alphas\n"); print("alphas");
print(alphas); print(alphas);
printf("final_classifiers\n"); print("final_classifiers");
print(final_classifiers); print(final_classifiers);
#endif #endif
models[i++] = { alphas, final_classifiers }; models[i++] = { alphas, final_classifiers };
@@ -162,18 +151,9 @@ std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array
return models; return models;
} }
/**
* @brief Benchmark the trained classifiers on the training and testing sets.
*
* @param models List of trained models
* @param X_train_feat Training features
* @param y_train Training labels
* @param X_test_feat Testing features
* @param y_test Testing labels
*/
void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>, TS.size()>& models, const np::Array<int32_t>& X_train_feat, const np::Array<uint8_t>& y_train, const np::Array<int32_t>& X_test_feat, const np::Array<uint8_t>& y_test) { void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>, TS.size()>& models, const np::Array<int32_t>& X_train_feat, const np::Array<uint8_t>& y_train, const np::Array<int32_t>& X_test_feat, const np::Array<uint8_t>& y_test) {
const std::array<int32_t, 5> testing_gaps = { 26, -19, 24, -19, 24 }; const std::array<int32_t, 5> testing_gaps = { 26, -19, 24, -19, 24 };
header(testing_gaps, { "Testing", "Time spent (ns) (E)", "Formatted time spent (E)", "Time spent (ns) (T)", "Formatted time spent (T)" }); header({ "Testing", "Time spent (ns) (E)", "Formatted time spent (E)", "Time spent (ns) (T)", "Formatted time spent (T)" }, testing_gaps);
std::array<std::array<float64_t, 8>, TS.size()> results; std::array<std::array<float64_t, 8>, TS.size()> results;
size_t i = 0; size_t i = 0;
@ -209,7 +189,7 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
footer(testing_gaps); footer(testing_gaps);
const std::array<int32_t, 9> evaluating_gaps = { 19, -7, -6, -6, -6, -7, -6, -6, -6 }; const std::array<int32_t, 9> evaluating_gaps = { 19, -7, -6, -6, -6, -7, -6, -6, -6 };
header(evaluating_gaps, { "Evaluating", "ACC (E)", "F1 (E)", "FN (E)", "FP (E)", "ACC (T)", "F1 (T)", "FN (T)", "FP (T)"}); header({ "Evaluating", "ACC (E)", "F1 (E)", "FN (E)", "FP (E)", "ACC (T)", "F1 (T)", "FN (T)", "FP (T)"}, evaluating_gaps);
i = 0; i = 0;
for (const size_t T : TS) { for (const size_t T : TS) {
@ -221,16 +201,10 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
footer(evaluating_gaps); footer(evaluating_gaps);
} }
/**
 * @brief Test if each result is identical across devices.
 *
 * Since ViolaJones is a fully deterministic algorithm, the results should be the same regardless of the device;
 * this function checks this assertion.
*/
void unit_test(void) { void unit_test(void) {
const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns(); const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
const std::array<int32_t, 4> unit_gaps = { 37, -10, -18, 29}; const std::array<int32_t, 4> unit_gaps = { 37, -10, -18, 29};
header(unit_gaps, { "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent" }); header({ "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent" }, unit_gaps);
char title[BUFFER_SIZE] = { 0 }; char title[BUFFER_SIZE] = { 0 };
char tmp_title[BUFFER_SIZE / 2] = { 0 }; char tmp_title[BUFFER_SIZE / 2] = { 0 };
@ -250,104 +224,81 @@ void unit_test(void) {
formatted_row(unit_gaps, { title, "Failed", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); formatted_row(unit_gaps, { title, "Failed", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
}; };
for (const char* const label : { "train", "test" }) { for (const char* label : { "train", "test" }) {
snprintf(file_cpu, BUFFER_SIZE, OUT_DIR "/X_%s_ii_CPU.bin", label); sprintf(file_cpu, OUT_DIR "/X_%s_ii_CPU.bin", label);
snprintf(file_gpu, BUFFER_SIZE, OUT_DIR "/X_%s_ii_GPU.bin", label); sprintf(file_gpu, OUT_DIR "/X_%s_ii_GPU.bin", label);
if (std::filesystem::exists(file_cpu) && std::filesystem::exists(file_gpu)) { if (fs::exists(file_cpu) && fs::exists(file_gpu)) {
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_ii", label);
snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title);
test_fnc(title, [&file_cpu, &file_gpu]{
const np::Array<uint32_t> X_train_ii_cpu = load<uint32_t>(file_cpu); const np::Array<uint32_t> X_train_ii_cpu = load<uint32_t>(file_cpu);
const np::Array<uint32_t> X_train_ii_gpu = load<uint32_t>(file_gpu); const np::Array<uint32_t> X_train_ii_gpu = load<uint32_t>(file_gpu);
return unit_test_cpu_vs_gpu<uint32_t>(X_train_ii_cpu, X_train_ii_gpu); sprintf(tmp_title, "X_%s_ii", label);
}); sprintf(title, "%-22s - CPU vs GPU", tmp_title);
test_fnc(title, [&X_train_ii_cpu, &X_train_ii_gpu]{ return unit_test_cpu_vs_gpu<uint32_t>(X_train_ii_cpu, X_train_ii_gpu); });
} }
snprintf(file_cpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_CPU.bin", label);
snprintf(file_gpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_GPU.bin", label);
uint8_t feat = 0;
char file_feat[BUFFER_SIZE] = { 0 }; char file_feat[BUFFER_SIZE] = { 0 };
if (std::filesystem::exists(file_cpu)) { sprintf(file_feat, OUT_DIR "/X_%s_feat_CPU.bin", label);
strncpy(file_feat, file_cpu, BUFFER_SIZE); if (fs::exists(file_feat)) {
feat = 1;
} else if (std::filesystem::exists(file_gpu)) {
strncpy(file_feat, file_gpu, BUFFER_SIZE);
feat = 2;
}
if (feat != 0) {
const np::Array<int32_t> X_feat = load<int32_t>(file_feat); const np::Array<int32_t> X_feat = load<int32_t>(file_feat);
snprintf(file_gpu, BUFFER_SIZE, feat == 1 ? OUT_DIR "/X_%s_feat_GPU.bin" : OUT_DIR "/X_%s_feat_CPU.bin", label); sprintf(file_gpu, OUT_DIR "/X_%s_feat_GPU.bin", label);
if (std::filesystem::exists(file_gpu)) { if (fs::exists(file_gpu)) {
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat", label); const np::Array<int32_t> X_feat_gpu = load<int32_t>(file_gpu);
snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title); sprintf(tmp_title, "X_%s_feat", label);
test_fnc(title, [&X_feat, &file_gpu]{ sprintf(title, "%-22s - CPU vs GPU", tmp_title);
const np::Array<int32_t> X_feat_aux = load<int32_t>(file_gpu); test_fnc(title, [&X_feat, &X_feat_gpu]{ return unit_test_cpu_vs_gpu<int32_t>(X_feat, X_feat_gpu); });
return unit_test_cpu_vs_gpu<int32_t>(X_feat, X_feat_aux);
});
} }
snprintf(file_cpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_argsort_CPU.bin", label); sprintf(file_cpu, OUT_DIR "/X_%s_feat_argsort_CPU.bin", label);
np::Array<uint16_t> X_feat_argsort_cpu; np::Array<uint16_t> X_feat_argsort_cpu;
uint8_t loaded = 0; uint8_t loaded = 0;
if (std::filesystem::exists(file_cpu)) { if (fs::exists(file_cpu)) {
X_feat_argsort_cpu = std::move(load<uint16_t>(file_cpu));
++loaded; ++loaded;
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label); sprintf(tmp_title, "X_%s_feat_argsort", label);
snprintf(title, BUFFER_SIZE, "%-22s - CPU argsort", tmp_title); sprintf(title, "%-22s - CPU argsort", tmp_title);
test_fnc(title, [&X_feat, &X_feat_argsort_cpu, &file_cpu]{ test_fnc(title, [&X_feat, &X_feat_argsort_cpu]{ return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_cpu); });
X_feat_argsort_cpu = load<uint16_t>(file_cpu);
return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_cpu);
});
} }
snprintf(file_gpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_argsort_GPU.bin", label); sprintf(file_gpu, OUT_DIR "/X_%s_feat_argsort_GPU.bin", label);
np::Array<uint16_t> X_feat_argsort_gpu; np::Array<uint16_t> X_feat_argsort_gpu;
if (std::filesystem::exists(file_gpu)) { if (fs::exists(file_gpu)) {
X_feat_argsort_gpu = std::move(load<uint16_t>(file_gpu));
++loaded; ++loaded;
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label); sprintf(tmp_title, "X_%s_feat_argsort", label);
snprintf(title, BUFFER_SIZE, "%-22s - GPU argsort", tmp_title); sprintf(title, "%-22s - GPU argsort", tmp_title);
test_fnc(title, [&X_feat, &X_feat_argsort_gpu, &file_gpu]{ test_fnc(title, [&X_feat, &X_feat_argsort_gpu]{ return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_gpu); });
X_feat_argsort_gpu = load<uint16_t>(file_gpu);
return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_gpu);
});
} }
if (loaded == 2){ if (loaded == 2){
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label); sprintf(tmp_title, "X_%s_feat_argsort", label);
snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title); sprintf(title, "%-22s - CPU vs GPU", tmp_title);
test_fnc(title, [&X_feat_argsort_cpu, &X_feat_argsort_gpu]{ return unit_test_cpu_vs_gpu<uint16_t>(X_feat_argsort_cpu, X_feat_argsort_gpu); }); test_fnc(title, [&X_feat_argsort_cpu, &X_feat_argsort_gpu]{ return unit_test_cpu_vs_gpu<uint16_t>(X_feat_argsort_cpu, X_feat_argsort_gpu); });
} }
} }
} }
for (const size_t T : TS) for (const size_t T : TS)
for (const char* const label : { "alphas", "final_classifiers" }) { for (const char* label : { "alphas", "final_classifiers" }) {
snprintf(file_cpu, BUFFER_SIZE, MODEL_DIR "/%s_%lu_CPU.bin", label, T); sprintf(file_cpu, MODEL_DIR "/%s_%lu_CPU.bin", label, T);
snprintf(file_gpu, BUFFER_SIZE, MODEL_DIR "/%s_%lu_GPU.bin", label, T); sprintf(file_gpu, MODEL_DIR "/%s_%lu_GPU.bin", label, T);
if (std::filesystem::exists(file_cpu) && std::filesystem::exists(file_gpu)){ if (fs::exists(file_cpu) && fs::exists(file_gpu)){
snprintf(tmp_title, BUFFER_SIZE / 2, "%s_%ld", label, T);
snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title);
test_fnc(title, [&file_cpu, &file_gpu]{
const np::Array<float64_t> cpu = load<float64_t>(file_cpu); const np::Array<float64_t> cpu = load<float64_t>(file_cpu);
const np::Array<float64_t> gpu = load<float64_t>(file_gpu); const np::Array<float64_t> gpu = load<float64_t>(file_gpu);
return unit_test_cpu_vs_gpu<float64_t>(cpu, gpu); sprintf(tmp_title, "%s_%ld", label, T);
}); sprintf(title, "%-22s - CPU vs GPU", tmp_title);
test_fnc(title, [&cpu, &gpu]{ return unit_test_cpu_vs_gpu<float64_t>(cpu, gpu); });
} }
} }
const long long time_spent = duration_ns(perf_counter_ns() - unit_timestamp); const long long time_spent = duration_ns(perf_counter_ns() - unit_timestamp);
if (n_total == 0)
formatted_row(unit_gaps, { "Unit testing summary", "No files", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
else {
snprintf(title, BUFFER_SIZE, "%ld/%ld", n_success, n_total); snprintf(title, BUFFER_SIZE, "%ld/%ld", n_success, n_total);
formatted_line(unit_gaps, "", "", "", ""); formatted_line(unit_gaps, "", "", "", "");
formatted_row(unit_gaps, { "Unit testing summary", title, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() }); formatted_row(unit_gaps, { "Unit testing summary", title, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
}
footer(unit_gaps); footer(unit_gaps);
} }
int32_t main(void){ int main(){
setlocale(LC_NUMERIC, ""); // Allow proper number display setlocale(LC_NUMERIC, ""); // Allow proper number display
const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns(); const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
const std::array<int32_t, 3> unit_gaps = { 27, -18, 29 }; const std::array<int32_t, 3> unit_gaps = { 27, -18, 29 };
header(unit_gaps, { "Unit testing", "Time spent (ns)", "Formatted time spent" }); header({ "Unit testing", "Time spent (ns)", "Formatted time spent" }, unit_gaps);
#if GPU_BOOSTED #if GPU_BOOSTED
benchmark_function_void("Testing GPU capabilities 1D", unit_gaps[0], test_working, 50000); benchmark_function_void("Testing GPU capabilities 1D", unit_gaps[0], test_working, 50000);
benchmark_function_void("Testing GPU capabilities 2D", unit_gaps[0], test_working_2d, 200, 500); benchmark_function_void("Testing GPU capabilities 2D", unit_gaps[0], test_working_2d, 200, 500);

View File

@ -8,22 +8,22 @@
void printProgress(const float64_t& percentage) noexcept { void printProgress(const float64_t& percentage) noexcept {
const uint64_t val = static_cast<uint64_t>(percentage * 100); const uint64_t val = static_cast<uint64_t>(percentage * 100);
const int32_t lpad = static_cast<int32_t>(percentage * PBWIDTH); const int lpad = static_cast<int>(percentage * PBWIDTH);
const int32_t rpad = PBWIDTH - lpad; const int rpad = PBWIDTH - lpad;
fprintf(stderr, "%3lu%% [%.*s%*s]\r", val, lpad, PBSTR, rpad, ""); printf("%3lu%% [%.*s%*s]\r", val, lpad, PBSTR, rpad, "");
fflush(stderr); fflush(stdout);
} }
void clearProgress(void) noexcept { void clearProgress() noexcept {
// Progress bar width + space before + num space + space after // Progress bar width + space before + num space + space after
fprintf(stderr, "%*c\r", PBWIDTH + 1 + 3 + 3, ' '); printf("%*c\r", PBWIDTH + 1 + 3 + 3, ' ');
} }
template<typename T> template<typename T>
void test(const uint64_t& N) noexcept { void test(const uint64_t& N) noexcept {
#if __DEBUG #if __DEBUG
printf("DETERMINISTIC for N=%s of %s sized %s\n", thousand_sep(N).c_str(), typeid(T).name(), format_byte_size(sizeof(T)).c_str()); printf("DETERMINISTIC for N=%s of %s sized %s\n", thousand_sep(N).c_str(), typeid(T).name(), format_byte_size(sizeof(T)).c_str());
printf("Estimating memory footprint at : %s\n", format_byte_size(3 * N * sizeof(T)).c_str()); print("Estimating memory footprint at : " + format_byte_size(3 * N * sizeof(T)));
#endif #endif
T *a = new T[N], *b = new T[N], *c = new T[N]; T *a = new T[N], *b = new T[N], *c = new T[N];
@ -45,7 +45,7 @@ void test(const uint64_t& N) noexcept {
delete[] a, delete[] b, delete[] c; delete[] a, delete[] b, delete[] c;
} }
void test_float(void) noexcept { void test_float() noexcept {
std::cout << std::setprecision(1<<8); std::cout << std::setprecision(1<<8);
const uint64_t N = static_cast<uint64_t>(1)<<28; const uint64_t N = static_cast<uint64_t>(1)<<28;
test<float128_t>(N); test<float128_t>(N);
@ -60,3 +60,4 @@ void test_float(void) noexcept {
//printf("%.128lf\n", static_cast<float64_t>(1) / 3); //printf("%.128lf\n", static_cast<float64_t>(1) / 3);
//printf("%.128f\n", static_cast<float>(1) / 3); //printf("%.128f\n", static_cast<float>(1) / 3);
} }

View File

@ -1,17 +1,19 @@
#include "toolbox.hpp" #include "toolbox.hpp"
#include <numeric>
#include <algorithm>
inline static constexpr uint64_t u64(const double& n) noexcept { return static_cast<uint64_t>(n); } static constexpr uint64_t u64(const double& n) noexcept { return static_cast<uint64_t>(n); }
static const constexpr size_t N_TIMES = 11; static const constexpr size_t N_TIMES = 11;
static const constexpr std::array<const char*, N_TIMES> time_formats = { "ns", "us", "ms", "s", "m", "h", "j", "w", "M", "y", "c" }; static const constexpr std::array<const char*, N_TIMES> time_formats = { "ns", "µs", "ms", "s", "m", "h", "j", "w", "M", "y", "c" };
static const constexpr std::array<uint64_t, N_TIMES> time_numbers = { 1, u64(1e3), u64(1e6), u64(1e9), u64(6e10), u64(36e11), u64(864e11), static const constexpr std::array<uint64_t, N_TIMES> time_numbers = { 1, u64(1e3), u64(1e6), u64(1e9), u64(6e10), u64(36e11), u64(864e11),
u64(6048e11), u64(26784e11), u64(31536e12), u64(31536e14) }; u64(6048e11), u64(26784e11), u64(31536e12), u64(31536e14) };
/** /**
* @brief Format the time in seconds in human readable format. * @brief Format the time in seconds in human readable format.
* *
* @param time number of seconds * @param time Time in seconds
* @return The formatted human readable string * @return std::string The formatted human readable string.
*/ */
std::string format_time(uint64_t time) noexcept { std::string format_time(uint64_t time) noexcept {
if (time == 0) if (time == 0)
@ -19,8 +21,8 @@ std::string format_time(uint64_t time) noexcept {
std::string s = ""; std::string s = "";
uint64_t res; uint64_t res;
for (int32_t i = N_TIMES - 1; i >= 3; --i) { for (int i = N_TIMES - 1; i >= 3; --i) {
const uint64_t time_number = time_numbers[i] / u64(1e9); // Converting nanosecond timestamp to second const uint64_t time_number = time_numbers[i] / 1e9; // Converting nanosecond timestamp to second
if (time >= time_number) { if (time >= time_number) {
res = time / time_number; res = time / time_number;
time %= time_number; time %= time_number;
@ -28,7 +30,7 @@ std::string format_time(uint64_t time) noexcept {
} }
} }
// Remove trailing character if (s.back() == ' ')
s.pop_back(); s.pop_back();
return s; return s;
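The greedy largest-unit-first decomposition shared by both versions is easier to read outside the diff; below is a minimal Python sketch of the same idea, restricted to the seconds-based units of the tables above (illustration only, not code from this repository):

# Greedy decomposition: consume the largest unit that still fits, repeat.
UNITS = [("c", 3153600000), ("y", 31536000), ("M", 2678400), ("w", 604800),
         ("j", 86400), ("h", 3600), ("m", 60), ("s", 1)]

def format_time_sketch(time: int) -> str:
    if time == 0:
        return "0s"
    parts = []
    for name, size in UNITS:
        if time >= size:
            parts.append(f"{time // size}{name}")
            time %= size
    return " ".join(parts)

print(format_time_sketch(90061))  # '1j 1h 1m 1s'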
@ -38,7 +40,7 @@ std::string format_time(uint64_t time) noexcept {
* @brief Format the time in nanoseconds in human readable format. * @brief Format the time in nanoseconds in human readable format.
* *
* @param time Time in nanoseconds * @param time Time in nanoseconds
* @return std::string The formatted human readable string * @return std::string The formatted human readable string.
*/ */
std::string format_time_ns(uint64_t time) noexcept { std::string format_time_ns(uint64_t time) noexcept {
if (time == 0) if (time == 0)
@ -46,7 +48,7 @@ std::string format_time_ns(uint64_t time) noexcept {
std::string s = ""; std::string s = "";
uint64_t res; uint64_t res;
for (int32_t i = N_TIMES - 1; i >= 0; --i) { for (int i = N_TIMES - 1; i >= 0; --i) {
if (time >= time_numbers[i]) { if (time >= time_numbers[i]) {
res = time / time_numbers[i]; res = time / time_numbers[i];
time %= time_numbers[i]; time %= time_numbers[i];
@ -54,7 +56,7 @@ std::string format_time_ns(uint64_t time) noexcept {
} }
} }
// Remove trailing character if (s.back() == ' ')
s.pop_back(); s.pop_back();
return s; return s;
@ -69,7 +71,7 @@ static const constexpr uint64_t total_bytes = u64(1)<<(10 * (N_BYTES - 1));
 * See more: https://en.wikipedia.org/wiki/JEDEC_memory_standards * See more: https://en.wikipedia.org/wiki/JEDEC_memory_standards
* *
* @param bytes Number of bytes * @param bytes Number of bytes
* @return JEDEC compliant formatted number of bytes * @return std::string JEDEC compliant formatted number of bytes
*/ */
std::string format_byte_size(uint64_t bytes) noexcept { std::string format_byte_size(uint64_t bytes) noexcept {
if (bytes == 0) if (bytes == 0)
@ -93,13 +95,6 @@ std::string format_byte_size(uint64_t bytes) noexcept {
return s; return s;
} }
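format_byte_size applies the same greedy scheme in base 1024; a short Python sketch with a worked value (function name is hypothetical):

# JEDEC-style formatting: greedy base-1024 decomposition from EB down to B.
SUFFIXES = ["EB", "PB", "TB", "GB", "MB", "KB", "B"]

def format_byte_size_sketch(n: int) -> str:
    if n == 0:
        return "0B"
    parts = []
    for i, suffix in enumerate(SUFFIXES):
        size = 1 << (10 * (len(SUFFIXES) - 1 - i))  # 2**60 down to 2**0
        if n >= size:
            parts.append(f"{n // size}{suffix}")
            n %= size
    return " ".join(parts)

print(format_byte_size_sketch(1536))  # '1KB 512B'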
/**
 * @brief Format a number with a separator (e.g. 1000 as 1,000)
 *
 * @param k Number to format
 * @param separator Separator used between each thousand
* @return Formatted number
*/
std::string thousand_sep(uint64_t k, const char& separator) noexcept { std::string thousand_sep(uint64_t k, const char& separator) noexcept {
const std::string n = std::to_string(k); const std::string n = std::to_string(k);
const uint64_t st_size = n.length() + (n.length() - 1) / 3; const uint64_t st_size = n.length() + (n.length() - 1) / 3;
@ -116,3 +111,4 @@ std::string thousand_sep(uint64_t k, const char& separator) noexcept {
return s; return s;
} }

View File

@ -3,13 +3,6 @@
#include <string> #include <string>
#include <stdint.h> #include <stdint.h>
/**
 * @brief Print a formatted row of titles with given gaps, separated by a separator.
*
* @param gaps List of size gaps
* @param titles List of titles
* @param separator Separator character between each gap
*/
template<size_t N> template<size_t N>
constexpr void formatted_row(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles, constexpr void formatted_row(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles,
const char* const separator = "") noexcept { const char* const separator = "") noexcept {
@ -18,19 +11,10 @@ constexpr void formatted_row(const std::array<int32_t, N>& gaps, const std::arra
printf("%s\n", separator); printf("%s\n", separator);
} }
/**
* @brief Print a formatted line of repeated characters.
*
* @param gaps List of size gaps
 * @param left Character on the left
 * @param middle Character between each separator
 * @param separator Separator character between each gap
 * @param right Character on the right
*/
template<size_t N> template<size_t N>
constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* const left, const char* const middle, constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* const right, const char* const middle,
const char* const separator, const char* const right) noexcept { const char* const separator, const char* const left) noexcept {
printf("%s", left); printf("%s", right);
for(size_t i = 0; i < N; ++i){ for(size_t i = 0; i < N; ++i){
for(int32_t j = std::abs(gaps[i]) + 2; j > 0; --j) for(int32_t j = std::abs(gaps[i]) + 2; j > 0; --j)
printf("%s", separator); printf("%s", separator);
@ -38,27 +22,16 @@ constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* co
printf("%s", middle); printf("%s", middle);
} }
printf("%s\n", right); printf("%s\n", left);
} }
/**
* @brief Print a formatted header with the given titles and sizes.
*
* @param gaps List of size gaps
* @param titles List of titles
*/
template<size_t N> template<size_t N>
constexpr void header(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles) noexcept { constexpr void header(const std::array<const char* const, N>& titles, const std::array<int32_t, N>& gaps) noexcept {
formatted_line(gaps, "", "", "", ""); formatted_line(gaps, "", "", "", "");
formatted_row(gaps, titles); formatted_row(gaps, titles);
formatted_line(gaps, "", "", "", ""); formatted_line(gaps, "", "", "", "");
} }
/**
* @brief Print a formatted footer with the given sizes.
*
* @param gaps List of size gaps
*/
template<size_t N> template<size_t N>
constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept { constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept {
formatted_line(gaps, "", "", "", ""); formatted_line(gaps, "", "", "", "");
@ -67,36 +40,7 @@ constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept {
#define duration_ns(a) std::chrono::duration_cast<std::chrono::nanoseconds>(a).count() #define duration_ns(a) std::chrono::duration_cast<std::chrono::nanoseconds>(a).count()
#define perf_counter_ns() std::chrono::high_resolution_clock::now() #define perf_counter_ns() std::chrono::high_resolution_clock::now()
/**
* @brief Format the time in seconds in human readable format.
*
* @param time number of seconds
* @return The formatted human readable string
*/
std::string format_time(uint64_t) noexcept; std::string format_time(uint64_t) noexcept;
/**
* @brief Format the time in nanoseconds in human readable format.
*
* @param time Time in nanoseconds
* @return std::string The formatted human readable string
*/
std::string format_time_ns(uint64_t) noexcept; std::string format_time_ns(uint64_t) noexcept;
/**
 * @brief Convert a number of bytes into JEDEC standard form.
 * See more: https://en.wikipedia.org/wiki/JEDEC_memory_standards
*
* @param bytes Number of bytes
* @return JEDEC compliant formatted number of bytes
*/
std::string format_byte_size(uint64_t) noexcept; std::string format_byte_size(uint64_t) noexcept;
/**
 * @brief Format a number with a separator (e.g. 1000 as 1,000)
 *
 * @param k Number to format
 * @param separator Separator used between each thousand
* @return Formatted number
*/
std::string thousand_sep(uint64_t, const char& = ',') noexcept; std::string thousand_sep(uint64_t, const char& = ',') noexcept;

View File

@ -2,25 +2,14 @@
#include <iostream> #include <iostream>
#include <assert.h> #include <assert.h>
/**
 * @brief Test if a given result is equal to the expected one and log the result
*
* @tparam T type of returning values
* @param name of the unit test
* @param expected result of the function call
* @param result of the function
*/
template<typename T> template<typename T>
static void Assert(const char* const name, const T& expected, const T& result) noexcept { void Assert(const char* name, const T& expected, const T& result) noexcept {
if(expected != result){ if(expected != result){
std::cerr << "For test named " << name << " Expected '" << expected << "' but got '" << result << "' instead\n"; std::cerr << "For test named " << name << " Expected '" << expected << "' but got '" << result << "' instead\n";
assert(false); assert(false);
} }
} }
/**
* @brief Test suite for the format_byte_size output
*/
void format_byte_size_test(void) noexcept { void format_byte_size_test(void) noexcept {
Assert("format_byte_size null", std::string("0B"), format_byte_size(static_cast<uint64_t>(0))); Assert("format_byte_size null", std::string("0B"), format_byte_size(static_cast<uint64_t>(0)));
Assert("format_byte_size byte", std::string("1B"), format_byte_size(static_cast<uint64_t>(1))); Assert("format_byte_size byte", std::string("1B"), format_byte_size(static_cast<uint64_t>(1)));
@ -37,9 +26,6 @@ void format_byte_size_test(void) noexcept {
Assert("format_byte_size max", std::string("15EB 1023PB 1023TB 1023GB 1023MB 1023KB 1023B"), format_byte_size(static_cast<uint64_t>(-1))); Assert("format_byte_size max", std::string("15EB 1023PB 1023TB 1023GB 1023MB 1023KB 1023B"), format_byte_size(static_cast<uint64_t>(-1)));
} }
/**
* @brief Test suite for the format_time output
*/
void format_time_test(void) noexcept { void format_time_test(void) noexcept {
// https://en.wikipedia.org/wiki/Unit_of_time // https://en.wikipedia.org/wiki/Unit_of_time
Assert("format_time null", std::string("0s"), format_time(static_cast<uint64_t>(0))); Assert("format_time null", std::string("0s"), format_time(static_cast<uint64_t>(0)));
@ -94,15 +80,12 @@ void format_time_test(void) noexcept {
Assert("format_time max", std::string("5849424173c 55y 3w 5j 7h 15s"), format_time(static_cast<uint64_t>(-1))); Assert("format_time max", std::string("5849424173c 55y 3w 5j 7h 15s"), format_time(static_cast<uint64_t>(-1)));
} }
/**
* @brief Test suite for the format_time_ns output
*/
void format_time_ns_test(void) noexcept { void format_time_ns_test(void) noexcept {
// https://en.wikipedia.org/wiki/Unit_of_time // https://en.wikipedia.org/wiki/Unit_of_time
Assert("format_time_ns null", std::string("0ns"), format_time_ns(static_cast<uint64_t>(0))); Assert("format_time_ns null", std::string("0ns"), format_time_ns(static_cast<uint64_t>(0)));
Assert("format_time_ns nanosecond", std::string("1ns"), format_time_ns(static_cast<uint64_t>(1))); Assert("format_time_ns nanosecond", std::string("1ns"), format_time_ns(static_cast<uint64_t>(1)));
Assert("format_time_ns shake", std::string("10ns"), format_time_ns(static_cast<uint64_t>(10))); Assert("format_time_ns shake", std::string("10ns"), format_time_ns(static_cast<uint64_t>(10)));
Assert("format_time_ns microsecond", std::string("1us"), format_time_ns(static_cast<uint64_t>(1e3))); Assert("format_time_ns microsecond", std::string("1µs"), format_time_ns(static_cast<uint64_t>(1e3)));
Assert("format_time_ns millisecond", std::string("1ms"), format_time_ns(static_cast<uint64_t>(1e6))); Assert("format_time_ns millisecond", std::string("1ms"), format_time_ns(static_cast<uint64_t>(1e6)));
Assert("format_time_ns centisecond", std::string("10ms"), format_time_ns(static_cast<uint64_t>(1e7))); Assert("format_time_ns centisecond", std::string("10ms"), format_time_ns(static_cast<uint64_t>(1e7)));
Assert("format_time_ns decisecond", std::string("100ms"), format_time_ns(static_cast<uint64_t>(1e8))); Assert("format_time_ns decisecond", std::string("100ms"), format_time_ns(static_cast<uint64_t>(1e8)));
@ -129,7 +112,7 @@ void format_time_ns_test(void) noexcept {
Assert("format_time_ns year", std::string("1y"), format_time_ns(static_cast<uint64_t>(31536e12))); Assert("format_time_ns year", std::string("1y"), format_time_ns(static_cast<uint64_t>(31536e12)));
Assert("format_time_ns tropical year", std::string("1y 5h 48m 45s 216ms"), format_time_ns(static_cast<uint64_t>(31556925216e6))); Assert("format_time_ns tropical year", std::string("1y 5h 48m 45s 216ms"), format_time_ns(static_cast<uint64_t>(31556925216e6)));
Assert("format_time_ns gregorian year", std::string("1y 5h 49m 12s"), format_time_ns(static_cast<uint64_t>(31556952e9))); Assert("format_time_ns gregorian year", std::string("1y 5h 49m 12s"), format_time_ns(static_cast<uint64_t>(31556952e9)));
Assert("format_time_ns sidereal year", std::string("1y 6h 9m 9s 763ms 545us 600ns"), format_time_ns(static_cast<uint64_t>(315581497635456e2))); Assert("format_time_ns sidereal year", std::string("1y 6h 9m 9s 763ms 545µs 600ns"), format_time_ns(static_cast<uint64_t>(315581497635456e2)));
Assert("format_time_ns leap year", std::string("1y 1j"), format_time_ns(static_cast<uint64_t>(316224e11))); Assert("format_time_ns leap year", std::string("1y 1j"), format_time_ns(static_cast<uint64_t>(316224e11)));
Assert("format_time_ns olympiad", std::string("4y"), format_time_ns(static_cast<uint64_t>(126144e12))); Assert("format_time_ns olympiad", std::string("4y"), format_time_ns(static_cast<uint64_t>(126144e12)));
Assert("format_time_ns lusturm", std::string("5y"), format_time_ns(static_cast<uint64_t>(15768e13))); Assert("format_time_ns lusturm", std::string("5y"), format_time_ns(static_cast<uint64_t>(15768e13)));
@ -154,12 +137,9 @@ void format_time_ns_test(void) noexcept {
//Assert("format_time_ns ronnasecond", std::string(""), format_time_ns(static_cast<uint64_t>(1e36))); //Assert("format_time_ns ronnasecond", std::string(""), format_time_ns(static_cast<uint64_t>(1e36)));
//Assert("format_time_ns quettasecond", std::string(""), format_time_ns(static_cast<uint64_t>(1e39))); //Assert("format_time_ns quettasecond", std::string(""), format_time_ns(static_cast<uint64_t>(1e39)));
// uint64_t_MAX == 2**64 == 18446744073709551615I64u == -1 // uint64_t_MAX == 2**64 == 18446744073709551615I64u == -1
Assert("format_time_ns max", std::string("5c 84y 11M 2j 23h 34m 33s 709ms 551us 615ns"), format_time_ns(static_cast<uint64_t>(-1))); Assert("format_time_ns max", std::string("5c 84y 11M 2j 23h 34m 33s 709ms 551µs 615ns"), format_time_ns(static_cast<uint64_t>(-1)));
} }
/**
* @brief Test suite for the thousand_sep output
*/
void thousand_sep_test(void) noexcept { void thousand_sep_test(void) noexcept {
// https://en.wikipedia.org/wiki/Names_of_large_numbers // https://en.wikipedia.org/wiki/Names_of_large_numbers
Assert("thousand_sep null", std::string("0"), thousand_sep(static_cast<uint64_t>(0))); Assert("thousand_sep null", std::string("0"), thousand_sep(static_cast<uint64_t>(0)));
@ -202,3 +182,4 @@ void thousand_sep_test(void) noexcept {
// uint64_t_MAX == 2**64 == 18446744073709551615I64u == -1 // uint64_t_MAX == 2**64 == 18446744073709551615I64u == -1
Assert("thousand_sep max", std::string("18,446,744,073,709,551,615"), thousand_sep(static_cast<uint64_t>(-1))); Assert("thousand_sep max", std::string("18,446,744,073,709,551,615"), thousand_sep(static_cast<uint64_t>(-1)));
} }

View File

@ -1,21 +1,6 @@
#pragma once #pragma once
/**
* @brief Test suite for the format_byte_size output
*/
void format_byte_size_test(void) noexcept; void format_byte_size_test(void) noexcept;
/**
* @brief Test suite for the format_time output
*/
void format_time_test(void) noexcept; void format_time_test(void) noexcept;
/**
* @brief Test suite for the format_time_ns output
*/
void format_time_ns_test(void) noexcept; void format_time_ns_test(void) noexcept;
/**
* @brief Test suite for the thousand_sep output
*/
void thousand_sep_test(void) noexcept; void thousand_sep_test(void) noexcept;

View File

@ -1,19 +0,0 @@
services:
downloader:
extends:
file: ./downloader/docker-compose.yaml
service: downloader
violajones-cpp:
extends:
file: ./cpp/docker-compose.yaml
service: violajones-cpp
depends_on:
downloader:
condition: service_completed_successfully
violajones-python:
extends:
file: ./python/docker-compose.yaml
service: violajones-python
depends_on:
downloader:
condition: service_completed_successfully

42
download_data.sh Executable file
View File

@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Exit if any command doesn't exit with code 0
set -e
EXEC_DIR=$1
test -z "$EXEC_DIR" && EXEC_DIR=.
DATA_LOCATION=$EXEC_DIR/data
mkdir -p $DATA_LOCATION
if [ ! -f $DATA_LOCATION/X_train.bin ] || [ ! -f $DATA_LOCATION/X_test.bin ] \
|| [ ! -f $DATA_LOCATION/y_train.bin ] || [ ! -f $DATA_LOCATION/y_test.bin ]; then
#if true; then
if [ ! -f $DATA_LOCATION/faces.tar.gz ]; then
echo 'Downloading raw dataset'
curl -o $DATA_LOCATION/faces.tar.gz http://www.ai.mit.edu/courses/6.899/lectures/faces.tar.gz
fi
echo 'Extracting raw files'
tar xzf $DATA_LOCATION/faces.tar.gz -C $DATA_LOCATION
rm $DATA_LOCATION/README
rm $DATA_LOCATION/svm.*
echo 'Extracting raw train set'
tar xzf $DATA_LOCATION/face.train.tar.gz -C $DATA_LOCATION
rm $DATA_LOCATION/face.train.tar.gz
echo 'Extracting raw test set'
tar xzf $DATA_LOCATION/face.test.tar.gz -C $DATA_LOCATION
rm $DATA_LOCATION/face.test.tar.gz
echo 'Converting raw dataset to bin file'
source $EXEC_DIR/python/activate.sh $EXEC_DIR
python $EXEC_DIR/python/convert_dataset.py $DATA_LOCATION
echo 'Removing leftovers'
rm -rf $DATA_LOCATION/train
rm -rf $DATA_LOCATION/test
echo 'Done!'
fi

View File

@ -1,11 +0,0 @@
FROM alpine:3.19.1
RUN apk add --no-cache curl=8.5.0-r0 python3=3.11.9-r0 && rm -rf /var/cache/apk*
WORKDIR /home/ViolaJones/downloader
COPY requirements.txt activate.sh ./
RUN ./activate.sh
COPY download_data.sh convert_dataset.py ./
CMD ["./download_data.sh"]

View File

@ -1,27 +0,0 @@
#!/bin/sh
# Exit if any command doesn't exit with code 0
set -e
test -z "$EXEC_DIR" && EXEC_DIR=.
test -z "$VENV_PATH" && VENV_PATH="$EXEC_DIR/venv"
activate(){
if [ ! -d "$VENV_PATH" ]; then
echo 'Creating python virtual environment'
python -m venv "$VENV_PATH"
echo 'Activating virtual environment'
activate
echo 'Updating base pip packages'
python -m pip install -U setuptools pip
echo 'Installing requirements'
pip install -r requirements.txt
elif [ -f "$VENV_PATH"/Scripts/activate ]; then . "$VENV_PATH"/Scripts/activate
elif [ -f "$VENV_PATH"/bin/activate ]; then . "$VENV_PATH"/bin/activate
else
echo 'Python virtual environment not detected'
exit 1
fi
}
activate

View File

@ -1,60 +0,0 @@
from io import BufferedReader
from tqdm import tqdm
from functools import partial
from sys import argv
import numpy as np
from os import path, listdir
# Induce determinism
np.random.seed(133742)
# Makes the "leave" argument default to False
tqdm = partial(tqdm, leave = False)
def read_pgm(pgm_file: BufferedReader) -> np.ndarray:
"""Read the data of a PGM file
Args:
pgm_file (BufferedReader): PGM File
Returns:
np.ndarray: PGM data
"""
assert (f := pgm_file.readline()) == b'P5\n', f"Incorrect file format: {f}"
(width, height) = [int(i) for i in pgm_file.readline().split()]
assert width > 0 and height > 0, f"Incorrect dimensions: {width}x{height}"
assert (depth := int(pgm_file.readline())) < 256, f"Incorrect depth: {depth}"
buff = np.empty(height * width, dtype = np.uint8)
for i in range(buff.shape[0]):
buff[i] = ord(pgm_file.read(1))
return buff.reshape((height, width))
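A note on the byte-by-byte loop above: the raster section is raw bytes, so the same buffer can be filled in a single call (equivalent sketch, assuming the header lines were already consumed as above):

buff = np.frombuffer(pgm_file.read(width * height), dtype = np.uint8)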
def __main__(data_path: str) -> None:
"""Read the data of every PGM file and output it in data files
Args:
data_path (str): Path of the PGM files
"""
for set_name in tqdm(["train", "test"], desc = "set name"):
X, y = [], []
for y_i, label in enumerate(tqdm(["non-face", "face"], desc = "label")):
for filename in tqdm(listdir(f"{data_path}/{set_name}/{label}"), desc = "Reading pgm file"):
with open(f"{data_path}/{set_name}/{label}/{filename}", "rb") as face:
X.append(read_pgm(face))
y.append(y_i)
X, y = np.asarray(X), np.asarray(y)
idx = np.random.permutation(y.shape[0])
X, y = X[idx], y[idx]
for org, s in tqdm(zip("Xy", [X, y]), desc = f"Writing {set_name}"):
with open(f"{data_path}/{org}_{set_name}.bin", "w") as out:
out.write(f'{str(s.shape)[1:-1].replace(",", "")}\n')
raw = s.ravel()
for s_i in tqdm(raw[:-1], desc = f"Writing {org}"):
out.write(f"{s_i} ")
out.write(str(raw[-1]))
if __name__ == "__main__":
__main__(argv[1]) if len(argv) == 2 else print(f"Usage: python {__file__[__file__.rfind(path.sep) + 1:]} ./data_location")

View File

@ -1,6 +0,0 @@
services:
downloader:
image: saundersp/violajones-downloader
build: .
volumes:
- ../data:/home/ViolaJones/data

View File

@ -1,38 +0,0 @@
#!/bin/sh
# Exit if any command doesn't exit with code 0
set -e
test -z "$EXEC_DIR" && EXEC_DIR=.
DATA_PATH="$EXEC_DIR/../data"
test ! -d "$DATA_PATH" && mkdir -v "$DATA_PATH"
if [ ! -f "$DATA_PATH"/X_train.bin ] || [ ! -f "$DATA_PATH"/X_test.bin ] \
|| [ ! -f "$DATA_PATH"/y_train.bin ] || [ ! -f "$DATA_PATH"/y_test.bin ]; then
if [ ! -f "$DATA_PATH"/faces.tar.gz ]; then
echo 'Downloading raw dataset'
curl -o "$DATA_PATH"/faces.tar.gz http://www.ai.mit.edu/courses/6.899/lectures/faces.tar.gz
fi
echo 'Extracting raw files'
tar xvzf "$DATA_PATH"/faces.tar.gz -C "$DATA_PATH"
rm -v "$DATA_PATH"/README "$DATA_PATH"/svm.*
echo 'Extracting raw train set'
tar xvzf "$DATA_PATH"/face.train.tar.gz -C "$DATA_PATH"
rm -v "$DATA_PATH"/face.train.tar.gz
echo 'Extracting raw test set'
tar xvzf "$DATA_PATH"/face.test.tar.gz -C "$DATA_PATH"
rm -v "$DATA_PATH"/face.test.tar.gz
echo 'Converting raw dataset to bin file'
export EXEC_DIR
. "$EXEC_DIR"/activate.sh
python "$EXEC_DIR"/convert_dataset.py "$DATA_PATH"
echo 'Removing leftovers'
rm -rvf "$DATA_PATH"/train "$DATA_PATH"/test
echo 'Done !'
fi

View File

@ -1,2 +0,0 @@
numpy==1.26.4
tqdm==4.66.2

View File

@ -1,12 +0,0 @@
FROM nvidia/cuda:12.4.1-devel-ubi9 as builder
RUN dnf install -y python3.11-3.11.5-1.el9_3 && dnf clean all
RUN ln -s /usr/bin/python3 /usr/bin/python
WORKDIR /home/ViolaJones/python
COPY Makefile activate.sh requirements.txt ./
RUN make venv
COPY *.py ./
ENTRYPOINT ["make"]
CMD ["start"]

View File

@ -1,85 +1,34 @@
MODELS_DIR := models DATA := ../data/X_train.bin ../data/X_test.bin ../data/y_train.bin ../data/y_test.bin
OUT_DIR := out
DATA_PATH := ../data
DATA := $(DATA_PATH)/X_train.bin $(DATA_PATH)/X_test.bin $(DATA_PATH)/y_train.bin $(DATA_PATH)/y_test.bin
.PHONY: all .PHONY: all start reset
all: venv
$(DATA): all: ${DATA}
@echo 'Missing $(DATA) files, use downloader first' && exit 1
${DATA}:
@bash ../download_data.sh ..
.PHONY: venv
venv: venv:
@sh -c '. ./activate.sh' @bash -c 'source activate.sh'
.PHONY: start start: ${DATA} venv
start: $(DATA) | venv check-python-works @bash -c 'source activate.sh && python projet.py'
@sh -c '. ./activate.sh && python projet.py'
.PHONY: debug reset:
debug: $(DATA) | venv check-python-works check-pudb-works @echo Deleting generated states and models
@rm -rf out/* models/* | true
debug:
@bash -c 'source activate.sh && pudb projet.py' @bash -c 'source activate.sh && pudb projet.py'
.PHONY: profile profile:
profile: $(DATA) | venv check-python-works check-gprof2dot-works check-dot-works @bash -c 'source activate.sh && python -m cProfile -o prof.out projet.py && gprof2dot -f pstats prof.out | dot -Tpng -o output.png'
@bash -c 'source activate.sh && python -m cProfile -o prof.out projet.py && gprof2dot -f pstats prof.out | dot -T png -o output.png'
.PHONY: log mrproper: reset
log: $(DATA) reset | venv @rm -r __pycache__ venv
@sed -i 's/GPU_BOOSTED: Final = False/GPU_BOOSTED: Final = True/;s/COMPILE_WITH_C: Final = False/COMPILE_WITH_C: Final = True/' config.py
@echo 'Logging GPU'
@make -s start > log_gpu
@sed -i 's/GPU_BOOSTED: Final = True/GPU_BOOSTED: Final = False/' config.py
@echo 'Logging CPU'
@make -s start > log_cpu
@sed -i 's/GPU_BOOSTED: Final = False/GPU_BOOSTED: Final = True/;s/COMPILE_WITH_C: Final = True/COMPILE_WITH_C: Final = False/' config.py
@echo 'Logging PGPU'
@make -s start > log_pgpu
@sed -i 's/GPU_BOOSTED: Final = True/GPU_BOOSTED: Final = False/' config.py
@echo 'Logging PY'
@make -s start > log_py
@echo 'Cleaning up'
@make -s reset
.PHONY: reset test:
reset: @bash -c 'source activate.sh && ls out | sed s/.pkl// | xargs -n1 python test_diff.py out'
@echo 'Deleting generated states and models' @bash -c 'source activate.sh && ls models | sed s/.pkl// | xargs -n1 python test_diff.py models'
@rm -frv $(OUT_DIR)/* $(MODELS_DIR)/*
#@ln -sv /mnt/pierre_stuffs/ViolaJones/python/models .
#@ln -sv /mnt/pierre_stuffs/ViolaJones/python/out .
.PHONY: clean
clean:
@rm -fv log_gpu log_cpu log_pgpu log_py
.PHONY: mrproper
mrproper: clean
@rm -rfv __pycache__ venv
.PHONY: help
help: help:
@echo "Available targets:" @echo "all start reset mrproper help"
@echo "\tall: alias for start, (default target)"
@echo "\tvenv: Create python virtual environnement."
@echo "\tstart: Start the ViolaJones algorithm, require data beforehand downloaded by the downloader."
@echo "\tdebug: Debug the ViolaJones algorithm, require data beforehand downloaded by the downloader."
@echo "\tprofile: Profile the ViolaJones algorithm functions timestamps, require data beforehand downloaded by the downloader."
@echo "\treset: Will delete any saved models and processed data made by ViolaJones."
@echo "\tmrproper: Will remove cpp binary files. Will execute reset target beforehand."
.PHONY: check-python-works
check-python-works:
@python --version >/dev/null 2>&1 || (echo 'Please install Python.' && exit 1)
.PHONY: check-pudb-works
check-pudb-works:
@pudb --version >/dev/null 2>&1 || (echo 'Please install pudb.' && exit 1)
.PHONY: check-gprof2dot-works
check-gprof2dot-works:
@gprof2dot --help >/dev/null 2>&1 || (echo 'Please install gprof2dot.' && exit 1)
.PHONY: check-dot-works
check-dot-works:
@dot --version >/dev/null 2>&1 || (echo 'Please install dot from graphviz.' && exit 1)

View File

@ -18,13 +18,13 @@ else:
@njit('uint8[:, :, :, :](uint16, uint16)') @njit('uint8[:, :, :, :](uint16, uint16)')
def build_features(width: int, height: int) -> np.ndarray: def build_features(width: int, height: int) -> np.ndarray:
"""Initialize the features based on the input shape. """Initialize the features base on the input shape.
Args: Args:
width (int), height (int): Shape of the image (Width, Height) shape (Tuple[int, int]): Shape of the image (Width, Height).
Returns: Returns:
np.ndarray: The initialized features np.ndarray: The initialized features.
""" """
feats = [] feats = []
empty = (0, 0, 0, 0) empty = (0, 0, 0, 0)
@ -63,10 +63,10 @@ def init_weights(y_train: np.ndarray) -> np.ndarray:
"""Initialize the weights of the weak classifiers based on the training labels. """Initialize the weights of the weak classifiers based on the training labels.
Args: Args:
y_train (np.ndarray): Training labels y_train (np.ndarray): Training labels.
Returns: Returns:
np.ndarray: The initialized weights np.ndarray: The initialized weights.
""" """
weights = np.empty_like(y_train, dtype = np.float64) weights = np.empty_like(y_train, dtype = np.float64)
t = y_train.sum() t = y_train.sum()
@ -79,48 +79,26 @@ def classify_weak_clf(x_feat_i: np.ndarray, threshold: int, polarity: int) -> np
"""Classify the integrated features based on polarity and threshold. """Classify the integrated features based on polarity and threshold.
Args: Args:
x_feat_i (np.ndarray): Integrated features x_feat_i (np.ndarray): Integrated features.
threshold (int): Trained threshold threshold (int): Trained threshold.
polarity (int): Trained polarity polarity (int): Trained polarity.
Returns: Returns:
np.ndarray: Classified features np.ndarray: Classified features.
""" """
res = np.zeros_like(x_feat_i, dtype = np.int8) res = np.zeros_like(x_feat_i, dtype = np.int8)
res[polarity * x_feat_i < polarity * threshold] = 1 res[polarity * x_feat_i < polarity * threshold] = 1
return res return res
@njit('uint8[:](float64[:], int32[:, :], int32[:, :])')
def classify_viola_jones(alphas: np.ndarray, classifiers: np.ndarray, X_feat: np.ndarray) -> np.ndarray:
"""Classify the trained classifiers on the given features.
Args:
alphas (np.ndarray): Trained alphas
classifiers (np.ndarray): Trained classifiers
X_feat (np.ndarray): Integrated features
Returns:
np.ndarray: Classification results
"""
total = np.zeros(X_feat.shape[1], dtype = np.float64)
for i, alpha in enumerate(tqdm_iter(alphas, "Classifying ViolaJones")):
(j, threshold, polarity) = classifiers[i]
total += alpha * classify_weak_clf(X_feat[j], threshold, polarity)
y_pred = np.zeros(X_feat.shape[1], dtype = np.uint8)
y_pred[total >= 0.5 * np.sum(alphas)] = 1
return y_pred
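The decision rule above is the standard AdaBoost strong classifier: predict 1 iff the weighted vote sum(alpha_t * h_t(x)) reaches half of sum(alpha_t). A tiny worked example with hypothetical values:

import numpy as np

alphas = np.array([0.9, 0.4, 0.2])           # hypothetical trained alphas
h = np.array([[1, 0],                        # weak predictions h_t(x):
              [1, 1],                        # rows are classifiers,
              [0, 1]])                       # columns are samples
total = (alphas[:, None] * h).sum(axis = 0)  # weighted votes: [1.3, 0.6]
y_pred = (total >= 0.5 * alphas.sum()).astype(np.uint8)  # threshold: 0.75
print(y_pred)  # [1 0]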
@njit('Tuple((int32, float64, float64[:]))(int32[:, :], float64[:], int32[:, :], uint8[:])') @njit('Tuple((int32, float64, float64[:]))(int32[:, :], float64[:], int32[:, :], uint8[:])')
def select_best(classifiers: np.ndarray, weights: np.ndarray, X_feat: np.ndarray, y: np.ndarray) -> Tuple[int, float, np.ndarray]: def select_best(classifiers: np.ndarray, weights: np.ndarray, X_feat: np.ndarray, y: np.ndarray) -> Tuple[int, float, np.ndarray]:
"""Select the best classifier given their predictions. """Select the best classifier given theirs predictions.
Args: Args:
classifiers (np.ndarray): The weak classifiers classifiers (np.ndarray): The weak classifiers.
weights (np.ndarray): Trained weights of each classifiers weights (np.ndarray): Trained weights of each classifiers.
X_feat (np.ndarray): Integrated features X_feat (np.ndarray): Integrated features.
y (np.ndarray): Features labels y (np.ndarray): Features labels.
Returns: Returns:
Tuple[int, float, np.ndarray]: Index of the best classifier, the best error and the best accuracy Tuple[int, float, np.ndarray]: Index of the best classifier, the best error and the best accuracy
@ -138,13 +116,13 @@ def train_viola_jones(T: int, X_feat: np.ndarray, X_feat_argsort: np.ndarray, y:
"""Train the weak classifiers. """Train the weak classifiers.
Args: Args:
T (int): Number of weak classifiers T (int): Number of weak classifiers.
X_feat (np.ndarray): Integrated features X_feat (np.ndarray): Integrated features.
X_feat_argsort (np.ndarray): Sorted indexes of the integrated features X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
y (np.ndarray): Features labels y (np.ndarray): Features labels.
Returns: Returns:
Tuple[np.ndarray, np.ndarray]: List of trained alphas and the list of the final classifiers Tuple[np.ndarray, np.ndarray]: List of trained alphas and the list of the final classifiers.
""" """
weights = init_weights(y) weights = init_weights(y)
alphas, final_classifier = np.empty(T, dtype = np.float64), np.empty((T, 3), dtype = np.int32) alphas, final_classifier = np.empty(T, dtype = np.float64), np.empty((T, 3), dtype = np.int32)
@ -161,22 +139,44 @@ def train_viola_jones(T: int, X_feat: np.ndarray, X_feat_argsort: np.ndarray, y:
return alphas, final_classifier return alphas, final_classifier
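The loop body is elided by this hunk; in the classic Viola-Jones/AdaBoost scheme, which this function presumably follows (the formulas below come from the original algorithm, not from this diff), each round shrinks the weights of correctly classified samples by beta = err / (1 - err), renormalises, and stores alpha = ln(1 / beta):

import numpy as np

# One AdaBoost round, Viola-Jones style (illustrative only).
def update_round(weights, err, correct):
    beta = err / (1.0 - err)                          # err in [0, 0.5)
    weights = weights * np.where(correct, beta, 1.0)  # shrink correct samples
    weights /= weights.sum()                          # renormalise
    return weights, np.log(1.0 / beta)                # new weights, alpha

w = np.full(4, 0.25)
w, a = update_round(w, 0.2, np.array([True, True, True, False]))
print(np.round(w, 3), round(a, 3))  # [0.143 0.143 0.143 0.571] 1.386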
#@njit @njit('uint8[:](float64[:], int32[:, :], int32[:, :])')
#def get_best_anova_features(X: np.ndarray, y: np.ndarray) -> np.ndarray: def classify_viola_jones(alphas: np.ndarray, classifiers: np.ndarray, X_feat: np.ndarray) -> np.ndarray:
# #SelectPercentile(f_classif, percentile = 10).fit(X, y).get_support(indices = True) """Classify the trained classifiers on the given features.
# classes = [X.T[y == 0].astype(np.float64), X.T[y == 1].astype(np.float64)]
# n_samples_per_class = np.asarray([classes[0].shape[0], classes[1].shape[0]]) Args:
# n_samples = classes[0].shape[0] + classes[1].shape[0] alphas (np.ndarray): Trained alphas.
# ss_all_data = (classes[0] ** 2).sum(axis = 0) + (classes[1] ** 2).sum(axis = 0) classifiers (np.ndarray): Trained classifiers.
# sums_classes = [np.asarray(classes[0].sum(axis = 0)), np.asarray(classes[1].sum(axis = 0))] X_feat (np.ndarray): Integrated features.
# sq_of_sums_all_data = (sums_classes[0] + sums_classes[1]) ** 2
# sq_of_sums_args = [sums_classes[0] ** 2, sums_classes[1] ** 2] Returns:
# ss_tot = ss_all_data - sq_of_sums_all_data / n_samples np.ndarray: Classification results.
# """
# sqd_sum_bw_n = sq_of_sums_args[0] / n_samples_per_class[0] + \ total = np.zeros(X_feat.shape[1], dtype = np.float64)
# sq_of_sums_args[1] / n_samples_per_class[1] - sq_of_sums_all_data / n_samples
# ss_wn = ss_tot - sqd_sum_bw_n for i, alpha in enumerate(tqdm_iter(alphas, "Classifying ViolaJones")):
# df_wn = n_samples - 2 (j, threshold, polarity) = classifiers[i]
# msw = ss_wn / df_wn total += alpha * classify_weak_clf(X_feat[j], threshold, polarity)
# f_values = sqd_sum_bw_n / msw
# return np.sort(np.argsort(f_values)[::-1][: int(np.ceil(X.shape[0] / 10.0))]) y_pred = np.zeros(X_feat.shape[1], dtype = np.uint8)
y_pred[total >= 0.5 * np.sum(alphas)] = 1
return y_pred
@njit
def get_best_anova_features(X: np.ndarray, y: np.ndarray) -> np.ndarray:
#SelectPercentile(f_classif, percentile = 10).fit(X, y).get_support(indices = True)
classes = [X.T[y == 0].astype(np.float64), X.T[y == 1].astype(np.float64)]
n_samples_per_class = np.asarray([classes[0].shape[0], classes[1].shape[0]])
n_samples = classes[0].shape[0] + classes[1].shape[0]
ss_alldata = (classes[0] ** 2).sum(axis = 0) + (classes[1] ** 2).sum(axis = 0)
sums_classes = [np.asarray(classes[0].sum(axis = 0)), np.asarray(classes[1].sum(axis = 0))]
sq_of_sums_alldata = (sums_classes[0] + sums_classes[1]) ** 2
sq_of_sums_args = [sums_classes[0] ** 2, sums_classes[1] ** 2]
ss_tot = ss_alldata - sq_of_sums_alldata / n_samples
sqd_sum_bw_n = sq_of_sums_args[0] / n_samples_per_class[0] + \
sq_of_sums_args[1] / n_samples_per_class[1] - sq_of_sums_alldata / n_samples
ss_wn = ss_tot - sqd_sum_bw_n
df_wn = n_samples - 2
msw = ss_wn / df_wn
f_values = sqd_sum_bw_n / msw
return np.sort(np.argsort(f_values)[::-1][: int(np.ceil(X.shape[0] / 10.0))])
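Usage-wise, this returns the sorted indices of the top ~10% of features ranked by one-way ANOVA F-value; a hedged example call on random data (assuming the commit where the function is still enabled):

import numpy as np

rng = np.random.default_rng(42)
X_feat = rng.integers(-100, 100, (50, 200)).astype(np.int32)  # (features, samples)
y = rng.integers(0, 2, 200).astype(np.uint8)
best = get_best_anova_features(X_feat, y)
print(best.shape)  # (5,): ceil(50 / 10) feature indices, sorted ascending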

View File

@ -18,10 +18,10 @@ def set_integral_image(X: np.ndarray) -> np.ndarray:
"""Transform the input images in integrated images (CPU version). """Transform the input images in integrated images (CPU version).
Args: Args:
X (np.ndarray): Dataset of images X (np.ndarray): Dataset of images.
Returns: Returns:
np.ndarray: Dataset of integrated images np.ndarray: Dataset of integrated images.
""" """
X_ii = np.empty_like(X, dtype = np.uint32) X_ii = np.empty_like(X, dtype = np.uint32)
for i, Xi in enumerate(tqdm_iter(X, "Applying integral image")): for i, Xi in enumerate(tqdm_iter(X, "Applying integral image")):
@ -34,18 +34,59 @@ def set_integral_image(X: np.ndarray) -> np.ndarray:
X_ii[i] = ii X_ii[i] = ii
return X_ii return X_ii
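Whatever the truncated loop does exactly, the integral-image definition itself is easy to sanity-check: each cell holds the sum of every pixel above and to the left of it, i.e. a double cumulative sum (inclusive convention shown; the repo's indexing may use the shifted/exclusive variant):

import numpy as np

X = np.array([[1, 2],
              [3, 4]], dtype = np.uint32)
print(X.cumsum(axis = 0).cumsum(axis = 1))
# [[ 1  3]
#  [ 4 10]]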
@njit('uint32(uint32[:, :], int16, int16, int16, int16)')
def __compute_feature__(ii: np.ndarray, x: int, y: int, w: int, h: int) -> int:
"""Compute a feature on an integrated image at a specific coordinate (CPU version).
Args:
ii (np.ndarray): Integrated image.
x (int): X coordinate.
y (int): Y coordinate.
w (int): width of the feature.
h (int): height of the feature.
Returns:
int: Computed feature.
"""
return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]
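The four-corner identity returned above yields any rectangle sum in O(1), independent of the rectangle's size. Worked numerically under the exclusive, zero-padded convention where ii[y, x] is the sum of pixels strictly above and to the left (an assumption, since the padding is not visible in this hunk):

import numpy as np

X = np.arange(1, 17, dtype = np.uint32).reshape(4, 4)
ii = np.zeros((5, 5), dtype = np.uint32)
ii[1:, 1:] = X.cumsum(axis = 0).cumsum(axis = 1)  # exclusive integral image

x, y, w, h = 1, 1, 2, 2
fast = ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]
print(fast, X[y:y + h, x:x + w].sum())  # 34 34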
@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])')
def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
"""Apply the features on a integrated image dataset (CPU version).
Args:
feats (np.ndarray): Features to apply.
X_ii (np.ndarray): Integrated image dataset.
Returns:
np.ndarray: Applied features.
"""
X_feat = np.empty((feats.shape[0], X_ii.shape[0]), dtype = np.int32)
for i, (p, n) in enumerate(tqdm_iter(feats, "Applying features")):
for j, x_i in enumerate(X_ii):
p_x, p_y, p_w, p_h = p[0]
p1_x, p1_y, p1_w, p1_h = p[1]
n_x, n_y, n_w, n_h = n[0]
n1_x, n1_y, n1_w, n1_h = n[1]
p1 = __compute_feature__(x_i, p_x, p_y, p_w, p_h) + __compute_feature__(x_i, p1_x, p1_y, p1_w, p1_h)
n1 = __compute_feature__(x_i, n_x, n_y, n_w, n_h) + __compute_feature__(x_i, n1_x, n1_y, n1_w, n1_h)
X_feat[i, j] = int32(p1) - int32(n1)
return X_feat
@njit('int32[:, :](int32[:, :], uint16[:, :], uint8[:], float64[:])') @njit('int32[:, :](int32[:, :], uint16[:, :], uint8[:], float64[:])')
def train_weak_clf(X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray, weights: np.ndarray) -> np.ndarray: def train_weak_clf(X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray, weights: np.ndarray) -> np.ndarray:
"""Train the weak classifiers on a given dataset (CPU version). """Train the weak classifiers on a given dataset (CPU version).
Args: Args:
X_feat (np.ndarray): Feature images dataset X_feat (np.ndarray): Feature images dataset.
X_feat_argsort (np.ndarray): Sorted indexes of the integrated features X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
y (np.ndarray): Labels of the features y (np.ndarray): Labels of the features.
weights (np.ndarray): Weights of the features weights (np.ndarray): Weights of the features.
Returns: Returns:
np.ndarray: Trained weak classifiers np.ndarray: Trained weak classifiers.
""" """
total_pos, total_neg = weights[y == 1].sum(), weights[y == 0].sum() total_pos, total_neg = weights[y == 1].sum(), weights[y == 0].sum()
@ -71,85 +112,29 @@ def train_weak_clf(X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray
classifiers[i] = (best_threshold, best_polarity) classifiers[i] = (best_threshold, best_polarity)
return classifiers return classifiers
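The scan elided by this hunk is, in the standard Viola-Jones formulation, a single pass per feature over its sorted values, tracking the positive/negative weight seen so far; at each candidate threshold the stump error is min(S- + (T+ - S+), S+ + (T- - S-)). A hedged single-feature sketch (names are not from this repo):

import numpy as np

def best_stump(x, y, w):
    t_pos, t_neg = w[y == 1].sum(), w[y == 0].sum()
    s_pos = s_neg = 0.0
    best = (np.inf, 0, 0)  # (error, threshold, polarity)
    for j in np.argsort(x):
        err_below = s_neg + (t_pos - s_pos)  # polarity +1: 1 iff x < threshold
        err_above = s_pos + (t_neg - s_neg)  # polarity -1: 1 iff x > threshold
        if min(err_below, err_above) < best[0]:
            best = (err_below, x[j], 1) if err_below <= err_above \
                else (err_above, x[j], -1)
        if y[j] == 1:
            s_pos += w[j]
        else:
            s_neg += w[j]
    return best

x, y = np.array([3, 9, 1, 7]), np.array([1, 0, 1, 0])
e, thr, pol = best_stump(x, y, np.full(4, 0.25))
print(e, thr, pol)  # 0.0 7 1 -> positives sit below 7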
@njit('uint32(uint32[:, :], int16, int16, int16, int16)')
def __compute_feature__(ii: np.ndarray, x: int, y: int, w: int, h: int) -> int:
"""Compute a feature on an integrated image at a specific coordinate (CPU version).
Args:
ii (np.ndarray): Integrated image
x (int): X coordinate
y (int): Y coordinate
w (int): width of the feature
h (int): height of the feature
Returns:
int: Computed feature
"""
return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]
@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])')
def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
"""Apply the features on a integrated image dataset (CPU version).
Args:
feats (np.ndarray): Features to apply
X_ii (np.ndarray): Integrated image dataset
Returns:
np.ndarray: Applied features
"""
X_feat = np.empty((feats.shape[0], X_ii.shape[0]), dtype = np.int32)
for i, (p, n) in enumerate(tqdm_iter(feats, "Applying features")):
for j, x_i in enumerate(X_ii):
p_x, p_y, p_w, p_h = p[0]
p1_x, p1_y, p1_w, p1_h = p[1]
n_x, n_y, n_w, n_h = n[0]
n1_x, n1_y, n1_w, n1_h = n[1]
p1 = __compute_feature__(x_i, p_x, p_y, p_w, p_h) + __compute_feature__(x_i, p1_x, p1_y, p1_w, p1_h)
n1 = __compute_feature__(x_i, n_x, n_y, n_w, n_h) + __compute_feature__(x_i, n1_x, n1_y, n1_w, n1_h)
X_feat[i, j] = int32(p1) - int32(n1)
return X_feat
@njit('int32(int32[:], uint16[:], int32, int32)') @njit('int32(int32[:], uint16[:], int32, int32)')
def _as_partition_(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int) -> int: def as_partition(a: np.ndarray, indices: np.ndarray, l: int, h: int) -> int:
"""Partition of the argsort algorithm. i = l - 1
j = l
Args: for j in range(l, h + 1):
d_a (np.ndarray): Array on device to sort if a[indices[j]] < a[indices[h]]:
d_indices (np.ndarray): Array of indices on device to write to
low (int): lower bound to sort
high (int): higher bound to sort
Returns:
int: Last index sorted
"""
i, j = low - 1, low
for j in range(low, high + 1):
if d_a[d_indices[j]] < d_a[d_indices[high]]:
i += 1 i += 1
d_indices[i], d_indices[j] = d_indices[j], d_indices[i] indices[i], indices[j] = indices[j], indices[i]
i += 1 i += 1
d_indices[i], d_indices[j] = d_indices[j], d_indices[i] indices[i], indices[j] = indices[j], indices[i]
return i return i
@njit('void(int32[:], uint16[:], int32, int32)') @njit('void(int32[:], uint16[:], int32, int32)')
def argsort_bounded(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int) -> None: def argsort_bounded(a: np.ndarray, indices: np.ndarray, l: int, h: int):
"""Perform an indirect sort of a given array within a given bound. total = h - l + 1;
Args:
d_a (np.ndarray): Array to sort
d_indices (np.ndarray): Array of indices to write to
low (int): lower bound to sort
high (int): higher bound to sort
"""
total = high - low + 1
stack = np.empty((total,), dtype = np.int32) stack = np.empty((total,), dtype = np.int32)
stack[0] = low stack[0] = l
stack[1] = high stack[1] = h
top = 1 top = 1;
low = l
high = h
while top >= 0: while top >= 0:
high = stack[top] high = stack[top]
@ -158,32 +143,24 @@ def argsort_bounded(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int)
top -= 1 top -= 1
if low >= high: if low >= high:
break break;
p = _as_partition_(d_a, d_indices, low, high) p = as_partition(a, indices, low, high);
if p - 1 > low: if p - 1 > low:
top += 1 top += 1
stack[top] = low stack[top] = low;
top += 1 top += 1
stack[top] = p - 1 stack[top] = p - 1;
if p + 1 < high: if p + 1 < high:
top += 1 top += 1
stack[top] = p + 1 stack[top] = p + 1;
top += 1 top += 1
stack[top] = high stack[top] = high;
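Behaviourally, both versions implement an in-place, stack-based (iterative) quicksort over an index array restricted to [low, high]; a usage sketch, assuming the jitted function above is in scope:

import numpy as np

a = np.array([30, 10, 20, 5], dtype = np.int32)
idx = np.arange(4, dtype = np.uint16)
argsort_bounded(a, idx, 0, 3)  # sorts the indices in place
print(idx, a[idx])             # [3 1 2 0] [ 5 10 20 30]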
@njit('uint16[:, :](int32[:, :])') @njit('uint16[:, :](int32[:, :])')
def argsort_2d(X_feat: np.ndarray) -> np.ndarray: def argsort(X_feat: np.ndarray) -> np.ndarray:
"""Perform an indirect sort of a given array.
Args:
X_feat (np.ndarray): Array to sort
Returns:
np.ndarray: Array of indices that sort the array
"""
indices = np.empty_like(X_feat, dtype = np.uint16) indices = np.empty_like(X_feat, dtype = np.uint16)
indices[:, :] = np.arange(indices.shape[1]) indices[:, :] = np.arange(indices.shape[1])
for i in tqdm_iter(range(X_feat.shape[0]), "argsort"): for i in tqdm_iter(range(X_feat.shape[0]), "argsort"):

View File

@ -12,10 +12,10 @@ def __scanCPU_3d__(X: np.ndarray) -> np.ndarray:
"""Prefix Sum (scan) of a given dataset. """Prefix Sum (scan) of a given dataset.
Args: Args:
X (np.ndarray): Dataset of images to apply sum X (np.ndarray): Dataset of images to apply sum.
Returns: Returns:
np.ndarray: Scanned dataset of images np.ndarray: Scanned dataset of images.
""" """
for x in range(X.shape[0]): for x in range(X.shape[0]):
for y in range(X.shape[1]): for y in range(X.shape[1]):
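The hunk cuts the body off here; the operation itself is a running total along each row, i.e. an inclusive scan, shown below for a single row via NumPy (the GPU path cited later may implement the exclusive variant from the GPU Gems chapter):

import numpy as np

row = np.array([1, 2, 3, 4], dtype = np.uint32)
print(row.cumsum())  # [ 1  3  6 10]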
@ -30,10 +30,10 @@ def __kernel_scan_3d__(n: int, j: int, d_inter: np.ndarray, d_a: np.ndarray) ->
"""GPU kernel used to do a parallel prefix sum (scan). """GPU kernel used to do a parallel prefix sum (scan).
Args: Args:
n (int): Number of width blocks n (int):
j (int): Temporary sum index j (int): [description]
d_inter (np.ndarray): Temporary sums on device to add d_inter (np.ndarray): [description]
d_a (np.ndarray): Dataset of images on device to apply sum d_a (np.ndarray): [description]
""" """
x_coor, y_coor = cuda.grid(2) x_coor, y_coor = cuda.grid(2)
@ -76,10 +76,10 @@ def __add_3d__(d_X: np.ndarray, d_s: np.ndarray, n: int, m: int) -> None:
"""GPU kernel for parallel sum. """GPU kernel for parallel sum.
Args: Args:
d_X (np.ndarray): Dataset of images on device d_X (np.ndarray): Dataset of images.
d_s (np.ndarray): Temporary sums on device to add d_s (np.ndarray): Temporary sums to add.
n (int): Number of width blocks n (int): Number of width blocks.
m (int): Height of a block m (int): Height of a block.
""" """
x_coor, y_coor = cuda.grid(2) x_coor, y_coor = cuda.grid(2)
if x_coor < n and y_coor < m: if x_coor < n and y_coor < m:
@ -91,10 +91,10 @@ def __scanGPU_3d__(X: np.ndarray) -> np.ndarray:
Read more: https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda Read more: https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda
Args: Args:
X (np.ndarray): Dataset of images X (np.ndarray): Dataset of images.
Returns: Returns:
np.ndarray: Scanned dataset of images np.ndarray: Scanned dataset of images.
""" """
k, height, n = X.shape k, height, n = X.shape
n_block_x, n_block_y = np.ceil(np.divide(X.shape[1:], NB_THREADS_2D)).astype(np.uint64) n_block_x, n_block_y = np.ceil(np.divide(X.shape[1:], NB_THREADS_2D)).astype(np.uint64)
@ -131,10 +131,10 @@ def __transpose_kernel__(d_X: np.ndarray, d_Xt: np.ndarray) -> None:
"""GPU kernel of the function __transpose_3d__. """GPU kernel of the function __transpose_3d__.
Args: Args:
d_X (np.ndarray): Dataset of images on device d_X (np.ndarray): Dataset of images.
d_Xt(np.ndarray): Transposed dataset of images d_Xt(np.ndarray): Transposed dataset of images.
width (int): Width of each image in the dataset width (int): Width of each image in the dataset.
height (int): Height of each image in the dataset height (int): Height of each image in the dataset.
""" """
temp = cuda.shared.array(NB_THREADS_2D, dtype = uint32) temp = cuda.shared.array(NB_THREADS_2D, dtype = uint32)
@ -152,10 +152,10 @@ def __transpose_3d__(X: np.ndarray) -> np.ndarray:
"""Transpose every images in the given dataset. """Transpose every images in the given dataset.
Args: Args:
X (np.ndarray): Dataset of images X (np.ndarray): Dataset of images.
Returns: Returns:
np.ndarray: Transposed dataset of images np.ndarray: Transposed dataset of images.
""" """
n_block_x, n_block_z = np.ceil(np.divide(X.shape[1:], NB_THREADS_2D)).astype(np.uint64) n_block_x, n_block_z = np.ceil(np.divide(X.shape[1:], NB_THREADS_2D)).astype(np.uint64)
d_X = cuda.to_device(X) d_X = cuda.to_device(X)
@ -167,10 +167,10 @@ def set_integral_image(X: np.ndarray) -> np.ndarray:
"""Transform the input images in integrated images (GPU version). """Transform the input images in integrated images (GPU version).
Args: Args:
X (np.ndarray): Dataset of images X (np.ndarray): Dataset of images.
Returns: Returns:
np.ndarray: Dataset of integrated images np.ndarray: Dataset of integrated images.
""" """
X = X.astype(np.uint32) X = X.astype(np.uint32)
X = __scanGPU_3d__(X) X = __scanGPU_3d__(X)
@ -184,13 +184,13 @@ def __train_weak_clf_kernel__(d_classifiers: np.ndarray, d_y: np.ndarray, d_X_fe
"""GPU kernel of the function train_weak_clf. """GPU kernel of the function train_weak_clf.
Args: Args:
d_classifiers (np.ndarray): Weak classifiers on device to train d_classifiers (np.ndarray): Weak classifiers to train.
d_y (np.ndarray): Labels of the features on device d_y (np.ndarray): Labels of the features.
d_X_feat (np.ndarray): Feature images dataset on device d_X_feat (np.ndarray): Feature images dataset.
d_X_feat_argsort (np.ndarray): Sorted indexes of the integrated features on device d_X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
d_weights (np.ndarray): Weights of the features on device d_weights (np.ndarray): Weights of the features.
total_pos (float): Total of positive labels in the dataset total_pos (float): Total of positive labels in the dataset.
total_neg (float): Total of negative labels in the dataset total_neg (float): Total of negative labels in the dataset.
""" """
i = cuda.blockIdx.x * cuda.blockDim.x * cuda.blockDim.y * cuda.blockDim.z i = cuda.blockIdx.x * cuda.blockDim.x * cuda.blockDim.y * cuda.blockDim.z
i += cuda.threadIdx.x * cuda.blockDim.y * cuda.blockDim.z i += cuda.threadIdx.x * cuda.blockDim.y * cuda.blockDim.z
@ -224,13 +224,13 @@ def train_weak_clf(X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray
"""Train the weak classifiers on a given dataset (GPU version). """Train the weak classifiers on a given dataset (GPU version).
Args: Args:
X_feat (np.ndarray): Feature images dataset X_feat (np.ndarray): Feature images dataset.
X_feat_argsort (np.ndarray): Sorted indexes of the integrated features X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
y (np.ndarray): Labels of the features y (np.ndarray): Labels of the features.
weights (np.ndarray): Weights of the features weights (np.ndarray): Weights of the features.
Returns: Returns:
np.ndarray: Trained weak classifiers np.ndarray: Trained weak classifiers.
""" """
total_pos, total_neg = weights[y == 1].sum(), weights[y == 0].sum() total_pos, total_neg = weights[y == 1].sum(), weights[y == 0].sum()
d_classifiers = cuda.to_device(np.empty((X_feat.shape[0], 2), dtype = np.int32)) d_classifiers = cuda.to_device(np.empty((X_feat.shape[0], 2), dtype = np.int32))
@ -247,52 +247,52 @@ def __compute_feature__(ii: np.ndarray, x: int, y: int, w: int, h: int) -> int:
"""Compute a feature on an integrated image at a specific coordinate (GPU version). """Compute a feature on an integrated image at a specific coordinate (GPU version).
Args: Args:
ii (np.ndarray): Integrated image ii (np.ndarray): Integrated image.
x (int): X coordinate x (int): X coordinate.
y (int): Y coordinate y (int): Y coordinate.
w (int): width of the feature w (int): width of the feature.
h (int): height of the feature h (int): height of the feature.
Returns: Returns:
int: Computed feature int: Computed feature.
""" """
return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w] return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]
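For intuition on the four-lookup trick in __compute_feature__: once an image is turned into its integral image, any box sum costs two additions and two subtractions, whatever the box size. A NumPy sketch of the identity, where a zero-padded cumulative sum stands in for the repository's scan/transpose pipeline:

import numpy as np

img = np.arange(16, dtype = np.uint32).reshape(4, 4)
# Zero-padded integral image: ii[y, x] holds the sum of img[:y, :x]
ii = np.zeros((5, 5), dtype = np.uint32)
ii[1:, 1:] = img.cumsum(axis = 0).cumsum(axis = 1)

x, y, w, h = 1, 1, 2, 2                                          # a 2x2 box anchored at (1, 1)
box = ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]
assert box == img[y:y + h, x:x + w].sum()                        # 5 + 6 + 9 + 10 == 30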
@cuda.jit('void(int32[:, :], uint8[:, :, :, :], uint32[:, :, :])') @cuda.jit('void(int32[:, :], uint8[:, :, :, :], uint32[:, :, :])')
def __apply_feature_kernel__(d_X_feat: np.ndarray, d_feats: np.ndarray, d_X_ii: np.ndarray) -> None: def __apply_feature_kernel__(X_feat: np.ndarray, feats: np.ndarray, X_ii: np.ndarray) -> None:
"""GPU kernel of the function apply_features. """GPU kernel of the function apply_features.
Args: Args:
d_X_feat (np.ndarray): Feature images dataset on device X_feat (np.ndarray): Feature images dataset.
d_feats (np.ndarray): Features on device to apply feats (np.ndarray): Features to apply.
d_X_ii (np.ndarray): Integrated image dataset on device X_ii (np.ndarray): Integrated image dataset.
n (int): Number of features n (int): Number of features.
m (int): Number of images of the dataset m (int): Number of images of the dataset.
""" """
x, y = cuda.grid(2) x, y = cuda.grid(2)
if x >= d_feats.shape[0] or y >= d_X_ii.shape[0]: if x >= feats.shape[0] or y >= X_ii.shape[0]:
return return
p_x, p_y, p_w, p_h = d_feats[x, 0, 0] p_x, p_y, p_w, p_h = feats[x, 0, 0]
p1_x, p1_y, p1_w, p1_h = d_feats[x, 0, 1] p1_x, p1_y, p1_w, p1_h = feats[x, 0, 1]
n_x, n_y, n_w, n_h = d_feats[x, 1, 0] n_x, n_y, n_w, n_h = feats[x, 1, 0]
n1_x, n1_y, n1_w, n1_h = d_feats[x, 1, 1] n1_x, n1_y, n1_w, n1_h = feats[x, 1, 1]
sP = __compute_feature__(d_X_ii[y], p_x, p_y, p_w, p_h) + \ sP = __compute_feature__(X_ii[y], p_x, p_y, p_w, p_h) + \
__compute_feature__(d_X_ii[y], p1_x, p1_y, p1_w, p1_h) __compute_feature__(X_ii[y], p1_x, p1_y, p1_w, p1_h)
sN = __compute_feature__(d_X_ii[y], n_x, n_y, n_w, n_h) + \ sN = __compute_feature__(X_ii[y], n_x, n_y, n_w, n_h) + \
__compute_feature__(d_X_ii[y], n1_x, n1_y, n1_w, n1_h) __compute_feature__(X_ii[y], n1_x, n1_y, n1_w, n1_h)
d_X_feat[x, y] = sP - sN X_feat[x, y] = sP - sN
#@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])') #@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])')
def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray: def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
"""Apply the features on a integrated image dataset (GPU version). """Apply the features on a integrated image dataset (GPU version).
Args: Args:
feats (np.ndarray): Features to apply feats (np.ndarray): Features to apply.
X_ii (np.ndarray): Integrated image dataset X_ii (np.ndarray): Integrated image dataset.
Returns: Returns:
np.ndarray: Applied features np.ndarray: Applied features.
""" """
d_X_feat = cuda.to_device(np.empty((feats.shape[0], X_ii.shape[0]), dtype = np.int32)) d_X_feat = cuda.to_device(np.empty((feats.shape[0], X_ii.shape[0]), dtype = np.int32))
d_feats = cuda.to_device(feats) d_feats = cuda.to_device(feats)
@ -303,44 +303,28 @@ def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
return d_X_feat.copy_to_host() return d_X_feat.copy_to_host()
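For reference, the kernel above runs on a 2D grid, one axis per feature and one per image, with the early return masking out-of-range threads. A minimal sketch of the launch arithmetic, assuming a (16, 16) block shape for NB_THREADS_2D and invented dataset sizes:

import numpy as np

NB_THREADS_2D = (16, 16)                  # assumed block shape; the real value lives in config.py
n_feats, n_imgs = 2 ** 17, 2 ** 13        # invented sizes for the illustration
blocks = (int(np.ceil(n_feats / NB_THREADS_2D[0])),
          int(np.ceil(n_imgs / NB_THREADS_2D[1])))
# __apply_feature_kernel__[blocks, NB_THREADS_2D](d_X_feat, d_feats, d_X_ii)
# blocks == (8192, 512): every (feature, image) pair gets a thread, extras exit early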
@cuda.jit('int32(int32[:], uint16[:], int32, int32)', device = True) @cuda.jit('int32(int32[:], uint16[:], int32, int32)', device = True)
def _as_partition_(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int) -> int: def as_partition(a: np.ndarray, indices: np.ndarray, l: int, h: int) -> int:
"""Partition of the argsort algorithm. i = l - 1
j = l
Args: for j in range(l, h + 1):
d_a (np.ndarray): Array on device to sort if a[indices[j]] < a[indices[h]]:
d_indices (np.ndarray): Array of indices on device to write to
low (int): lower bound to sort
high (int): upper bound to sort
Returns:
int: Last index sorted
"""
i = low - 1
j = low
for j in range(low, high + 1):
if d_a[d_indices[j]] < d_a[d_indices[high]]:
i += 1 i += 1
d_indices[i], d_indices[j] = d_indices[j], d_indices[i] indices[i], indices[j] = indices[j], indices[i]
i += 1 i += 1
d_indices[i], d_indices[j] = d_indices[j], d_indices[i] indices[i], indices[j] = indices[j], indices[i]
return i return i
@cuda.jit('void(int32[:], uint16[:], int32, int32)', device = True) @cuda.jit('void(int32[:], uint16[:], int32, int32)', device = True)
def argsort_bounded(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int) -> None: def argsort_bounded(a: np.ndarray, indices: np.ndarray, l: int, h: int) -> None:
"""Perform an indirect sort of a given array within a given bound. #total = h - l + 1;
Args:
d_a (np.ndarray): Array on device to sort
d_indices (np.ndarray): Array of indices on device to write to
low (int): lower bound to sort
high (int): upper bound to sort
"""
#total = high - low + 1;
stack = cuda.local.array(6977, int32) stack = cuda.local.array(6977, int32)
stack[0] = low stack[0] = l
stack[1] = high stack[1] = h
top = 1 top = 1;
low = l
high = h
while top >= 0: while top >= 0:
high = stack[top] high = stack[top]
@ -349,50 +333,35 @@ def argsort_bounded(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int)
top -= 1 top -= 1
if low >= high: if low >= high:
break break;
p = _as_partition_(d_a, d_indices, low, high) p = as_partition(a, indices, low, high);
if p - 1 > low: if p - 1 > low:
top += 1 top += 1
stack[top] = low stack[top] = low;
top += 1 top += 1
stack[top] = p - 1 stack[top] = p - 1;
if p + 1 < high: if p + 1 < high:
top += 1 top += 1
stack[top] = p + 1 stack[top] = p + 1;
top += 1 top += 1
stack[top] = high stack[top] = high;
@cuda.jit('void(int32[:, :], uint16[:, :])') @cuda.jit('void(int32[:, :], uint16[:, :])')
def argsort_flatter(d_a: np.ndarray, d_indices: np.ndarray) -> None: def argsort_flatter(X_feat: np.ndarray, indices: np.ndarray) -> None:
# TODO Finish doxygen
"""Cuda kernel where argsort is applied to every column of a given 2D array.
Args:
d_a (np.ndarray): 2D Array on device to sort
d_indices (np.ndarray): 2D Array of indices on device to write to
"""
i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
if i < d_a.shape[0]: if i < X_feat.shape[0]:
for j in range(d_indices.shape[1]): for j in range(indices.shape[1]):
d_indices[i, j] = j indices[i, j] = j
argsort_bounded(d_a[i], d_indices[i], 0, d_a.shape[1] - 1) argsort_bounded(X_feat[i], indices[i], 0, X_feat.shape[1] - 1)
def argsort_2d(a: np.ndarray) -> np.ndarray: def argsort(X_feat: np.ndarray) -> np.ndarray:
"""Perform an indirect sort on each column of a given 2D array indices = np.empty_like(X_feat, dtype = np.uint16)
n_blocks = int(np.ceil(np.divide(X_feat.shape[0], NB_THREADS)))
Args: d_X_feat = cuda.to_device(X_feat)
a (np.ndarray): 2D Array to sort
Returns:
np.ndarray: 2D Array of indices that sort the array
"""
indices = np.empty_like(a, dtype = np.uint16)
n_blocks = int(np.ceil(np.divide(a.shape[0], NB_THREADS)))
d_a = cuda.to_device(a)
d_indices = cuda.to_device(indices) d_indices = cuda.to_device(indices)
argsort_flatter[n_blocks, NB_THREADS](d_a, d_indices) argsort_flatter[n_blocks, NB_THREADS](d_X_feat, d_indices)
cuda.synchronize() cuda.synchronize()
return d_indices.copy_to_host() return d_indices.copy_to_host()
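The wrapper above parallelizes across rows: each CUDA thread initializes and sorts one row's index array, so a single sort is sequential but thousands of independent sorts run at once. Note also that cuda.local.array needs a compile-time constant, hence the hard-wired 6977-entry stack, which presumably covers the widest row ever sorted. A sketch of the launch arithmetic, assuming NB_THREADS = 1024:

import numpy as np

NB_THREADS = 1024                            # assumed 1D block size; the real value lives in config.py
rows = 2 ** 13                               # invented row count, one thread per row
n_blocks = int(np.ceil(rows / NB_THREADS))   # -> 8 blocks of 1024 threads
# argsort_flatter[n_blocks, NB_THREADS](d_a, d_indices)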

@ -3,8 +3,9 @@
# Exit if any of the command doesn't exit with code 0 # Exit if any of the command doesn't exit with code 0
set -e set -e
test -z "$EXEC_DIR" && EXEC_DIR=. EXEC_DIR=$1
test -z "$VENV_PATH" && VENV_PATH="$EXEC_DIR/venv" test -z "$EXEC_DIR" && EXEC_DIR=..
VENV_PATH=$EXEC_DIR/python/venv
activate(){ activate(){
if [ ! -d "$VENV_PATH" ]; then if [ ! -d "$VENV_PATH" ]; then
@ -15,9 +16,9 @@ activate(){
echo 'Updating base pip packages' echo 'Updating base pip packages'
python -m pip install -U setuptools pip python -m pip install -U setuptools pip
echo 'Installing requirements' echo 'Installing requirements'
pip install -r requirements.txt pip install -r "$EXEC_DIR"/python/requirements.txt
elif [ -f "$VENV_PATH"/Scripts/activate ]; then . "$VENV_PATH"/Scripts/activate elif [ -f "$VENV_PATH"/Scripts/activate ]; then source "$VENV_PATH"/Scripts/activate
elif [ -f "$VENV_PATH"/bin/activate ]; then . "$VENV_PATH"/bin/activate elif [ -f "$VENV_PATH"/bin/activate ]; then source "$VENV_PATH"/bin/activate
else else
echo 'Python virtual environment not detected' echo 'Python virtual environment not detected'
exit 1 exit 1

@ -1,29 +1,29 @@
from toolbox import pickle_multi_loader, format_time_ns, unit_test_argsort_2d, header, footer, formatted_line, formatted_row from toolbox import picke_multi_loader, format_time_ns, unit_test_argsort_2d
from typing import List, Tuple from typing import List, Tuple
from time import perf_counter_ns from time import perf_counter_ns
from sys import stderr
import numpy as np import numpy as np
from config import OUT_DIR, DATA_DIR, __DEBUG from config import OUT_DIR, DATA_DIR, __DEBUG
def unit_test(TS: List[int], labels: List[str] = ['CPU', 'GPU', 'PY', 'PGPU'], tol: float = 1e-8) -> None: def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU", "PY", "PGPU"], tol: float = 1e-8) -> None:
"""Test if the each result is equals to other devices. """Test if the each result is equals to other devices.
Given ViolaJones is a fully deterministic algorithm. The results, regardless the device, should be the same Given ViolaJones is a deterministic algorithm, the results no matter the device should be the same
(given the floating point fluctuations), this function check this assertion. (given the floating point fluctuations), this function check this assertion.
Args: Args:
TS (List[int]): Number of trained weak classifiers TS (List[int]): Number of trained weak classifiers.
labels (List[str], optional): List of the trained device names. Defaults to ['CPU', 'GPU', 'PY', 'PGPU'] (see config.py for more info) labels (List[str], optional): List of the trained device names. Defaults to ["CPU", "GPU", "PY", "PGPU"] (see config.py for more info).
tol (float, optional): Float difference tolerance. Defaults to 1e-8 tol (float, optional): Float difference tolerance. Defaults to 1e-8.
""" """
if len(labels) < 2: if len(labels) < 2:
return print('Not enough devices to test') return print("Not enough devices to test")
unit_gaps = [37, -10, -18, 29] print(f"\n| {'Unit testing':<37} | {'Test state':<10} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |")
header(unit_gaps, ['Unit testing', 'Test state', 'Time spent (ns)', 'Formatted time spent']) print(f"|{'-'*39}|{'-'*12}|{'-'*20}|{'-'*31}|")
unit_timestamp = perf_counter_ns() fnc_s = perf_counter_ns()
n_total, n_success = 0, 0 n_total = 0
n_success = 0
def test_fnc(title, fnc): def test_fnc(title, fnc):
nonlocal n_total, n_success nonlocal n_total, n_success
@ -32,104 +32,96 @@ def unit_test(TS: List[int], labels: List[str] = ['CPU', 'GPU', 'PY', 'PGPU'], t
state = fnc() state = fnc()
e = perf_counter_ns() - s e = perf_counter_ns() - s
if state: if state:
formatted_row(unit_gaps, [title, 'Passed', f'{e:,}', format_time_ns(e)]) print(f"| {title:<37} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
n_success += 1 n_success += 1
else: else:
formatted_row(unit_gaps, [title, 'Failed', f'{e:,}', format_time_ns(e)]) print(f"| {title:<37} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
for set_name in ['train', 'test']: for set_name in ["train", "test"]:
for filename in ['ii', 'feat']: for filename in ["ii", "feat"]:
title = f'X_{set_name}_{filename}' title = f"X_{set_name}_{filename}"
print(f'{filename}...', file = stderr, end = '\r') print(f"{filename}...", end = "\r")
bs = pickle_multi_loader([f'{title}_{label}' for label in labels], OUT_DIR) bs = picke_multi_loader([f"{title}_{label}" for label in labels], OUT_DIR)
for i, (b1, l1) in enumerate(zip(bs, labels)): for i, (b1, l1) in enumerate(zip(bs, labels)):
if b1 is None: if b1 is None:
if __DEBUG: if __DEBUG:
formatted_row(unit_gaps, [f'{title:<22} - {l1:<12}', 'Skipped', 'None', 'None']) print(f"| {title:<22} - {l1:<12} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
continue continue
for j, (b2, l2) in enumerate(zip(bs, labels)): for j, (b2, l2) in enumerate(zip(bs, labels)):
if i >= j: if i >= j:
continue continue
if b2 is None: if b2 is None:
if __DEBUG: if __DEBUG:
formatted_row(unit_gaps, [f'{title:<22} - {l1:<4} vs {l2:<4}', 'Skipped', 'None', 'None']) print(f"| {title:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
continue continue
test_fnc(f'{title:<22} - {l1:<4} vs {l2:<4}', lambda: np.abs(b1 - b2).mean() < tol) test_fnc(f"{title:<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol)
title = f'X_{set_name}_feat_argsort' title = f"X_{set_name}_feat_argsort"
print(f'Loading {title}...', file = stderr, end = '\r') print(f"Loading {title}...", end = "\r")
feat = None feat = None
#indices = pickle_multi_loader(['indices'], OUT_DIR)[0]
bs = [] bs = []
for label in labels: for label in labels:
if feat is None: if feat is None:
feat_tmp = pickle_multi_loader([f'X_{set_name}_feat_{label}'], OUT_DIR)[0] feat_tmp = picke_multi_loader([f"X_{set_name}_feat_{label}"], OUT_DIR)[0]
if feat_tmp is not None: if feat_tmp is not None:
#feat = feat_tmp[indices]
feat = feat_tmp feat = feat_tmp
bs.append(pickle_multi_loader([f'{title}_{label}'], OUT_DIR)[0]) bs.append(picke_multi_loader([f"{title}_{label}"], OUT_DIR)[0])
for i, (b1, l1) in enumerate(zip(bs, labels)): for i, (b1, l1) in enumerate(zip(bs, labels)):
if b1 is None: if b1 is None:
if __DEBUG: if __DEBUG:
formatted_row(unit_gaps, [f'{title:<22} - {l1:<12}', 'Skipped', 'None', 'None']) print(f"| {title:<22} - {l1:<12} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
continue continue
if feat is not None: if feat is not None:
test_fnc(f'{title:<22} - {l1:<4} argsort', lambda: unit_test_argsort_2d(feat, b1)) test_fnc(f"{title:<22} - {l1:<4} argsort", lambda: unit_test_argsort_2d(feat, b1))
for j, (b2, l2) in enumerate(zip(bs, labels)): for j, (b2, l2) in enumerate(zip(bs, labels)):
if i >= j: if i >= j:
continue continue
if b2 is None: if b2 is None:
if __DEBUG: if __DEBUG:
formatted_row(unit_gaps, [f'{title:<22} - {l1:<4} vs {l2:<4}', 'Skipped', 'None', 'None']) print(f"| {title:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
continue continue
test_fnc(f'{title:<22} - {l1:<4} vs {l2:<4}', lambda: np.abs(b1 - b2).mean() < tol) test_fnc(f"{title:<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol)
for T in TS: for T in TS:
for filename in ['alphas', 'final_classifiers']: for filename in ["alphas", "final_classifiers"]:
print(f'{filename}_{T}...', file = stderr, end = '\r') print(f"{filename}_{T}...", end = "\r")
bs = pickle_multi_loader([f'{filename}_{T}_{label}' for label in labels]) bs = picke_multi_loader([f"{filename}_{T}_{label}" for label in labels])
for i, (b1, l1) in enumerate(zip(bs, labels)): for i, (b1, l1) in enumerate(zip(bs, labels)):
if b1 is None: if b1 is None:
if __DEBUG: if __DEBUG:
formatted_row(unit_gaps, [f"{filename + '_' + str(T):<22} - {l1:<12}", 'Skipped', 'None', 'None']) print(f"| {filename + '_' + str(T):<22} - {l1:<12} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
continue continue
for j, (b2, l2) in enumerate(zip(bs, labels)): for j, (b2, l2) in enumerate(zip(bs, labels)):
if i >= j: if i >= j:
continue continue
if b2 is None: if b2 is None:
if __DEBUG: if __DEBUG:
formatted_row(unit_gaps, [f"{filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4}", 'Skipped', 'None', 'None']) print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
continue continue
test_fnc(f"{filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol) test_fnc(f"{filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol)
time_spent = perf_counter_ns() - unit_timestamp print(f"|{'-'*39}|{'-'*12}|{'-'*20}|{'-'*31}|")
e = perf_counter_ns() - fnc_s
if n_total == 0: print(f"| {'Unit testing summary':<37} | {str(n_success) + '/' + str(n_total):>10} | {e:>18,} | {format_time_ns(e):<29} |")
formatted_row(unit_gaps, ['Unit testing summary', 'No files', f'{time_spent:,}', format_time_ns(time_spent)])
else:
formatted_line(unit_gaps, '', '', '', '')
formatted_row(unit_gaps, ['Unit testing summary', f'{n_success}/{n_total}', f'{time_spent:,}', format_time_ns(time_spent)])
footer(unit_gaps)
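The pass criterion used throughout unit_test is a mean absolute difference below tol, which absorbs device-level floating point drift while still flagging real divergence. A tiny illustration with invented values:

import numpy as np

cpu = np.float64([0.1, 0.2, 0.3])
gpu = cpu + 1e-12                                        # plausible rounding drift between devices
assert np.abs(cpu - gpu).mean() < 1e-8                   # passes: drift is below tolerance
assert not np.abs(cpu - (cpu + 1e-3)).mean() < 1e-8      # fails the criterion: real divergence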
def load_datasets(data_dir: str = DATA_DIR) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: def load_datasets(data_dir: str = DATA_DIR) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Load the datasets. """Load the datasets.
Args: Args:
data_dir (str, optional): Path of the directory containing the datasets. Defaults to DATA_DIR (see config.py) data_dir (str, optional): [description]. Defaults to DATA_DIR (see config.py).
Returns: Returns:
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: X_train, y_train, X_test, y_test Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: [description]
""" """
bytes_to_int_list = lambda b: list(map(int, b.rstrip().split(' '))) bytes_to_int_list = lambda b: list(map(int, b.rstrip().split(" ")))
def load(set_name: str) -> np.ndarray: def load(set_name: str) -> np.ndarray:
with open(f'{data_dir}/{set_name}.bin', 'r') as f: with open(f"{data_dir}/{set_name}.bin", "r") as f:
shape = bytes_to_int_list(f.readline()) shape = bytes_to_int_list(f.readline())
return np.asarray(bytes_to_int_list(f.readline()), dtype = np.uint8).reshape(shape) return np.asarray(bytes_to_int_list(f.readline()), dtype = np.uint8).reshape(shape)
return load('X_train'), load('y_train'), load('X_test'), load('y_test') return load("X_train"), load("y_train"), load("X_test"), load("y_test")
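load_datasets expects a plain-text layout despite the .bin extension: a first line with the space-separated shape, then one line with every value flattened. A round-trip sketch of that format (the file path and 19x19 window size are invented for the example):

import numpy as np

X = (np.arange(2 * 19 * 19) % 256).astype(np.uint8).reshape(2, 19, 19)
with open('/tmp/X_demo.bin', 'w') as f:
    f.write(' '.join(map(str, X.shape)) + '\n')      # "2 19 19"
    f.write(' '.join(map(str, X.ravel())) + '\n')    # flattened uint8 values

with open('/tmp/X_demo.bin', 'r') as f:
    shape = list(map(int, f.readline().rstrip().split(' ')))
    X_back = np.asarray(list(map(int, f.readline().rstrip().split(' '))), dtype = np.uint8).reshape(shape)
assert (X == X_back).all()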

@ -5,9 +5,6 @@ from sys import argv
import numpy as np import numpy as np
from os import path, listdir from os import path, listdir
# Induce determinism
np.random.seed(133742)
# Makes the "leave" argument default to False # Makes the "leave" argument default to False
tqdm = partial(tqdm, leave = False) tqdm = partial(tqdm, leave = False)
@ -45,8 +42,8 @@ def __main__(data_path: str) -> None:
y.append(y_i) y.append(y_i)
X, y = np.asarray(X), np.asarray(y) X, y = np.asarray(X), np.asarray(y)
idx = np.random.permutation(y.shape[0]) # idx = np.random.permutation(y.shape[0])
X, y = X[idx], y[idx] # X, y = X[idx], y[idx]
for org, s in tqdm(zip("Xy", [X, y]), desc = f"Writing {set_name}"): for org, s in tqdm(zip("Xy", [X, y]), desc = f"Writing {set_name}"):
with open(f"{data_path}/{org}_{set_name}.bin", "w") as out: with open(f"{data_path}/{org}_{set_name}.bin", "w") as out:

View File

@ -2,14 +2,6 @@ from typing import Callable, Iterable, Union, Any
from tqdm import tqdm from tqdm import tqdm
def njit(f: Union[Callable, str] = None, *args, **kwargs) -> Callable: def njit(f: Union[Callable, str] = None, *args, **kwargs) -> Callable:
"""Wrapper for optional numba's njit decorator
Args:
f (Union[Callable, str], optional): Function to wrap with numba. Defaults to None.
Returns:
Callable: Wrapped function.
"""
def decorator(func: Callable) -> Any: def decorator(func: Callable) -> Any:
return func return func
@ -18,13 +10,4 @@ def njit(f: Union[Callable, str] = None, *args, **kwargs) -> Callable:
return decorator return decorator
def tqdm_iter(iter: Iterable, desc: str): def tqdm_iter(iter: Iterable, desc: str):
"""Wrapper for optional tqdm iterator progress bar.
Args:
iter (Iterable): Object to iterate over.
desc (str): Description written to stdout.
Returns:
tqdm: Wrapped iterator.
"""
return tqdm(iter, leave = False, desc = desc) return tqdm(iter, leave = False, desc = desc)
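This decorators module is what lets the pure-Python builds import the same sources as the numba builds: njit has to accept every call form while compiling nothing. A sketch of the dispatch it has to cover, assuming the elided middle of the function handles the bare @njit case:

from typing import Any, Callable, Union

def njit(f: Union[Callable, str, None] = None, *args, **kwargs) -> Callable:
    def decorator(func: Callable) -> Any:
        return func                # no compilation: identity wrapper
    if callable(f):
        return f                   # bare @njit
    return decorator               # @njit('signature') or @njit(key = value)

@njit('int64(int64)')
def double(x: int) -> int:
    return x * 2

assert double(21) == 42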

@ -1,15 +0,0 @@
services:
violajones-python:
image: saundersp/violajones-python
build: .
volumes:
- ./models:/home/ViolaJones/python/models
- ./out:/home/ViolaJones/python/out
- ../data:/home/ViolaJones/data
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]

@ -2,15 +2,14 @@
# Author: @saundersp # Author: @saundersp
from ViolaJones import train_viola_jones, classify_viola_jones from ViolaJones import train_viola_jones, classify_viola_jones
#from toolbox import state_saver, pickle_multi_loader, format_time_ns, benchmark_function, unit_test_argsort_2d from toolbox import state_saver, picke_multi_loader, format_time_ns, benchmark_function, unit_test_argsort_2d
from toolbox import state_saver, format_time_ns, benchmark_function, unit_test_argsort_2d from toolbox_unit_test import format_time_ns_test
from toolbox import header, footer, formatted_row, formatted_line from toolbox import header, footer, formatted_row, formatted_line
from toolbox_unit_test import format_time_test, format_time_ns_test
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
#from sklearn.feature_selection import SelectPercentile, f_classif from sklearn.feature_selection import SelectPercentile, f_classif
from common import load_datasets, unit_test from common import load_datasets, unit_test
from ViolaJones import build_features # , get_best_anova_features from ViolaJones import build_features, get_best_anova_features
from typing import Tuple, List from typing import Tuple
from time import perf_counter_ns from time import perf_counter_ns
from os import makedirs from os import makedirs
import numpy as np import numpy as np
@ -20,51 +19,44 @@ if __DEBUG:
from config import IDX_INSPECT, IDX_INSPECT_OFFSET from config import IDX_INSPECT, IDX_INSPECT_OFFSET
if GPU_BOOSTED: if GPU_BOOSTED:
from ViolaJonesGPU import apply_features, set_integral_image, argsort_2d from ViolaJonesGPU import apply_features, set_integral_image, argsort
label = 'GPU' if COMPILE_WITH_C else 'PGPU' label = 'GPU' if COMPILE_WITH_C else 'PGPU'
# The parallel prefix sum doesn't use the whole GPU so numba output some annoying warnings, this disables it # The parallel prefix sum doesn't use the whole GPU so numba output some annoying warnings, this disables it
from numba import config from numba import config
config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
else: else:
from ViolaJonesCPU import apply_features, set_integral_image, argsort_2d from ViolaJonesCPU import apply_features, set_integral_image, argsort
label = 'CPU' if COMPILE_WITH_C else 'PY' label = 'CPU' if COMPILE_WITH_C else 'PY'
def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Execute the preprocessing phase """Load the dataset, calculate features and integral images, apply features to images and calculate argsort of the featured images.
The preprocessing phase consists of the following steps:
- Load the dataset
- Calculate features
- Calculate integral images
- Apply features to images
- Calculate argsort of the featured images
Returns: Returns:
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Tuple containing in order: training features, training features sorted indexes, training labels, testing features, testing labels Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test
""" """
# Creating state saver folders if they don't exist already # Creating state saver folders if they don't exist already
if SAVE_STATE: if SAVE_STATE:
for folder_name in ['models', 'out']: for folder_name in ["models", "out"]:
makedirs(folder_name, exist_ok = True) makedirs(folder_name, exist_ok = True)
preproc_timestamp = perf_counter_ns() preproc_timestamp = perf_counter_ns()
preproc_gaps = [49, -18, 29] preproc_gaps = [49, -18, 29]
header(preproc_gaps, ['Preprocessing', 'Time spent (ns)', 'Formatted time spent']) header(['Preprocessing', 'Time spent (ns)', 'Formatted time spent'], preproc_gaps)
X_train, y_train, X_test, y_test = state_saver('Loading sets', preproc_gaps[0], ['X_train', 'y_train', 'X_test', 'y_test'], X_train, y_train, X_test, y_test = state_saver('Loading sets', preproc_gaps[0], ['X_train', 'y_train', 'X_test', 'y_test'],
load_datasets, FORCE_REDO, SAVE_STATE) load_datasets, FORCE_REDO, SAVE_STATE)
if __DEBUG: if __DEBUG:
print('X_train') print("X_train")
print(X_train.shape) print(X_train.shape)
print(X_train[IDX_INSPECT]) print(X_train[IDX_INSPECT])
print('X_test') print("X_test")
print(X_test.shape) print(X_test.shape)
print(X_test[IDX_INSPECT]) print(X_test[IDX_INSPECT])
print('y_train') print("y_train")
print(y_train.shape) print(y_train.shape)
print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET]) print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
print('y_test') print("y_test")
print(y_test.shape) print(y_test.shape)
print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET]) print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
@ -72,7 +64,7 @@ def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.
FORCE_REDO, SAVE_STATE) FORCE_REDO, SAVE_STATE)
if __DEBUG: if __DEBUG:
print('feats') print("feats")
print(feats.shape) print(feats.shape)
print(feats[IDX_INSPECT].ravel()) print(feats[IDX_INSPECT].ravel())
@ -82,10 +74,10 @@ def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.
lambda: set_integral_image(X_test), FORCE_REDO, SAVE_STATE) lambda: set_integral_image(X_test), FORCE_REDO, SAVE_STATE)
if __DEBUG: if __DEBUG:
print('X_train_ii') print("X_train_ii")
print(X_train_ii.shape) print(X_train_ii.shape)
print(X_train_ii[IDX_INSPECT]) print(X_train_ii[IDX_INSPECT])
print('X_test_ii') print("X_test_ii")
print(X_test_ii.shape) print(X_test_ii.shape)
print(X_test_ii[IDX_INSPECT]) print(X_test_ii[IDX_INSPECT])
@ -96,46 +88,45 @@ def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.
del X_train_ii, X_test_ii, feats del X_train_ii, X_test_ii, feats
if __DEBUG: if __DEBUG:
print('X_train_feat') print("X_train_feat")
print(X_train_feat.shape) print(X_train_feat.shape)
print(X_train_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET]) print(X_train_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
print('X_test_feat') print("X_test_feat")
print(X_test_feat.shape) print(X_test_feat.shape)
print(X_test_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET]) print(X_test_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
#indices = state_saver('Selecting best features training set', 'indices', force_redo = FORCE_REDO, save_state = SAVE_STATE, #indices = state_saver("Selecting best features training set", "indices", force_redo = True, save_state = SAVE_STATE,
# fnc = lambda: SelectPercentile(f_classif, percentile = 10).fit(X_train_feat.T, y_train).get_support(indices = True)) # fnc = lambda: SelectPercentile(f_classif, percentile = 10).fit(X_train_feat.T, y_train).get_support(indices = True))
#indices = state_saver('Selecting best features training set', 'indices', force_redo = FORCE_REDO, save_state = SAVE_STATE, #indices = state_saver("Selecting best features training set", "indices", force_redo = FORCE_REDO, save_state = SAVE_STATE,
# fnc = lambda: get_best_anova_features(X_train_feat, y_train)) # fnc = lambda: get_best_anova_features(X_train_feat, y_train))
#indices = benchmark_function('Selecting best features (manual)', lambda: get_best_anova_features(X_train_feat, y_train)) #indices = benchmark_function("Selecting best features (manual)", lambda: get_best_anova_features(X_train_feat, y_train))
#if __DEBUG: #if __DEBUG:
# print('indices') # print("indices")
# print(indices.shape) # print(indices.shape)
# print(indices[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET]) # print(indices[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
# assert indices.shape[0] == indices_new.shape[0], f'Indices length not equal : {indices.shape} != {indices_new.shape}' # assert indices.shape[0] == indices_new.shape[0], f"Indices length not equal : {indices.shape} != {indices_new.shape}"
# assert (eq := indices == indices_new).all(), f'Indices not equal : {eq.sum() / indices.shape[0]}' # assert (eq := indices == indices_new).all(), f"Indices not equal : {eq.sum() / indices.shape[0]}"
# X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices] # X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]
X_train_feat_argsort = state_saver(f'Precalculating training set argsort ({label})', preproc_gaps[0], f'X_train_feat_argsort_{label}', X_train_feat_argsort = state_saver(f'Precalculating training set argsort ({label})', preproc_gaps[0], f'X_train_feat_argsort_{label}',
lambda: argsort_2d(X_train_feat), FORCE_REDO, SAVE_STATE) lambda: argsort(X_train_feat), FORCE_REDO, SAVE_STATE)
if __DEBUG: if __DEBUG:
print('X_train_feat_argsort') print("X_train_feat_argsort")
print(X_train_feat_argsort.shape) print(X_train_feat_argsort.shape)
print(X_train_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET]) print(X_train_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
benchmark_function('Arg unit test', preproc_gaps[0], lambda: unit_test_argsort_2d(X_train_feat, X_train_feat_argsort)) benchmark_function('Arg unit test', preproc_gaps[0], lambda: unit_test_argsort_2d(X_train_feat, X_train_feat_argsort))
X_test_feat_argsort = state_saver(f'Precalculating testing set argsort ({label})', preproc_gaps[0], f'X_test_feat_argsort_{label}', X_test_feat_argsort = state_saver(f"Precalculating testing set argsort ({label})", f"X_test_feat_argsort_{label}",
lambda: argsort_2d(X_test_feat), FORCE_REDO, SAVE_STATE) lambda: argsort(X_test_feat), FORCE_REDO, SAVE_STATE)
if __DEBUG: if __DEBUG:
print('X_test_feat_argsort') print("X_test_feat_argsort")
print(X_test_feat_argsort.shape) print(X_test_feat_argsort.shape)
print(X_test_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET]) print(X_test_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
benchmark_function('Arg unit test', preproc_gaps[0], lambda: unit_test_argsort_2d(X_test_feat, X_test_feat_argsort)) benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_test_feat, X_test_feat_argsort))
time_spent = perf_counter_ns() - preproc_timestamp time_spent = perf_counter_ns() - preproc_timestamp
formatted_line(preproc_gaps, '', '', '', '') formatted_line(preproc_gaps, '', '', '', '')
formatted_row(preproc_gaps, ['Preprocessing summary', f'{time_spent:,}', format_time_ns(time_spent)]) formatted_row(preproc_gaps, ['Preprocessing summary', f'{time_spent:,}', format_time_ns(time_spent)])
@ -147,17 +138,16 @@ def train(X_train_feat: np.ndarray, X_train_feat_argsort: np.ndarray, y_train: n
"""Train the weak classifiers. """Train the weak classifiers.
Args: Args:
X_train_feat (np.ndarray): Training features X_train (np.ndarray): Training images.
X_train_feat_argsort (np.ndarray): Sorted indexes of the training images features X_train_feat_argsort (np.ndarray): Sorted indexes of the training images features.
y_train (np.ndarray): Training labels y_train (np.ndarray): Training labels.
Returns: Returns: List of trained models
List[np.ndarray]: List of trained models
""" """
training_timestamp = perf_counter_ns() training_timestamp = perf_counter_ns()
training_gaps = [26, -18, 29] training_gaps = [26, -18, 29]
header(training_gaps, ['Training', 'Time spent (ns)', 'Formatted time spent']) header(['Training', 'Time spent (ns)', 'Formatted time spent'], training_gaps)
models = [] models = []
for T in TS: for T in TS:
@ -167,9 +157,9 @@ def train(X_train_feat: np.ndarray, X_train_feat_argsort: np.ndarray, y_train: n
models.append([alphas, final_classifiers]) models.append([alphas, final_classifiers])
if __DEBUG: if __DEBUG:
print('alphas') print("alphas")
print(alphas) print(alphas)
print('final_classifiers') print("final_classifiers")
print(final_classifiers) print(final_classifiers)
time_spent = perf_counter_ns() - training_timestamp time_spent = perf_counter_ns() - training_timestamp
@ -183,15 +173,15 @@ def testing_and_evaluating(models: List[np.ndarray], X_train_feat: np.ndarray, y
"""Benchmark the trained classifiers on the training and testing sets. """Benchmark the trained classifiers on the training and testing sets.
Args: Args:
models (List[np.ndarray]): List of trained models models (List[np.ndarray]): List of trained models.
X_train_feat (np.ndarray): Training features X_train_feat (np.ndarray): Training features.
y_train (np.ndarray): Training labels y_train (np.ndarray): Training labels.
X_test_feat (np.ndarray): Testing features X_test_feat (np.ndarray): Testing features.
y_test (np.ndarray): Testing labels y_test (np.ndarray): Testing labels.
""" """
testing_gaps = [26, -19, 24, -19, 24] testing_gaps = [26, -19, 24, -19, 24]
header(testing_gaps, ['Testing', 'Time spent (ns) (E)', 'Formatted time spent (E)', 'Time spent (ns) (T)', 'Formatted time spent (T)']) header(['Testing', 'Time spent (ns) (E)', 'Formatted time spent (E)', 'Time spent (ns) (T)', 'Formatted time spent (T)'], testing_gaps)
performances = [] performances = []
total_train_timestamp = 0 total_train_timestamp = 0
@ -223,7 +213,7 @@ def testing_and_evaluating(models: List[np.ndarray], X_train_feat: np.ndarray, y
footer(testing_gaps) footer(testing_gaps)
evaluating_gaps = [19, 7, 6, 6, 6, 7, 6, 6, 6] evaluating_gaps = [19, 7, 6, 6, 6, 7, 6, 6, 6]
header(evaluating_gaps, ['Evaluating', 'ACC (E)', 'F1 (E)', 'FN (E)', 'FP (E)', 'ACC (T)', 'F1 (T)', 'FN (T)', 'FP (T)']) header(['Evaluating', 'ACC (E)', 'F1 (E)', 'FN (E)', 'FP (E)', 'ACC (T)', 'F1 (T)', 'FN (T)', 'FP (T)'], evaluating_gaps)
for T, (e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP) in zip(TS, performances): for T, (e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP) in zip(TS, performances):
print(f'│ ViolaJones T = {T:<4}{e_acc:>7.2%}{e_f1:>6.2f}{e_FN:>6,}{e_FP:>6,}', end = '') print(f'│ ViolaJones T = {T:<4}{e_acc:>7.2%}{e_f1:>6.2f}{e_FN:>6,}{e_FP:>6,}', end = '')
@ -234,7 +224,7 @@ def testing_and_evaluating(models: List[np.ndarray], X_train_feat: np.ndarray, y
def main() -> None: def main() -> None:
unit_timestamp = perf_counter_ns() unit_timestamp = perf_counter_ns()
unit_gaps = [27, -18, 29] unit_gaps = [27, -18, 29]
header(unit_gaps, ['Unit testing', 'Time spent (ns)', 'Formatted time spent']) header(['Unit testing', 'Time spent (ns)', 'Formatted time spent'], unit_gaps)
benchmark_function('testing format_time', unit_gaps[0], format_time_test) benchmark_function('testing format_time', unit_gaps[0], format_time_test)
benchmark_function('testing format_time_ns', unit_gaps[0], format_time_ns_test) benchmark_function('testing format_time_ns', unit_gaps[0], format_time_ns_test)
time_spent = perf_counter_ns() - unit_timestamp time_spent = perf_counter_ns() - unit_timestamp
@ -245,12 +235,12 @@ def main() -> None:
X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test = preprocessing() X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test = preprocessing()
models = train(X_train_feat, X_train_feat_argsort, y_train) models = train(X_train_feat, X_train_feat_argsort, y_train)
# X_train_feat, X_test_feat = pickle_multi_loader([f'X_train_feat_{label}', f'X_test_feat_{label}'], OUT_DIR) # X_train_feat, X_test_feat = picke_multi_loader([f"X_train_feat_{label}", f"X_test_feat_{label}"], OUT_DIR)
# indices = pickle_multi_loader(['indices'], OUT_DIR)[0] # indices = picke_multi_loader(["indices"], OUT_DIR)[0]
# X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices] # X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]
testing_and_evaluating(models, X_train_feat, y_train, X_test_feat, y_test) testing_and_evaluating(models, X_train_feat, y_train, X_test_feat, y_test)
unit_test(TS) unit_test(TS)
if __name__ == '__main__': if __name__ == "__main__":
main() main()

@ -1,3 +1,3 @@
numba==0.59.1 numba
scikit-learn==1.4.1.post1 scikit-learn
tqdm==4.66.2 tqdm

python/test.py Normal file (189 lines added)

@ -0,0 +1,189 @@
import numpy as np
from numba import cuda, config, njit
config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
#import matplotlib.pyplot as plt
from tqdm import tqdm
from time import perf_counter_ns
from toolbox import format_time_ns
from pickle import load, dump
from sys import argv
def get(a):
with open(f"{a}.pkl", 'rb') as f:
return load(f)
def save(a, name) -> None:
with open(name, 'wb') as f:
dump(a, f)
def diff(folder, a, label1, label2):
af, bf = get(f"{folder}/{a}_{label1}"), get(f"{folder}/{a}_{label2}")
#print(af)
#print(bf)
print((af - bf).mean())
if __name__ == "__main__":
if len(argv) == 5:
diff(argv[1], argv[4], argv[2], argv[3])
def py_mean(a, b):
s = 0.0
for a_i, b_i in zip(a, b):
s += a_i * b_i
return s / a.shape[0]
def np_mean(a, b):
return np.mean(a * b)
@njit('float64(float64[:], float64[:])', fastmath = True, nogil = True)
def nb_mean(a, b):
return np.mean(a * b)
@njit('float64(float64[:], float64[:])', fastmath = True, nogil = True)
def nb_mean_loop(a, b):
s = 0.0
for a_i, b_i in zip(a, b):
s += a_i * b_i
return s / a.shape[0]
@cuda.jit('void(float64[:], float64[:], float64[:])', fastmath = True)
def cuda_mean_kernel(r, a, b):
s = 0.0
for a_i, b_i in zip(a, b):
s += a_i * b_i
r[0] = s / a.shape[0]
def cuda_mean(a, b):
r = cuda.to_device(np.empty(1, dtype = np.float64))
d_a = cuda.to_device(a)
d_b = cuda.to_device(b)
cuda_mean_kernel[1, 1](r, d_a, d_b)
return r.copy_to_host()[0]
def test_and_compare(labels, fncs, a, b):
m = []
for fnc in tqdm(fncs, leave = False, desc = "Calculating..."):
s = perf_counter_ns()
m.append([fnc(a, b), perf_counter_ns() - s])
print("Results:")
[print(f"\t{label:<10} {m_i:<20} {format_time_ns(time_i)}") for ((m_i, time_i), label) in zip(m, labels)]
print("Comparaison:")
for i, (m_i, label_i) in enumerate(zip(m, labels)):
for j, (m_j, label_j) in enumerate(zip(m, labels)):
if i >= j:
continue
print(f"\t{label_i:<10} vs {label_j:<10} - {abs(m_i[0] - m_j[0])}")
if __name__ == "__main__":
np.set_printoptions(linewidth = 10000, threshold = 1000)
N = int(2**20)
labels = ["Python", "Numpy", "Numba", "Numba loop", "CUDA"]
fncs = [py_mean, np_mean, nb_mean, nb_mean_loop, cuda_mean]
print(f"RANDOM for N={N}")
total_size = (2 * 8 * N)
print(f"Size = {total_size} B")
print(f"Size = {total_size // 1024} kB")
print(f"Size = {total_size // 1024 // 1024} MB")
print(f"Size = {total_size // 1024 // 1024 // 1024} GB")
a, b = np.random.rand(N).astype(np.float64), np.random.rand(N).astype(np.float64)
test_and_compare(labels, fncs, a, b)
del a, b
print(f"\nDETERMINSTIC for N={N}")
total_size = (2 * 8 * N) + (8 * N)
print(f"Size = {total_size} B")
print(f"Size = {total_size // 1024} kB")
print(f"Size = {total_size // 1024 // 1024} MB")
print(f"Size = {total_size // 1024 // 1024 // 1024} GB")
mask = np.arange(N, dtype = np.uint64)
a = np.ones(N, dtype = np.float64)
a[mask < N//2] = 0.1
del mask
b = np.ones(N, dtype = np.float64)
test_and_compare(labels, fncs, a, b)
del a, b
#from ViolaJonesGPU import argsort as argsort_GPU
#from ViolaJonesCPU import argsort as argsort_CPU
#from toolbox import unit_test_argsort_2d, benchmark_function
#labels = ["Numpy", "Numba", "CUDA"]
#a = np.random.randint(2**12, size = (2**20, 2**8), dtype = np.int32)
#m = [benchmark_function(f"Argsort {label}", lambda: f(np.copy(a))) for (label, f) in zip(labels, [
# lambda a: np.argsort(a).astype(np.uint16), argsort_CPU, argsort_GPU
#])]
#for i, (m_i, label_i) in enumerate(zip(m, labels)):
# #for j, (m_j, label_j) in enumerate(zip(m, labels)):
# # if i >= j:
# # continue
# # print(f"\t{label_i:<10} vs {label_j:<10} - {(m_i == m_j).mean()}")
# benchmark_function(f"Unit test {label_i}", lambda: unit_test_argsort_2d(a, m_i))
#for i in tqdm(range(X.shape[0]), leave = False, desc = "Extract image"):
# x = X[i]
# y = Y[i]
# fig = plt.figure()
# plt.imshow(x, cmap = 'gray')
# plt.savefig(f"imgs/{y}/{i}.png")
# plt.close(fig)
#def extract_FD(Xy):
# X_c, Y_c = [], []
# for x,y in Xy:
# X_c.append(x)
# Y_c.append(y)
# X_c = np.asarray(X_c)
# Y_c = np.asarray(Y_c)
# return X_c, Y_c
#X_train, y_train = get('out/X_train'), get('out/y_train')
#X_test, y_test = get('out/X_test'), get('out/y_test')
#X_train, y_train = extract_FD(get('/home/_aspil0w/git/FaceDetection/training'))
#X_test, y_test = extract_FD(get('/home/_aspil0w/git/FaceDetection/test'))
#save(X_train, 'out/X_train'), save(y_train, 'out/y_train')
#save(X_test, 'out/X_test'), save(y_test, 'out/y_test')
#print(X_train.shape, X_train_org.shape, X_train.shape == X_train_org.shape)
#print((X_train == X_train_org).mean())
#print(y_train.shape, y_train_org.shape, y_train.shape == y_train_org.shape)
#print((y_train == y_train_org).mean())
#print(X_test.shape, X_test_org.shape, X_test.shape == X_test_org.shape)
#print((X_test == X_test_org).mean())
#print(y_test.shape, y_test_org.shape, y_test.shape == y_test_org.shape)
#print((y_test == y_test_org).mean())
#@njit('uint16[:](uint8[:, :, :], uint8[:, :, :])')
#def arg_find(X, X_org):
# arg = np.empty(X.shape[0], dtype = np.uint16)
# for i, x in enumerate(X_org):
# found = False
# for j, x_org in enumerate(X):
# if np.all(x == x_org):
# arg[i] = j
# found = True
# break
# assert found, "Image not found"
# return arg
#print("Arg find results train")
#arg_train = arg_find(X_train, X_train_org)
#print((X_train[arg_train] == X_train_org).mean())
#print((y_train[arg_train] == y_train_org).mean())
#print("Arg find results test")
#arg_test = arg_find(X_test, X_test_org)
#print((X_test[arg_test] == X_test_org).mean())
#print((y_test[arg_test] == y_test_org).mean())
#for i in tqdm(range(X_c.shape[0]), leave = False, desc = "Extract image"):
# x = X_c[i]
# y = Y_c[i]
# fig = plt.figure()
# plt.imshow(x, cmap = 'gray')
# plt.savefig(f"imgs2/{y}/{i}.png")
# plt.close(fig)

@ -1,141 +1,90 @@
from typing import Any, Callable, List, Union, Final from typing import Any, Callable, List, Union, Final
from time import perf_counter_ns from time import perf_counter_ns
from numba import njit
import numpy as np import numpy as np
from sys import stderr from sys import stderr
import pickle import pickle
import os import os
from config import MODEL_DIR, OUT_DIR, __DEBUG from config import MODEL_DIR, OUT_DIR
from decorators import njit from decorators import njit
time_formats: Final = ["ns", "µs", "ms", "s", "m", "h", "j", "w", "M", "y", "c"]
def formatted_row(gaps: list[int], titles: list[str], separator: str = '') -> None: def formatted_row(gaps: list[int], titles: list[str], separator: str = '') -> None:
"""Print a formatted row of titles with of gaps seperated by a separator.
Args:
gaps: List of size gaps
titles: List of titles
separator: Separator character between each gap
"""
for gap, title in zip(gaps, titles): for gap, title in zip(gaps, titles):
print(f"{separator} {title:{'>' if gap < 0 else '<'}{abs(gap)}} ", end = '') print(f"{separator} {title:{'>' if gap < 0 else '<'}{abs(gap)}} ", end = '')
print(separator) print(separator)
def formatted_line(gaps: list[int], left: str, middle: str, separator: str, right: str) -> None: def formatted_line(gaps: list[int], right: str, middle: str, separator: str, left: str) -> None:
"""Print a formatted line of repeated characters. print(right, end = '')
Args:
gaps: List of size gaps
left: Character on the left
middle: Character between each separator
separator: Separator character between each gap
right: Character on the right
"""
print(left, end = '')
last_gap = len(gaps) - 1 last_gap = len(gaps) - 1
for i, gap in enumerate(gaps): for i, gap in enumerate(gaps):
print(f'{separator * (abs(gap) + 2)}', end = '') print(f'{separator * (abs(gap) + 2)}', end = '')
if i != last_gap: if i != last_gap:
print(middle, end = '') print(middle, end = '')
print(right) print(left)
def header(gaps: list[int], titles: list[str]) -> None: def header(titles: list[str], gaps: list[int]) -> None:
"""Print a formatted header with the given titles and sizes.
Args:
gaps: List of size gaps
titles: List of titles
"""
formatted_line(gaps, '', '', '', '') formatted_line(gaps, '', '', '', '')
formatted_row(gaps, titles) formatted_row(gaps, titles)
formatted_line(gaps, '', '', '', '') formatted_line(gaps, '', '', '', '')
def footer(gaps: list[int]) -> None: def footer(gaps: list[int]) -> None:
"""Print a formatted footer with the given sizes.
Args:
gaps: List of size gaps
"""
formatted_line(gaps, '', '', '', '') formatted_line(gaps, '', '', '', '')
time_formats: Final = ['ns', 'µs', 'ms', 's', 'm', 'h', 'j', 'w', 'M', 'y', 'c']
time_numbers: Final = np.array([1, 1e3, 1e6, 1e9, 6e10, 36e11, 864e11, 6048e11, 26784e11, 31536e12, 31536e14], dtype = np.uint64) time_numbers: Final = np.array([1, 1e3, 1e6, 1e9, 6e10, 36e11, 864e11, 6048e11, 26784e11, 31536e12, 31536e14], dtype = np.uint64)
@njit('str(uint64)') @njit('str(uint64)')
def format_time_ns(time: int) -> str: def format_time_ns(time: int) -> str:
"""Format the time in nanoseconds in human readable format. """Format the time in nanoseconds in human readable format.
Args: Args:
time (int): Time in nanoseconds time (int): Time in nanoseconds.
Returns: Returns:
str: The formatted human readable string str: The formatted human readable string.
""" """
assert time >= 0, 'Incorrect time stamp' assert time >= 0, "Incorrect time stamp"
if time == 0: if time == 0:
return '0ns' return "0ns"
s = '' s = ""
for i in range(time_numbers.shape[0])[::-1]: for i in range(time_numbers.shape[0])[::-1]:
if time >= time_numbers[i]: if time >= time_numbers[i]:
res = int(time // time_numbers[i]) res = int(time // time_numbers[i])
time = time % time_numbers[i] time = time % time_numbers[i]
s += f'{res}{time_formats[i]} ' s += f"{res}{time_formats[i]} "
assert time == 0, 'Leftover in formatting time !' assert time == 0, "Leftover in formatting time !"
return s.rstrip() return s.rstrip()
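A few worked values for the greedy decomposition above ('j' is a day, 'w' a week, 'M' a month, 'c' a century), assuming format_time_ns is imported from toolbox:

from toolbox import format_time_ns

assert format_time_ns(0) == '0ns'
assert format_time_ns(1_000) == '1µs'                    # one entry of time_numbers consumed
assert format_time_ns(3_661_000_000_000) == '1h 1m 1s'   # 36e11 + 6e10 + 1e9 nanoseconds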
@njit('str(uint64)') def picke_multi_loader(filenames: List[str], save_dir: str = MODEL_DIR) -> List[Any]:
def format_time(time: int) -> str:
"""Format the time in seconds in human readable format.
Args:
time (int): Time in seconds
Returns:
str: The formatted human readable string
"""
assert time >= 0, 'Incorrect time stamp'
if time == 0:
return '0s'
s = ''
for i in range(3, time_numbers.shape[0])[::-1]:
time_number = time_numbers[i] / int(1e9)
if time >= time_number:
res = int(time // time_number)
time = time % time_number
s += f'{res}{time_formats[i]} '
assert time == 0, 'Leftover in formatting time !'
return s.rstrip()
def pickle_multi_loader(filenames: List[str], save_dir: str = MODEL_DIR) -> List[Any]:
"""Load multiple pickle data files. """Load multiple pickle data files.
Args: Args:
filenames (List[str]): List of all the filename to load filenames (List[str]): List of all the filename to load.
save_dir (str, optional): Path of the files to load. Defaults to MODELS_DIR (see config.py) save_dir (str, optional): Path of the files to load. Defaults to MODELS_DIR (see config.py).
Returns: Returns:
List[Any]: List of loaded pickle data files List[Any]: List of loaded pickle data files.
""" """
b = [] b = []
for f in filenames: for f in filenames:
filepath = f'{save_dir}/{f}.pkl' filepath = f"{save_dir}/{f}.pkl"
if os.path.exists(filepath): if os.path.exists(filepath):
with open(filepath, 'rb') as file_bytes: with open(filepath, "rb") as filebyte:
b.append(pickle.load(file_bytes)) b.append(pickle.load(filebyte))
else: else:
b.append(None) b.append(None)
return b return b
def benchmark_function(step_name: str, column_width: int, fnc: Callable) -> Any: def benchmark_function(step_name: str, column_width: int, fnc: Callable) -> Any:
"""Benchmark a function and display the result in stdout. """Benchmark a function and display the result of stdout.
Args: Args:
step_name (str): Name of the function to call step_name (str): Name of the function to call.
fnc (Callable): Function to call fnc (Callable): Function to call.
Returns: Returns:
Any: Result of the function Any: Result of the function.
""" """
print(f'{step_name}...', file = stderr, end = '\r') print(f'{step_name}...', file = stderr, end = '\r')
s = perf_counter_ns() s = perf_counter_ns()
@ -149,34 +98,34 @@ def state_saver(step_name: str, column_width: int, filename: Union[str, List[str
"""Either execute a function then saves the result or load the already existing result. """Either execute a function then saves the result or load the already existing result.
Args: Args:
step_name (str): Name of the function to call step_name (str): Name of the function to call.
filename (Union[str, List[str]]): Name or list of names of the filenames where the result(s) are saved filename (Union[str, List[str]]): Name or list of names of the filenames where the result(s) are saved.
fnc ([type]): Function to call fnc ([type]): Function to call.
force_redo (bool, optional): Recall the function even if the result(s) is already saved. Defaults to False force_redo (bool, optional): Recall the function even if the result(s) is already saved. Defaults to False.
save_dir (str, optional): Path of the directory to save the result(s). Defaults to OUT_DIR (see config.py) save_dir (str, optional): Path of the directory to save the result(s). Defaults to OUT_DIR (see config.py).
Returns: Returns:
Any: The result(s) of the called function Any: The result(s) of the called function
""" """
if isinstance(filename, str): if isinstance(filename, str):
if not os.path.exists(f'{save_dir}/{filename}.pkl') or force_redo: if not os.path.exists(f"{save_dir}/{filename}.pkl") or force_redo:
b = benchmark_function(step_name, column_width, fnc) b = benchmark_function(step_name, column_width, fnc)
if save_state: if save_state:
with open(f"{save_dir}/{filename}.pkl", 'wb') as f:
print(f'Saving results of {step_name}', file = stderr, end = '\r') print(f'Saving results of {step_name}', file = stderr, end = '\r')
with open(f'{save_dir}/{filename}.pkl', 'wb') as f:
pickle.dump(b, f) pickle.dump(b, f)
print(' ' * 100, file = stderr, end = '\r') print(' ' * 100, file = stderr, end = '\r')
return b return b
else: else:
with open(f"{save_dir}/{filename}.pkl", "rb") as f:
print(f'Loading results of {step_name}', file = stderr, end = '\r') print(f'Loading results of {step_name}', file = stderr, end = '\r')
with open(f'{save_dir}/{filename}.pkl', 'rb') as f:
res = pickle.load(f) res = pickle.load(f)
print(f"{step_name:<{column_width}}{'None':>18}{'loaded saved state':<29}") print(f"{step_name:<{column_width}}{'None':>18}{'loaded saved state':<29}")
return res return res
elif isinstance(filename, list): elif isinstance(filename, list):
abs = False abs = False
for fn in filename: for fn in filename:
if not os.path.exists(f'{save_dir}/{fn}.pkl'): if not os.path.exists(f"{save_dir}/{fn}.pkl"):
abs = True abs = True
break break
if abs or force_redo: if abs or force_redo:
@ -184,7 +133,7 @@ def state_saver(step_name: str, column_width: int, filename: Union[str, List[str
if save_state: if save_state:
print(f'Saving results of {step_name}', file = stderr, end = '\r') print(f'Saving results of {step_name}', file = stderr, end = '\r')
for bi, fnI in zip(b, filename): for bi, fnI in zip(b, filename):
with open(f'{save_dir}/{fnI}.pkl', 'wb') as f: with open(f"{save_dir}/{fnI}.pkl", 'wb') as f:
pickle.dump(bi, f) pickle.dump(bi, f)
print(' ' * 100, file = stderr, end = '\r') print(' ' * 100, file = stderr, end = '\r')
return b return b
@ -193,31 +142,21 @@ def state_saver(step_name: str, column_width: int, filename: Union[str, List[str
b = [] b = []
print(f'Loading results of {step_name}', file = stderr, end = '\r') print(f'Loading results of {step_name}', file = stderr, end = '\r')
for fn in filename: for fn in filename:
with open(f'{save_dir}/{fn}.pkl', 'rb') as f: with open(f"{save_dir}/{fn}.pkl", "rb") as f:
b.append(pickle.load(f)) b.append(pickle.load(f))
print(' ' * 100, file = stderr, end = '\r') print(' ' * 100, file = stderr, end = '\r')
return b return b
else: else:
assert False, f'Incompatible filename type = {type(filename)}' assert False, f"Incompatible filename type = {type(filename)}"
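state_saver is the memoization backbone of the pipeline: run the function once, pickle the result into save_dir, and reload it on later runs unless force_redo is set. A hedged usage sketch matching the positional signature seen at the call sites (expensive_step is invented):

import numpy as np

def expensive_step() -> np.ndarray:          # stand-in for e.g. apply_features
    return np.arange(10, dtype = np.int32)

# First run executes and saves out/demo_feature.pkl; later runs load it back
feats = state_saver('Computing demo feature', 49, 'demo_feature',
                    expensive_step, False, True)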
@njit('boolean(int32[:, :], uint16[:, :])') @njit('boolean(int32[:, :], uint16[:, :])')
def unit_test_argsort_2d(arr: np.ndarray, indices: np.ndarray) -> bool: def unit_test_argsort_2d(arr: np.ndarray, indices: np.ndarray) -> bool:
"""Test if a given 2D array of indices sort a given 2D array.
Args:
arr (np.ndarray): 2D array of data
indices (np.ndarray): 2D indices that sort the array
Returns:
bool: Whether the test was successful
"""
n = indices.shape[0]
total = indices.shape[0] * indices.shape[1]
for i, sub_indices in enumerate(indices):
for j in range(sub_indices.shape[0] - 1):
if arr[i, sub_indices[j]] <= arr[i, sub_indices[j + 1]]:
n += 1
if __DEBUG:
if n != total:
print(n, total, n / total)
return n == total
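As a quick illustration of how this helper is driven (a sketch, assuming numpy is available as np in this module, as the type hints suggest): the indices must be the row-wise ascending order of arr, and the dtypes must match the @njit signature above.

rng = np.random.default_rng(42)
arr = rng.integers(0, 100, size = (4, 8)).astype(np.int32)   # arbitrary demo data
indices = np.argsort(arr, axis = 1).astype(np.uint16)        # row-wise ascending order
assert unit_test_argsort_2d(arr, indices)                    # every adjacent pair is ordered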

View File

@@ -1,132 +1,67 @@
from typing import Any
from toolbox import format_time, format_time_ns
def Assert(name: str, expected: Any, result: Any):
"""Test if a given result is equal of the expected one and log result
Args:
name (str): name of the unit test
expected (Any): expected result of the function call
result (Any): result of the function
"""
if expected != result:
print(f"For test {name}: expected '{expected}' but got '{result}' instead")
assert False
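For example (illustrative values, not taken from the test suites below):

Assert('addition', 4, 2 + 2)    # equal: returns silently
Assert('addition', 4, 5)        # unequal: prints the diagnostic, then raises AssertionError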
def format_time_test() -> None:
"""Test suite for the format_time output
See https://en.wikipedia.org/wiki/Unit_of_time for details
"""
Assert("format_time null", "0s", format_time(0))
Assert("format_time second", "1s", format_time(1))
Assert("format_time decasecond", "10s", format_time(10))
Assert("format_time minute", "1m", format_time(60))
Assert("format_time milliday", "1m 26s", format_time(86)) # missing 0.4s due to precision
Assert("format_time hectosecond", "1m 40s", format_time(100))
Assert("format_time kilosecond", "16m 40s", format_time(int(1e3)))
Assert("format_time hour", "1h", format_time(3600))
Assert("format_time day", "1j", format_time(86400))
Assert("format_time week/sennight", "1w", format_time(604800))
Assert("format_time megasecond", "1w 4j 13h 46m 40s", format_time(int(1e6)))
Assert("format_time fortnight", "2w", format_time(1209600))
Assert("format_time lunar month (draconitic)", "3w 6j 5h 5m 35s", format_time(2351135)) # missing 0.8 due to precision
Assert("format_time lunar month (tropical)", "3w 6j 7h 43m 4s", format_time(2360584)) # missing 0.7 due to precision
Assert("format_time lunar month (sidereal)", "3w 6j 7h 43m 11s", format_time(2360591)) # missing 0.6 to precision
Assert("format_time lunar month (anomalistic)", "3w 6j 13h 18m 33s", format_time(2380713)) # missing 0.2 due to precision
Assert("format_time lunar month (synodic)", "4w 1j 12h 44m 2s", format_time(2551442)) # missing 0.9 due to precision
Assert("format_time month", "1M", format_time(2678400))
Assert("format_time quarantine", "1M 1w 2j", format_time(int(3456e3)))
Assert("format_time semester", "4M 2j", format_time(10886400))
Assert("format_time lunar year", "11M 1w 6j 8h 52m 48s", format_time(30617568))
Assert("format_time year", "1y", format_time(int(31536e3)))
Assert("format_time tropical year", "1y 5h 48m 45s", format_time(31556925)) # missing 0.216 due to precision
Assert("format_time gregorian year", "1y 5h 49m 12s", format_time(31556952))
Assert("format_time sidereal year", "1y 6h 9m 9s", format_time(31558149)) # missing 0.7635456 due to precision
Assert("format_time leap year", "1y 1j", format_time(31622400))
Assert("format_time olympiad", "4y", format_time(int(126144e3)))
Assert("format_time lusturm", "5y", format_time(int(15768e4)))
Assert("format_time decade", "10y", format_time(int(31536e4)))
Assert("format_time indiction", "15y", format_time(int(47304e4)))
Assert("format_time score", "20y", format_time(int(63072e4)))
Assert("format_time gigasecond", "31y 8M 1w 4j 1h 46m 40s", format_time(int(1e9)))
Assert("format_time jubilee", "50y", format_time(int(15768e5)))
Assert("format_time century", "1c", format_time(int(31536e5)))
Assert("format_time millennium", "10c", format_time(int(31536e6)))
Assert("format_time age", "257c 72y", format_time(int(812745792e3)))
Assert("format_time terasecond", "3170c 97y 10M 3w 4j 17h 46m 40s", format_time(int(1e13)))
Assert("format_time megaannum", "10000c", format_time(int(31536e9)))
Assert("format_time petasecond", "317097c 91y 11M 2w 4j 1h 46m 40s", format_time(int(1e15)))
Assert("format_time galactic year", "2300000c", format_time(int(725328e10)))
Assert("format_time eon", "10000000c", format_time(int(31536e12)))
Assert("format_time kalpa", "43200000c", format_time(int(13623552e10)))
Assert("format_time exasecond", "317097919c 83y 9M 1h 46m 40s", format_time(int(1e18)))
# Cannot use numbers bigger than what ISO Python currently supports
#Assert("format_time zettasecond", "", format_time(1e21))
#Assert("format_time yottasecond", "", format_time(1e24))
#Assert("format_time ronnasecond", "", format_time(1e27))
#Assert("format_time quettasecond", "", format_time(1e30))
# uint64_t_MAX == 2**64 - 1 == 18446744073709551615 (== -1 as a signed 64-bit value)
Assert("format_time max", "5849424173c 55y 3w 5j 7h 16s", format_time(int(2**64 - 1)))
def format_time_ns_test() -> None:
"""Test suite for the format_time_ns output

See https://en.wikipedia.org/wiki/Unit_of_time for details
"""
Assert("format_time_ns null", "0ns", format_time_ns(0))
Assert("format_time_ns nanosecond", "1ns", format_time_ns(1))
Assert("format_time_ns shake", "10ns", format_time_ns(10))
Assert("format_time_ns microsecond", "1µs", format_time_ns(int(1e3)))
Assert("format_time_ns millisecond", "1ms", format_time_ns(int(1e6)))
Assert("format_time_ns centisecond", "10ms", format_time_ns(int(1e7)))
Assert("format_time_ns decisecond", "100ms", format_time_ns(int(1e8)))
Assert("format_time_ns second", "1s", format_time_ns(int(1e9)))
Assert("format_time_ns decasecond", "10s", format_time_ns(int(1e10)))
Assert("format_time_ns minute", "1m", format_time_ns(int(6e10)))
Assert("format_time_ns milliday", "1m 26s 400ms", format_time_ns(int(864e8)))
Assert("format_time_ns hectosecond", "1m 40s", format_time_ns(int(1e11)))
Assert("format_time_ns kilosecond", "16m 40s", format_time_ns(int(1e12)))
Assert("format_time_ns hour", "1h", format_time_ns(int(36e11)))
Assert("format_time_ns day", "1j", format_time_ns(int(864e11)))
Assert("format_time_ns week/sennight", "1w", format_time_ns(int(6048e11)))
Assert("format_time_ns megasecond", "1w 4j 13h 46m 40s", format_time_ns(int(1e15)))
Assert("format_time_ns fortnight", "2w", format_time_ns(int(12096e11)))
Assert("format_time_ns lunar month (draconitic)", "3w 6j 5h 5m 35s 800ms", format_time_ns(int(23511358e8)))
Assert("format_time_ns lunar month (tropical)", "3w 6j 7h 43m 4s 700ms", format_time_ns(int(23605847e8)))
Assert("format_time_ns lunar month (sidereal)", "3w 6j 7h 43m 11s 600ms", format_time_ns(int(23605916e8)))
Assert("format_time_ns lunar month (anomalistic)", "3w 6j 13h 18m 33s 200ms", format_time_ns(int(23807132e8)))
Assert("format_time_ns lunar month (synodic)", "4w 1j 12h 44m 2s 900ms", format_time_ns(int(25514429e8)))
Assert("format_time_ns month", "1M", format_time_ns(int(26784e11)))
Assert("format_time_ns quarantine", "1M 1w 2j", format_time_ns(int(3456e12)))
Assert("format_time_ns semester", "4M 2j", format_time_ns(int(108864e11)))
Assert("format_time_ns lunar year", "11M 1w 6j 8h 52m 48s", format_time_ns(int(30617568e9)))
Assert("format_time_ns year", "1y", format_time_ns(int(31536e12)))
Assert("format_time_ns tropical year", "1y 5h 48m 45s 216ms", format_time_ns(int(31556925216e6)))
Assert("format_time_ns gregorian year", "1y 5h 49m 12s", format_time_ns(int(31556952e9)))
Assert("format_time_ns sidereal year", "1y 6h 9m 9s 763ms 545µs 600ns", format_time_ns(int(315581497635456e2)))
Assert("format_time_ns leap year", "1y 1j", format_time_ns(int(316224e11)))
Assert("format_time_ns olympiad", "4y", format_time_ns(int(126144e12)))
Assert("format_time_ns lustrum", "5y", format_time_ns(int(15768e13)))
Assert("format_time_ns decade", "10y", format_time_ns(int(31536e13)))
Assert("format_time_ns indiction", "15y", format_time_ns(int(47304e13)))
Assert("format_time_ns score", "20y", format_time_ns(int(63072e13)))
Assert("format_time_ns gigasecond", "31y 8M 1w 4j 1h 46m 40s", format_time_ns(int(1e18)))
Assert("format_time_ns jubilee", "50y", format_time_ns(int(15768e14)))
Assert("format_time_ns century", "1c", format_time_ns(int(31536e14)))
Assert("format_time_ns millennium", "10c", format_time_ns(int(31536e15)))
Assert("format_time_ns age", "257c 72y", format_time_ns(int(812745792e12)))
Assert("format_time_ns terasecond", "3170c 97y 10M 3w 4j 17h 46m 40s", format_time_ns(int(1e22)))
Assert("format_time_ns megaannum", "10000c", format_time_ns(int(31536e18)))
# Cannot use numbers bigger than what ISO Python currently supports
# Assert("format_time_ns petasecond", "317097c 91y 11M 2w 4j 1h 46m 40s", format_time_ns(int(1e24))) #Assert("format_time_ns petasecond", "317097c 91y 11M 2w 4j 1h 46m 40s", format_time_ns(int(1e24)));
# Assert("format_time_ns galactic year", "2300000c", format_time_ns(int(725328e19))) #Assert("format_time_ns galactic year", "2300000c", format_time_ns(int(725328e19)));
# Assert("format_time_ns eon", "10000000c", format_time_ns(int(31536e21))) #Assert("format_time_ns eon", "10000000c", format_time_ns(int(31536e21)));
# Assert("format_time_ns kalpa", "43200000c", format_time_ns(int(13623552e19))) #Assert("format_time_ns kalpa", "43200000c", format_time_ns(int(13623552e19)));
# Assert("format_time_ns exasecond", "317097919c 83y 9M 1h 46m 40s", format_time_ns(int(1e27))) #Assert("format_time_ns exasecond", "317097919c 83y 9M 1h 46m 40s", format_time_ns(int(1e27)));
# Assert("format_time_ns zettasecond", "", format_time_ns(int(1e30))) #Assert("format_time_ns zettasecond", "", format_time_ns(int(1e30)));
# Assert("format_time_ns yottasecond", "", format_time_ns(int(1e33))) #Assert("format_time_ns yottasecond", "", format_time_ns(int(1e33)));
# Assert("format_time_ns ronnasecond", "", format_time_ns(int(1e36))) #Assert("format_time_ns ronnasecond", "", format_time_ns(int(1e36)));
# Assert("format_time_ns quettasecond", "", format_time_ns(int(1e39))) #Assert("format_time_ns quettasecond", "", format_time_ns(int(1e39)));
# uint64_t_MAX == 2**64 - 1 == 18446744073709551615 (== -1 as a signed 64-bit value)
Assert("format_time_ns max", "5c 84y 11M 2j 23h 34m 33s 709ms 551µs 615ns", format_time_ns(2**64 - 1)) Assert("format_time_ns max", "5c 84y 11M 2j 23h 34m 33s 709ms 551µs 615ns", format_time_ns(2**64 - 1))