Compare commits
No commits in common. "4a427478377f97d3782de48692a5bdbc63ad5d4e" and "211dcad893a4f5d64601923a2faafc163eac7a63" have entirely different histories.
4a42747837
...
211dcad893
0
.gitignore
vendored
Normal file → Executable file
0
.gitignore
vendored
Normal file → Executable file
0
README.fr.md
Normal file → Executable file
0
README.fr.md
Normal file → Executable file
@ -1,17 +0,0 @@
|
|||||||
FROM nvidia/cuda:12.4.1-devel-ubi9 as builder
|
|
||||||
|
|
||||||
WORKDIR /home/ViolaJones/cpp
|
|
||||||
|
|
||||||
COPY *.cu *.cpp *.hpp Makefile ./
|
|
||||||
RUN make -j "$(nproc)"
|
|
||||||
|
|
||||||
FROM nvidia/cuda:12.4.1-base-ubi9
|
|
||||||
|
|
||||||
WORKDIR /home/ViolaJones/cpp
|
|
||||||
|
|
||||||
RUN dnf install -y make-1:4.3-7.el9 && dnf clean all
|
|
||||||
COPY --from=builder /home/ViolaJones/cpp/bin ./bin
|
|
||||||
COPY Makefile .
|
|
||||||
|
|
||||||
ENTRYPOINT ["make"]
|
|
||||||
CMD ["start"]
|
|
132
cpp/Makefile
132
cpp/Makefile
@ -1,135 +1,79 @@
|
|||||||
CC := nvcc -m64 -t=0 -std=c++17 -Xcompiler -m64,-std=c++17
|
CC := nvcc -m64 -std=c++17 -ccbin g++-12 -Xcompiler -m64,-std=c++17
|
||||||
OBJ_DIR := bin
|
OBJ_DIR := bin
|
||||||
|
$(shell mkdir -p $(OBJ_DIR))
|
||||||
MODELS_DIR := models
|
MODELS_DIR := models
|
||||||
OUT_DIR := out
|
OUT_DIR := out
|
||||||
SRC_DIR := .
|
SRC_DIR := .
|
||||||
DATA_PATH := ../data
|
#CFLAGS := -O0 -Werror=all-warnings -g -G
|
||||||
#CFLAGS := -O0 -g -G -Xptxas=-w -Xcompiler -O0,-rdynamic,-g
|
#CFLAGS := $(CFLAGS) -pg
|
||||||
#CFLAGS := -O0 -g -G -pg -Xptxas=-w -Xcompiler -O0,-rdynamic,-g
|
#CFLAGS := $(CFLAGS) -Xptxas=-w
|
||||||
CFLAGS := -dlto -O2 -Xcompiler -O2
|
#CFLAGS := $(CFLAGS) -Xcompiler -Wall,-O0,-g,-Werror,-Werror=implicit-fallthrough=0,-Wextra,-rdynamic
|
||||||
#CFLAGS := -dlto -O2 -g -Xcompiler -O2,-g,-ggdb
|
CFLAGS := -O4 -Xcompiler -O4
|
||||||
CFLAGS := $(CFLAGS) -MMD -MP -Werror=all-warnings -Xcompiler -Wall,-Werror,-Werror=implicit-fallthrough=0,-Wextra
|
|
||||||
EXEC := $(OBJ_DIR)/ViolaJones
|
EXEC := $(OBJ_DIR)/ViolaJones
|
||||||
DATA := $(DATA_PATH)/X_train.bin $(DATA_PATH)/X_test.bin $(DATA_PATH)/y_train.bin $(DATA_PATH)/y_test.bin
|
DATA := ../data/X_train.bin ../data/X_test.bin ../data/y_train.bin ../data/y_test.bin
|
||||||
SRC := $(shell find $(SRC_DIR) -name '*.cpp' -o -name '*.cu')
|
SRC := $(shell find $(SRC_DIR) -name "*.cpp" -o -name "*.cu" )
|
||||||
OBJ_EXT := o
|
OBJ_EXT := o
|
||||||
ifeq ($(OS), Windows_NT)
|
ifeq ($(OS), Windows_NT)
|
||||||
EXEC := $(EXEC).exe
|
EXEC:=$(EXEC).exe
|
||||||
OBJ_EXT := obj
|
OBJ_EXT:=obj
|
||||||
endif
|
endif
|
||||||
OBJ := $(SRC:$(SRC_DIR)/%.cpp=$(OBJ_DIR)/%.$(OBJ_EXT))
|
OBJ := $(SRC:$(SRC_DIR)/%.cpp=$(OBJ_DIR)/%.$(OBJ_EXT))
|
||||||
OBJ := $(OBJ:$(SRC_DIR)/%.cu=$(OBJ_DIR)/%.$(OBJ_EXT))
|
OBJ := $(OBJ:$(SRC_DIR)/%.cu=$(OBJ_DIR)/%.$(OBJ_EXT))
|
||||||
|
|
||||||
.PHONY: all
|
.PHONY: all start reset clean mrproper debug check
|
||||||
all: $(EXEC)
|
|
||||||
|
|
||||||
$(OBJ_DIR):
|
all: $(EXEC) $(DATA)
|
||||||
@mkdir -v $@
|
|
||||||
|
|
||||||
# Compiling host code
|
# Compiling host code
|
||||||
$(OBJ_DIR)/%.$(OBJ_EXT): $(SRC_DIR)/%.cpp | $(OBJ_DIR) check-nvcc-works
|
$(OBJ_DIR)/%.$(OBJ_EXT): $(SRC_DIR)/%.cpp
|
||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
@$(CC) $(CFLAGS) -c $< -o $@
|
@$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
# Compiling gpu code
|
# Compiling gpu code
|
||||||
$(OBJ_DIR)/%.$(OBJ_EXT): $(SRC_DIR)/%.cu | $(OBJ_DIR) check-nvcc-works
|
$(OBJ_DIR)/%.$(OBJ_EXT): $(SRC_DIR)/%.cu
|
||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
@$(CC) $(CFLAGS) -c $< -o $@
|
@$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
$(EXEC): $(OBJ) | check-nvcc-works
|
$(EXEC): $(OBJ)
|
||||||
@echo Linking objects files to $@
|
@echo Linking objects files to $@
|
||||||
@$(CC) $(CFLAGS) $^ -o $@
|
@$(CC) $(CFLAGS) $^ -o $@
|
||||||
|
|
||||||
$(DATA):
|
$(DATA):
|
||||||
@echo 'Missing $(DATA) files, use downloader first' && exit 1
|
@bash ../download_data.sh ..
|
||||||
|
|
||||||
.PHONY: start
|
|
||||||
start: $(EXEC) $(DATA)
|
start: $(EXEC) $(DATA)
|
||||||
@./$(EXEC)
|
@./$(EXEC)
|
||||||
|
|
||||||
.PHONY: debug
|
profile: start
|
||||||
|
@gprof $(EXEC) gmon.out | gprof2dot | dot -Tpng -o output.png
|
||||||
|
#@gprof $(EXEC) gmon.out > analysis.txt
|
||||||
|
|
||||||
debug: $(EXEC) $(DATA)
|
debug: $(EXEC) $(DATA)
|
||||||
#@cuda-gdb -q $(EXEC)
|
#@cuda-gdb -q $(EXEC)
|
||||||
@gdb -q --tui $(EXEC)
|
@gdb -q --tui $(EXEC)
|
||||||
|
|
||||||
.PHONY: profile
|
check: $(EXEC) $(DATA)
|
||||||
profile: start | check-gprof-works check-gprof2dot-works check-dot-works
|
|
||||||
@gprof $(EXEC) gmon.out | gprof2dot | dot -T png -o output.png
|
|
||||||
|
|
||||||
.PHONY: check
|
|
||||||
check: $(EXEC) $(DATA) | check-valgrind-works
|
|
||||||
@valgrind -q -s --leak-check=full --show-leak-kinds=all $(EXEC)
|
@valgrind -q -s --leak-check=full --show-leak-kinds=all $(EXEC)
|
||||||
|
|
||||||
.PHONY: cudacheck
|
cudacheck: $(EXEC) $(DATA)
|
||||||
cudacheck: $(EXEC) $(DATA) | check-computer-sanitizer-works
|
@cuda-memcheck --destroy-on-device-error kernel --tool memcheck --leak-check full --report-api-errors all $(EXEC)
|
||||||
@compute-sanitizer --destroy-on-device-error kernel --tool memcheck --leak-check full --report-api-errors all --track-stream-ordered-races all --target-processes all $(EXEC)
|
#@cuda-memcheck --destroy-on-device-error kernel --tool racecheck --racecheck-report all $(EXEC)
|
||||||
#@compute-sanitizer --destroy-on-device-error kernel --tool racecheck --racecheck-detect-level info --racecheck-report all $(EXEC)
|
#@cuda-memcheck --destroy-on-device-error kernel --tool initcheck --track-unused-memory yes $(EXEC)
|
||||||
#@compute-sanitizer --destroy-on-device-error kernel --tool initcheck --track-unused-memory yes $(EXEC)
|
#@cuda-memcheck --destroy-on-device-error kernel --tool synccheck $(EXEC)
|
||||||
#@compute-sanitizer --destroy-on-device-error kernel --tool synccheck $(EXEC)
|
#@compute-sanitizer --destroy-on-device-error kernel --tool memcheck --leak-check full --report-api-errors all --track-stream-ordered-races all $(EXEC)
|
||||||
|
#@compute-sanitizer --destroy-on-device-error kernel --tool racecheck --racecheck-detect-level info --racecheck-report all $(EXEC)
|
||||||
|
#@compute-sanitizer --destroy-on-device-error kernel --tool initcheck --track-unused-memory yes $(EXEC)
|
||||||
|
#@compute-sanitizer --destroy-on-device-error kernel --tool synccheck $(EXEC)
|
||||||
|
|
||||||
.PHONY: log
|
r2: $(EXEC) $(DATA)
|
||||||
log: $(DATA) reset
|
@r2 $(EXEC)
|
||||||
@echo 'Building GPU'
|
|
||||||
@sed -i 's/GPU_BOOSTED false/GPU_BOOSTED true/' config.hpp
|
|
||||||
@make -s -j "$(shell nproc)"
|
|
||||||
@echo 'Logging GPU'
|
|
||||||
@make -s start > log_gpu
|
|
||||||
@echo 'Building CPU'
|
|
||||||
@sed -i 's/GPU_BOOSTED true/GPU_BOOSTED false/' config.hpp
|
|
||||||
@make -s -j "$(shell nproc)"
|
|
||||||
@echo 'Logging CPU'
|
|
||||||
@make -s start > log_cpu
|
|
||||||
@sed -i 's/GPU_BOOSTED false/GPU_BOOSTED true/' config.hpp
|
|
||||||
@echo 'Cleaning up'
|
|
||||||
@make -s reset
|
|
||||||
|
|
||||||
.PHONY: reset
|
|
||||||
reset:
|
reset:
|
||||||
@echo 'Deleting generated states and models'
|
@echo Deleting generated states and models
|
||||||
@rm -frv $(OUT_DIR)/* $(MODELS_DIR)/*
|
@rm -rf $(OUT_DIR)/* $(MODELS_DIR)/* | true
|
||||||
#@ln -sv /mnt/pierre_stuffs/ViolaJones/cpp/models .
|
|
||||||
#@ln -sv /mnt/pierre_stuffs/ViolaJones/cpp/out .
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
clean:
|
||||||
@rm -fv $(EXEC) log_gpu log_cpu
|
@rm $(EXEC)
|
||||||
|
|
||||||
.PHONY: mrproper
|
mrproper:
|
||||||
mrproper: clean
|
@rm -r $(OBJ_DIR)
|
||||||
@rm -rfv $(OBJ_DIR) gmon.out
|
|
||||||
|
|
||||||
.PHONY: help
|
|
||||||
help:
|
|
||||||
@echo "Available targets:"
|
|
||||||
@echo "\tall: alias for start, (default target)"
|
|
||||||
@echo "\tstart: Start the ViolaJones algorithm, require data beforehand downloaded by the downloader."
|
|
||||||
@echo "\tdebug: Debug the ViolaJones algorithm, require data beforehand downloaded by the downloader."
|
|
||||||
@echo "\tprofile: Profile the ViolaJones algorithm functions timestamps, require data beforehand downloaded by the downloader."
|
|
||||||
@echo "\treset: Will delete any saved models and processed data made by ViolaJones."
|
|
||||||
@echo "\tmrproper: Will remove cpp binary files. Will execute reset target beforehand."
|
|
||||||
|
|
||||||
.PHONY: check-nvcc-works
|
|
||||||
check-nvcc-works:
|
|
||||||
@nvcc --version >/dev/null 2>&1 || (echo 'Please install NVIDIA Cuda compiler.' && exit 1)
|
|
||||||
|
|
||||||
.PHONY: check-gprof-works
|
|
||||||
check-gprof-works:
|
|
||||||
@gprof --version >/dev/null 2>&1 || (echo 'Please install GNU gprof.' && exit 1)
|
|
||||||
|
|
||||||
.PHONY: check-gprof2dot-works
|
|
||||||
check-gprof2dot-works:
|
|
||||||
@gprof2dot --help >/dev/null 2>&1 || (echo 'Please install gprof2dot.' && exit 1)
|
|
||||||
|
|
||||||
.PHONY: check-dot-works
|
|
||||||
check-dot-works:
|
|
||||||
@dot --version >/dev/null 2>&1 || (echo 'Please install dot from graphviz.' && exit 1)
|
|
||||||
|
|
||||||
.PHONY: check-valgrind-works
|
|
||||||
check-valgrind-works:
|
|
||||||
@valgrind --version >/dev/null 2>&1 || (echo 'Please install valgrind.' && exit 1)
|
|
||||||
|
|
||||||
.PHONY: check-computer-sanitizer-works
|
|
||||||
check-computer-sanitizer-works:
|
|
||||||
@computer-sanitizer --version >/dev/null 2>&1 || (echo 'Please install Compute Sanitizer from Cuda toolkit.' && exit 1)
|
|
||||||
|
|
||||||
-include $(OBJ:.o=.d)
|
|
||||||
|
@ -1,61 +1,56 @@
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include "data.hpp"
|
#include "data.hpp"
|
||||||
#include "ViolaJones_device.hpp"
|
#include "config.hpp"
|
||||||
|
#include "ViolaJonesGPU.hpp"
|
||||||
|
#include "ViolaJonesCPU.hpp"
|
||||||
|
|
||||||
constexpr static inline void add_empty_feature(const np::Array<uint8_t>& feats, size_t& n) noexcept {
|
static inline void add_empty_feature(const np::Array<uint8_t>& feats, size_t& n) noexcept {
|
||||||
memset(&feats[n], 0, 4 * sizeof(uint8_t));
|
memset(&feats[n], 0, 4 * sizeof(uint8_t));
|
||||||
n += 4;
|
n += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static inline void add_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
static inline void add_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i + w;
|
feats[n++] = i + w;
|
||||||
feats[n++] = j;
|
feats[n++] = j;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static inline void add_immediate_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
static inline void add_immediate_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i;
|
feats[n++] = i;
|
||||||
feats[n++] = j;
|
feats[n++] = j;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static inline void add_bottom_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
static inline void add_bottom_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i;
|
feats[n++] = i;
|
||||||
feats[n++] = j + h;
|
feats[n++] = j + h;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static inline void add_right2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
static inline void add_right2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i + 2 * w;
|
feats[n++] = i + 2 * w;
|
||||||
feats[n++] = j;
|
feats[n++] = j;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static inline void add_bottom2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
static inline void add_bottom2_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i;
|
feats[n++] = i;
|
||||||
feats[n++] = j + 2 * h;
|
feats[n++] = j + 2 * h;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static inline void add_bottom_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
static inline void add_bottom_right_feature(const np::Array<uint8_t>& feats, size_t& n, const uint16_t& i, const uint16_t& j, const uint16_t& w, const uint16_t& h) noexcept {
|
||||||
feats[n++] = i + w;
|
feats[n++] = i + w;
|
||||||
feats[n++] = j + h;
|
feats[n++] = j + h;
|
||||||
feats[n++] = w;
|
feats[n++] = w;
|
||||||
feats[n++] = h;
|
feats[n++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Initialize the features based on the input shape.
|
|
||||||
*
|
|
||||||
* @param width Width of the image
|
|
||||||
* @param height Height of the image
|
|
||||||
* @return The initialized features
|
|
||||||
*/
|
|
||||||
np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height) noexcept {
|
np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height) noexcept {
|
||||||
size_t n = 0;
|
size_t n = 0;
|
||||||
uint16_t w, h, i, j;
|
uint16_t w, h, i, j;
|
||||||
@ -115,11 +110,11 @@ np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height)
|
|||||||
return feats;
|
return feats;
|
||||||
}
|
}
|
||||||
|
|
||||||
//np::Array<int32_t> select_percentile(const np::Array<uint8_t> X_feat, const np::Array<uint8_t> y) noexcept {
|
//np::Array<int> select_percentile(const np::Array<uint8_t> X_feat, const np::Array<uint8_t> y) noexcept {
|
||||||
// std::vector<float64_t> class_0, class_1;
|
// std::vector<float64_t> class_0, class_1;
|
||||||
//
|
//
|
||||||
// const int32_t im_size = X_feat.shape[0] / y.shape[0];
|
// const int im_size = X_feat.shape[0] / y.shape[0];
|
||||||
// int32_t idy = 0, n_samples_per_class_0 = 0, n_samples_per_class_1 = 0;
|
// int idy = 0, n_samples_per_class_0 = 0, n_samples_per_class_1 = 0;
|
||||||
// for (size_t i = 0; i < X_feat.shape[0]; i += im_size) {
|
// for (size_t i = 0; i < X_feat.shape[0]; i += im_size) {
|
||||||
// if (y[idy] == 0) {
|
// if (y[idy] == 0) {
|
||||||
// ++n_samples_per_class_0;
|
// ++n_samples_per_class_0;
|
||||||
@ -131,24 +126,24 @@ np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height)
|
|||||||
// }
|
// }
|
||||||
// ++idy;
|
// ++idy;
|
||||||
// }
|
// }
|
||||||
// const int32_t n_samples = n_samples_per_class_0 + n_samples_per_class_1;
|
// const int n_samples = n_samples_per_class_0 + n_samples_per_class_1;
|
||||||
//
|
//
|
||||||
// float64_t ss_alldata_0 = 0;
|
// float64_t ss_alldata_0 = 0;
|
||||||
// for (int32_t i = 0;i < n_samples_per_class_0;++i)
|
// for (int i = 0;i < n_samples_per_class_0;++i)
|
||||||
// ss_alldata_0 += (class_0[i] * class_0[i]);
|
// ss_alldata_0 += (class_0[i] * class_0[i]);
|
||||||
//
|
//
|
||||||
// float64_t ss_alldata_1 = 0;
|
// float64_t ss_alldata_1 = 0;
|
||||||
// for (int32_t i = 0;i < n_samples_per_class_1;++i)
|
// for (int i = 0;i < n_samples_per_class_1;++i)
|
||||||
// ss_alldata_1 += (class_1[i] * class_1[i]);
|
// ss_alldata_1 += (class_1[i] * class_1[i]);
|
||||||
//
|
//
|
||||||
// const float64_t ss_alldata = ss_alldata_0 + ss_alldata_1;
|
// const float64_t ss_alldata = ss_alldata_0 + ss_alldata_1;
|
||||||
//
|
//
|
||||||
// float64_t sums_classes_0 = 0;
|
// float64_t sums_classes_0 = 0;
|
||||||
// for (int32_t i = 0;i < n_samples_per_class_0;++i)
|
// for (int i = 0;i < n_samples_per_class_0;++i)
|
||||||
// sums_classes_0 += class_0[i];
|
// sums_classes_0 += class_0[i];
|
||||||
//
|
//
|
||||||
// float64_t sums_classes_1 = 0;
|
// float64_t sums_classes_1 = 0;
|
||||||
// for (int32_t i = 0;i < n_samples_per_class_1;++i)
|
// for (int i = 0;i < n_samples_per_class_1;++i)
|
||||||
// sums_classes_1 += class_1[i];
|
// sums_classes_1 += class_1[i];
|
||||||
//
|
//
|
||||||
// float64_t sq_of_sums_alldata = sums_classes_0 + sums_classes_1;
|
// float64_t sq_of_sums_alldata = sums_classes_0 + sums_classes_1;
|
||||||
@ -159,21 +154,15 @@ np::Array<uint8_t> build_features(const uint16_t& width, const uint16_t& height)
|
|||||||
// const float64_t ss_tot = ss_alldata - sq_of_sums_alldata / n_samples;
|
// const float64_t ss_tot = ss_alldata - sq_of_sums_alldata / n_samples;
|
||||||
// const float64_t sqd_sum_bw_n = sq_of_sums_args_0 / n_samples_per_class_0 + sq_of_sums_args_1 / n_samples_per_class_1 - sq_of_sums_alldata / n_samples;
|
// const float64_t sqd_sum_bw_n = sq_of_sums_args_0 / n_samples_per_class_0 + sq_of_sums_args_1 / n_samples_per_class_1 - sq_of_sums_alldata / n_samples;
|
||||||
// const float64_t ss_wn = ss_tot - sqd_sum_bw_n;
|
// const float64_t ss_wn = ss_tot - sqd_sum_bw_n;
|
||||||
// const int32_t df_wn = n_samples - 2;
|
// const int df_wn = n_samples - 2;
|
||||||
// const float64_t msw = ss_wn / df_wn;
|
// const float64_t msw = ss_wn / df_wn;
|
||||||
// const float64_t f_values = sqd_sum_bw_n / msw;
|
// const float64_t f_values = sqd_sum_bw_n / msw;
|
||||||
//
|
//
|
||||||
// const np::Array<int32_t> res = np::empty<int32_t>({ static_cast<size_t>(std::ceil(static_cast<float64_t>(im_size) / 10.0)) });
|
// const np::Array<int> res = np::empty<int>({ static_cast<size_t>(std::ceil(static_cast<float64_t>(im_size) / 10.0)) });
|
||||||
// // TODO Complete code
|
// // TODO Complete code
|
||||||
// return res;
|
// return res;
|
||||||
//}
|
//}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Initialize the weights of the weak classifiers based on the training labels.
|
|
||||||
*
|
|
||||||
* @param y_train Training labels
|
|
||||||
* @return The initialized weights
|
|
||||||
*/
|
|
||||||
np::Array<float64_t> init_weights(const np::Array<uint8_t>& y_train) noexcept {
|
np::Array<float64_t> init_weights(const np::Array<uint8_t>& y_train) noexcept {
|
||||||
np::Array<float64_t> weights = np::empty<float64_t>(y_train.shape);
|
np::Array<float64_t> weights = np::empty<float64_t>(y_train.shape);
|
||||||
const uint16_t t = np::sum(np::astype<uint16_t>(y_train));
|
const uint16_t t = np::sum(np::astype<uint16_t>(y_train));
|
||||||
@ -183,30 +172,13 @@ np::Array<float64_t> init_weights(const np::Array<uint8_t>& y_train) noexcept {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
np::Array<uint8_t> classify_weak_clf(const np::Array<int32_t>& X_feat_i, const size_t& j, const float64_t& threshold, const float64_t& polarity) noexcept {
|
||||||
* @brief Classify the integrated features based on polarity and threshold.
|
|
||||||
*
|
|
||||||
* @param X_feat_i Integrated features
|
|
||||||
* @param j Index of the classifier
|
|
||||||
* @param threshold Trained threshold
|
|
||||||
* @param polarity Trained polarity
|
|
||||||
* @return Classified features
|
|
||||||
*/
|
|
||||||
static np::Array<uint8_t> classify_weak_clf(const np::Array<int32_t>& X_feat_i, const size_t& j, const float64_t& threshold, const float64_t& polarity) noexcept {
|
|
||||||
np::Array<uint8_t> res = np::empty<uint8_t>({ X_feat_i.shape[1] });
|
np::Array<uint8_t> res = np::empty<uint8_t>({ X_feat_i.shape[1] });
|
||||||
for(size_t i = 0; i < res.shape[0]; ++i)
|
for(size_t i = 0; i < res.shape[0]; ++i)
|
||||||
res[i] = polarity * X_feat_i[j * X_feat_i.shape[1] + i] < polarity * threshold ? 1 : 0;
|
res[i] = polarity * X_feat_i[j * X_feat_i.shape[1] + i] < polarity * threshold ? 1 : 0;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Classify the trained classifiers on the given features.
|
|
||||||
*
|
|
||||||
* @param alphas Trained alphas
|
|
||||||
* @param classifiers Trained classifiers
|
|
||||||
* @param X_feat integrated features
|
|
||||||
* @return Classification results
|
|
||||||
*/
|
|
||||||
np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>& alphas, const np::Array<float64_t>& classifiers, const np::Array<int32_t>& X_feat) noexcept {
|
np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>& alphas, const np::Array<float64_t>& classifiers, const np::Array<int32_t>& X_feat) noexcept {
|
||||||
np::Array<float64_t> total = np::zeros<float64_t>({ X_feat.shape[1] });
|
np::Array<float64_t> total = np::zeros<float64_t>({ X_feat.shape[1] });
|
||||||
|
|
||||||
@ -227,15 +199,6 @@ np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>& alphas, cons
|
|||||||
return y_pred;
|
return y_pred;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Select the best classifier given their predictions.
|
|
||||||
*
|
|
||||||
* @param classifiers The weak classifiers
|
|
||||||
* @param weights Trained weights of each classifier
|
|
||||||
* @param X_feat Integrated features
|
|
||||||
* @param y Features labels
|
|
||||||
* @return Index of the best classifier, the best error and the best accuracy
|
|
||||||
*/
|
|
||||||
std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array<float64_t>& classifiers, const np::Array<float64_t>& weights, const np::Array<int32_t>& X_feat, const np::Array<uint8_t>& y) noexcept {
|
std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array<float64_t>& classifiers, const np::Array<float64_t>& weights, const np::Array<int32_t>& X_feat, const np::Array<uint8_t>& y) noexcept {
|
||||||
std::tuple<int32_t, float64_t, np::Array<float64_t>> res = { -1, np::inf, np::empty<float64_t>({ X_feat.shape[0] }) };
|
std::tuple<int32_t, float64_t, np::Array<float64_t>> res = { -1, np::inf, np::empty<float64_t>({ X_feat.shape[0] }) };
|
||||||
|
|
||||||
@ -253,15 +216,6 @@ std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Train the weak classifiers.
|
|
||||||
*
|
|
||||||
* @param T Number of weak classifiers
|
|
||||||
* @param X_feat Integrated features
|
|
||||||
* @param X_feat_argsort Sorted indexes of the integrated features
|
|
||||||
* @param y Features labels
|
|
||||||
* @return List of trained alphas and the list of the final classifiers
|
|
||||||
*/
|
|
||||||
std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y) noexcept {
|
std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y) noexcept {
|
||||||
np::Array<float64_t> weights = init_weights(y);
|
np::Array<float64_t> weights = init_weights(y);
|
||||||
np::Array<float64_t> alphas = np::empty<float64_t>({ T });
|
np::Array<float64_t> alphas = np::empty<float64_t>({ T });
|
||||||
@ -269,7 +223,11 @@ std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np:
|
|||||||
|
|
||||||
for(size_t t = 0; t < T; ++t ){
|
for(size_t t = 0; t < T; ++t ){
|
||||||
weights /= np::sum(weights);
|
weights /= np::sum(weights);
|
||||||
const np::Array<float64_t> classifiers = train_weak_clf(X_feat, X_feat_argsort, y, weights);
|
#if GPU_BOOSTED
|
||||||
|
const np::Array<float64_t> classifiers = train_weak_clf_gpu(X_feat, X_feat_argsort, y, weights);
|
||||||
|
#else
|
||||||
|
const np::Array<float64_t> classifiers = train_weak_clf_cpu(X_feat, X_feat_argsort, y, weights);
|
||||||
|
#endif
|
||||||
const auto [ clf, error, accuracy ] = select_best(classifiers, weights, X_feat, y);
|
const auto [ clf, error, accuracy ] = select_best(classifiers, weights, X_feat, y);
|
||||||
float64_t beta = error / (1.0 - error);
|
float64_t beta = error / (1.0 - error);
|
||||||
weights *= np::pow(beta, (1.0 - accuracy));
|
weights *= np::pow(beta, (1.0 - accuracy));
|
||||||
@ -281,13 +239,6 @@ std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t& T, const np:
|
|||||||
return { alphas, final_classifier };
|
return { alphas, final_classifier };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute the accuracy score, i.e. how close a given set of measurements is to the true values.
|
|
||||||
*
|
|
||||||
* @param y Ground truth labels
|
|
||||||
* @param y_pred Predicted labels
|
|
||||||
* @return computed accuracy score
|
|
||||||
*/
|
|
||||||
float64_t accuracy_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
float64_t accuracy_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
||||||
float64_t res = 0.0;
|
float64_t res = 0.0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
@ -296,13 +247,6 @@ float64_t accuracy_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>&
|
|||||||
return res / y.shape[0];
|
return res / y.shape[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute the precision score i.e. the ratio (TP / (TP + FP)) where TP is the number of true positives and FP the number of false positives.
|
|
||||||
*
|
|
||||||
* @param y Ground truth labels
|
|
||||||
* @param y_pred Predicted labels
|
|
||||||
* @return computed precision score
|
|
||||||
*/
|
|
||||||
float64_t precision_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
float64_t precision_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
||||||
uint16_t true_positive = 0, false_positive = 0;
|
uint16_t true_positive = 0, false_positive = 0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
@ -315,13 +259,6 @@ float64_t precision_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>&
|
|||||||
return static_cast<float64_t>(true_positive) / (true_positive + false_positive);
|
return static_cast<float64_t>(true_positive) / (true_positive + false_positive);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute the recall score i.e. the ratio (TP / (TP + FN)) where TP is the number of true positives and FN the number of false negatives.
|
|
||||||
*
|
|
||||||
* @param y Ground truth labels
|
|
||||||
* @param y_pred Predicted labels
|
|
||||||
* @return computed recall score
|
|
||||||
*/
|
|
||||||
float64_t recall_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
float64_t recall_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
||||||
uint16_t true_positive = 0, false_negative = 0;
|
uint16_t true_positive = 0, false_negative = 0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
@ -335,35 +272,12 @@ float64_t recall_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_
|
|||||||
return static_cast<float64_t>(true_positive) / (true_positive + false_negative);
|
return static_cast<float64_t>(true_positive) / (true_positive + false_negative);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute the F1 score aka balanced F-score or F-measure.
|
|
||||||
*
|
|
||||||
* F1 = (2 * TP) / (2 * TP + FP + FN)
|
|
||||||
* where TP is the true positives,
|
|
||||||
* FP is the false positives,
|
|
||||||
* and FN is the false negatives
|
|
||||||
*
|
|
||||||
* @param y Ground truth labels
|
|
||||||
* @param y_pred Predicted labels
|
|
||||||
* @return computed F1 score
|
|
||||||
*/
|
|
||||||
float64_t f1_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
float64_t f1_score(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
||||||
const float64_t precision = precision_score(y, y_pred);
|
const float64_t precision = precision_score(y, y_pred);
|
||||||
const float64_t recall = recall_score(y, y_pred);
|
const float64_t recall = recall_score(y, y_pred);
|
||||||
return 2 * (precision * recall) / (precision + recall);
|
return 2 * (precision * recall) / (precision + recall);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute the confusion matrix to evaluate a given classification.
|
|
||||||
*
|
|
||||||
* A confusion matrix of a binary classification consists of a 2x2 matrix containing
|
|
||||||
* | True negatives | False positives |
|
|
||||||
* | False negatives | True positives |
|
|
||||||
*
|
|
||||||
* @param y Ground truth labels
|
|
||||||
* @param y_pred Predicted labels
|
|
||||||
* @return computed confusion matrix
|
|
||||||
*/
|
|
||||||
std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Array<uint8_t>& y, const np::Array<uint8_t>& y_pred) noexcept {
|
||||||
uint16_t true_positive = 0, false_positive = 0, true_negative = 0, false_negative = 0;
|
uint16_t true_positive = 0, false_positive = 0, true_negative = 0, false_negative = 0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
@ -379,3 +293,4 @@ std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Ar
|
|||||||
++false_positive;
|
++false_positive;
|
||||||
return std::make_tuple(true_negative, false_positive, false_negative, true_positive);
|
return std::make_tuple(true_negative, false_positive, false_negative, true_positive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,15 +2,8 @@
|
|||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include "data.hpp"
|
#include "data.hpp"
|
||||||
#include "toolbox.hpp"
|
#include "toolbox.hpp"
|
||||||
|
//#include "config.hpp"
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test if an array from a CPU computation is equal to its GPU computation equivalent.
|
|
||||||
*
|
|
||||||
* @tparam T Inner type of the arrays to test
|
|
||||||
* @param cpu CPU Array
|
|
||||||
* @param gpu GPU Array
|
|
||||||
* @return Whether the test was successful
|
|
||||||
*/
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noexcept {
|
bool unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noexcept {
|
||||||
if (cpu.shape != gpu.shape) {
|
if (cpu.shape != gpu.shape) {
|
||||||
@ -34,14 +27,6 @@ bool unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noex
|
|||||||
return eq == length;
|
return eq == length;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test if a given 2D array of indices sorts a given 2D array
|
|
||||||
*
|
|
||||||
* @tparam T Inner type of the array to test
|
|
||||||
* @param a 2D Array of data
|
|
||||||
* @param indices 2D Indices that sort the array
|
|
||||||
* @return Whether the test was successful
|
|
||||||
*/
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indices) noexcept {
|
bool unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indices) noexcept {
|
||||||
if (a.shape != indices.shape) {
|
if (a.shape != indices.shape) {
|
||||||
@ -66,18 +51,6 @@ bool unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indi
|
|||||||
return correct == total;
|
return correct == total;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Benchmark a function and display the result in stdout.
|
|
||||||
*
|
|
||||||
* @tparam T Resulting type of the function to benchmark
|
|
||||||
* @tparam F Signature of the function to call
|
|
||||||
* @tparam Args Arguments variadic of the function to call
|
|
||||||
* @param step_name Name of the function to log
|
|
||||||
* @param column_width Width of the column to print during logging
|
|
||||||
* @param fnc Function to benchmark
|
|
||||||
* @param args Arguments to pass to the function to call
|
|
||||||
* @return Result of the benchmarked function
|
|
||||||
*/
|
|
||||||
template <typename T, typename F, typename... Args>
|
template <typename T, typename F, typename... Args>
|
||||||
T benchmark_function(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept {
|
T benchmark_function(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept {
|
||||||
#if __DEBUG == false
|
#if __DEBUG == false
|
||||||
@ -91,16 +64,6 @@ T benchmark_function(const char* const step_name, const int32_t& column_width, c
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Benchmark a function and display the result in stdout.
|
|
||||||
*
|
|
||||||
* @tparam F Signature of the function to call
|
|
||||||
* @tparam Args Arguments variadic of the function to call
|
|
||||||
* @param step_name Name of the function to log
|
|
||||||
* @param column_width Width of the column to print during logging
|
|
||||||
* @param fnc Function to benchmark
|
|
||||||
* @param args Arguments to pass to the function to call
|
|
||||||
*/
|
|
||||||
template <typename F, typename... Args>
|
template <typename F, typename... Args>
|
||||||
void benchmark_function_void(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept {
|
void benchmark_function_void(const char* const step_name, const int32_t& column_width, const F& fnc, Args &&...args) noexcept {
|
||||||
#if __DEBUG == false
|
#if __DEBUG == false
|
||||||
@ -113,22 +76,6 @@ void benchmark_function_void(const char* const step_name, const int32_t& column_
|
|||||||
formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
formatted_row<3>({ column_width, -18, 29 }, { step_name, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Either execute a function then save the result or load the already cached result.
|
|
||||||
*
|
|
||||||
* @tparam T Inner type of the resulting array
|
|
||||||
* @tparam F Signature of the function to call
|
|
||||||
* @tparam Args Arguments variadic of the function to call
|
|
||||||
* @param step_name Name of the function to log
|
|
||||||
* @param column_width Width of the column to print during logging
|
|
||||||
* @param filename Name of the filename where the result is saved
|
|
||||||
* @param force_redo Recall the function even if the result is already saved, ignored if result is not cached
|
|
||||||
* @param save_state Whether the computed result will be saved or not, ignore if loading already cached result
|
|
||||||
* @param out_dir Path of the directory to save the result
|
|
||||||
* @param fnc Function to call
|
|
||||||
* @param args Arguments to pass to the function to call
|
|
||||||
* @return The result of the called function
|
|
||||||
*/
|
|
||||||
template <typename T, typename F, typename... Args>
|
template <typename T, typename F, typename... Args>
|
||||||
np::Array<T> state_saver(const char* const step_name, const int32_t& column_width, const char* const filename, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept {
|
np::Array<T> state_saver(const char* const step_name, const int32_t& column_width, const char* const filename, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept {
|
||||||
char filepath[BUFFER_SIZE] = { 0 };
|
char filepath[BUFFER_SIZE] = { 0 };
|
||||||
@ -136,6 +83,7 @@ np::Array<T> state_saver(const char* const step_name, const int32_t& column_widt
|
|||||||
|
|
||||||
np::Array<T> bin;
|
np::Array<T> bin;
|
||||||
if (!std::filesystem::exists(filepath) || force_redo) {
|
if (!std::filesystem::exists(filepath) || force_redo) {
|
||||||
|
//bin = std::move(benchmark_function<np::Array<T>>(step_name, column_width, fnc, std::forward<Args>(args)...));
|
||||||
bin = benchmark_function<np::Array<T>>(step_name, column_width, fnc, std::forward<Args>(args)...);
|
bin = benchmark_function<np::Array<T>>(step_name, column_width, fnc, std::forward<Args>(args)...);
|
||||||
if(save_state){
|
if(save_state){
|
||||||
#if __DEBUG == false
|
#if __DEBUG == false
|
||||||
@ -153,35 +101,20 @@ np::Array<T> state_saver(const char* const step_name, const int32_t& column_widt
|
|||||||
fprintf(stderr, "Loading results of %s\r", step_name);
|
fprintf(stderr, "Loading results of %s\r", step_name);
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
#endif
|
#endif
|
||||||
|
//bin = std::move(load<T>(filepath));
|
||||||
bin = load<T>(filepath);
|
bin = load<T>(filepath);
|
||||||
formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" });
|
formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" });
|
||||||
}
|
}
|
||||||
return bin;
|
return bin;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Either execute a function then saves the results or load the already cached result.
|
|
||||||
*
|
|
||||||
* @tparam T Inner type of the resulting arrays
|
|
||||||
* @tparam F Signature of the function to call
|
|
||||||
* @tparam Args Arguments variadic of the function to call
|
|
||||||
* @param step_name Name of the function to log
|
|
||||||
* @param column_width Width of the column to print during logging
|
|
||||||
* @param filenames List of names of the filenames where the results are save
|
|
||||||
* @param force_redo Recall the function even if the results are already saved, ignored if results are not cached
|
|
||||||
* @param save_state Whether the computed results will be saved or not, ignored if loading already cached results
|
|
||||||
* @param out_dir Path of the directory to save the results
|
|
||||||
* @param fnc Function to call
|
|
||||||
* @param args Arguments to pass to the function to call
|
|
||||||
* @return The results of the called function
|
|
||||||
*/
|
|
||||||
template <typename T, size_t N, typename F, typename... Args>
|
template <typename T, size_t N, typename F, typename... Args>
|
||||||
std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32_t& column_width, const std::vector<const char*>& filenames, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept {
|
std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32_t& column_width, const std::vector<const char*>& filenames, const bool& force_redo, const bool& save_state, const char* const out_dir, const F& fnc, Args &&...args) noexcept {
|
||||||
char filepath[BUFFER_SIZE] = { 0 };
|
char filepath[BUFFER_SIZE] = { 0 };
|
||||||
bool abs = false;
|
bool abs = false;
|
||||||
for (const char* const filename : filenames){
|
for (const char* filename : filenames){
|
||||||
snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename);
|
sprintf(filepath, "%s/%s.bin", out_dir, filename);
|
||||||
if (!std::filesystem::exists(filepath)) {
|
if (!fs::exists(filepath)) {
|
||||||
abs = true;
|
abs = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -189,6 +122,7 @@ std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32
|
|||||||
|
|
||||||
std::array<np::Array<T>, N> bin;
|
std::array<np::Array<T>, N> bin;
|
||||||
if (abs || force_redo) {
|
if (abs || force_redo) {
|
||||||
|
//bin = std::move(benchmark_function<std::array<np::Array<T>, N>>(step_name, column_width, fnc, std::forward<Args>(args)...));
|
||||||
bin = benchmark_function<std::array<np::Array<T>, N>>(step_name, column_width, fnc, std::forward<Args>(args)...);
|
bin = benchmark_function<std::array<np::Array<T>, N>>(step_name, column_width, fnc, std::forward<Args>(args)...);
|
||||||
if (save_state){
|
if (save_state){
|
||||||
#if __DEBUG == false
|
#if __DEBUG == false
|
||||||
@ -196,8 +130,8 @@ std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32
|
|||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
#endif
|
#endif
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
for (const char* const filename : filenames){
|
for (const char* filename : filenames){
|
||||||
snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename);
|
sprintf(filepath, "%s/%s.bin", out_dir, filename);
|
||||||
save<T>(bin[i++], filepath);
|
save<T>(bin[i++], filepath);
|
||||||
}
|
}
|
||||||
#if __DEBUG == false
|
#if __DEBUG == false
|
||||||
@ -211,116 +145,25 @@ std::array<np::Array<T>, N> state_saver(const char* const step_name, const int32
|
|||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
#endif
|
#endif
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
for (const char* const filename : filenames){
|
for (const char* filename : filenames){
|
||||||
|
bin[i++] = std::move(load<T>(filepath));
|
||||||
snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename);
|
snprintf(filepath, BUFFER_SIZE, "%s/%s.bin", out_dir, filename);
|
||||||
bin[i++] = load<T>(filepath);
|
|
||||||
}
|
}
|
||||||
formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" });
|
formatted_row<3>({ column_width, -18, 29 }, { step_name, "None", "loaded saved state" });
|
||||||
}
|
}
|
||||||
return bin;
|
return bin;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>&) noexcept;
|
||||||
* @brief Initialize the features based on the input shape.
|
|
||||||
*
|
|
||||||
* @param width Width of the image
|
|
||||||
* @param height Height of the image
|
|
||||||
* @return The initialized features
|
|
||||||
*/
|
|
||||||
np::Array<uint8_t> build_features(const uint16_t&, const uint16_t&) noexcept;
|
np::Array<uint8_t> build_features(const uint16_t&, const uint16_t&) noexcept;
|
||||||
//np::Array<int32_t> select_percentile(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
np::Array<int> select_percentile(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Classify the trained classifiers on the given features.
|
|
||||||
*
|
|
||||||
* @param alphas Trained alphas
|
|
||||||
* @param classifiers Trained classifiers
|
|
||||||
* @param X_feat integrated features
|
|
||||||
* @return Classification results
|
|
||||||
*/
|
|
||||||
np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>&, const np::Array<float64_t>&, const np::Array<int32_t>&) noexcept;
|
np::Array<uint8_t> classify_viola_jones(const np::Array<float64_t>&, const np::Array<float64_t>&, const np::Array<int32_t>&) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Initialize the weights of the weak classifiers based on the training labels.
|
|
||||||
*
|
|
||||||
* @param y_train Training labels
|
|
||||||
* @return The initialized weights
|
|
||||||
*/
|
|
||||||
np::Array<float64_t> init_weights(const np::Array<uint8_t>&) noexcept;
|
np::Array<float64_t> init_weights(const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Select the best classifer given their predictions.
|
|
||||||
*
|
|
||||||
* @param classifiers The weak classifiers
|
|
||||||
* @param weights Trained weights of each classifiers
|
|
||||||
* @param X_feat Integrated features
|
|
||||||
* @param y Features labels
|
|
||||||
* @return Index of the best classifier, the best error and the best accuracy
|
|
||||||
*/
|
|
||||||
std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array<float64_t>&, const np::Array<float64_t>&, const np::Array<int32_t>&,
|
std::tuple<int32_t, float64_t, np::Array<float64_t>> select_best(const np::Array<float64_t>&, const np::Array<float64_t>&, const np::Array<int32_t>&,
|
||||||
const np::Array<uint8_t>&) noexcept;
|
const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Train the weak calssifiers.
|
|
||||||
*
|
|
||||||
* @param T Number of weak classifiers
|
|
||||||
* @param X_feat Integrated features
|
|
||||||
* @param X_feat_argsort Sorted indexes of the integrated features
|
|
||||||
* @param y Features labels
|
|
||||||
* @return List of trained alphas and the list of the final classifiers
|
|
||||||
*/
|
|
||||||
std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t&, const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&) noexcept;
|
std::array<np::Array<float64_t>, 2> train_viola_jones(const size_t&, const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute the accuracy score i.e. how a given set of measurements are close to their true value.
|
|
||||||
*
|
|
||||||
* @param y Ground truth labels
|
|
||||||
* @param y_pred Predicted labels
|
|
||||||
* @return computed accuracy score
|
|
||||||
*/
|
|
||||||
float64_t accuracy_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
float64_t accuracy_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute the precision score i.e. how a given set of measurements are close to each other.
|
|
||||||
*
|
|
||||||
* @param y Ground truth labels
|
|
||||||
* @param y_pred Predicted labels
|
|
||||||
* @return computed precision score
|
|
||||||
*/
|
|
||||||
float64_t precision_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
float64_t precision_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute the recall score i.e. the ratio (TP / (TP + FN)) where TP is the number of true positives and FN the number of false negatives.
|
|
||||||
*
|
|
||||||
* @param y Ground truth labels
|
|
||||||
* @param y_pred Predicted labels
|
|
||||||
* @return computed recall score
|
|
||||||
*/
|
|
||||||
float64_t recall_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
float64_t recall_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute the F1 score aka balanced F-score or F-measure.
|
|
||||||
*
|
|
||||||
* F1 = (2 * TP) / (2 * TP + FP + FN)
|
|
||||||
* where TP is the true positives,
|
|
||||||
* FP is the false positives,
|
|
||||||
* and FN is the false negatives
|
|
||||||
*
|
|
||||||
* @param y Ground truth labels
|
|
||||||
* @param y_pred Predicted labels
|
|
||||||
* @return computed F1 score
|
|
||||||
*/
|
|
||||||
float64_t f1_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
float64_t f1_score(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute the confusion matrix to evaluate a given classification.
|
|
||||||
*
|
|
||||||
* A confusion matrix of a binary classification consists of a 2x2 matrix containing
|
|
||||||
* | True negatives | False positives |
|
|
||||||
* | False negatives | True positives |
|
|
||||||
*
|
|
||||||
* @param y Ground truth labels
|
|
||||||
* @param y_pred Predicted labels
|
|
||||||
* @return computed confusion matrix
|
|
||||||
*/
|
|
||||||
std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
std::tuple<uint16_t, uint16_t, uint16_t, uint16_t> confusion_matrix(const np::Array<uint8_t>&, const np::Array<uint8_t>&) noexcept;
|
||||||
|
@ -1,15 +1,7 @@
|
|||||||
#include "data.hpp"
|
#include "data.hpp"
|
||||||
#include "config.hpp"
|
#include "toolbox.hpp"
|
||||||
|
|
||||||
#if GPU_BOOSTED == false
|
np::Array<uint32_t> set_integral_image_cpu(const np::Array<uint8_t>& set) noexcept {
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Transform the input images in integrated images (CPU version).
|
|
||||||
*
|
|
||||||
* @param X Dataset of images
|
|
||||||
* @return Dataset of integrated images
|
|
||||||
*/
|
|
||||||
np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>& set) noexcept {
|
|
||||||
np::Array<uint32_t> X_ii = np::empty<uint32_t>(set.shape);
|
np::Array<uint32_t> X_ii = np::empty<uint32_t>(set.shape);
|
||||||
|
|
||||||
size_t i, y, x, s;
|
size_t i, y, x, s;
|
||||||
@ -39,14 +31,7 @@ constexpr static inline int16_t __compute_feature__(const np::Array<uint32_t>& X
|
|||||||
return X_ii[j + _yh + w] + X_ii[j + _y] - X_ii[j + _yh] - X_ii[j + _y + w];
|
return X_ii[j + _yh + w] + X_ii[j + _y] - X_ii[j + _yh] - X_ii[j + _y + w];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
np::Array<int32_t> apply_features_cpu(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
|
||||||
* @brief Apply the features on a integrated image dataset (CPU version).
|
|
||||||
*
|
|
||||||
* @param feats Features to apply
|
|
||||||
* @param X_ii Integrated image dataset
|
|
||||||
* @return Applied features
|
|
||||||
*/
|
|
||||||
np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
|
|
||||||
np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
|
np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
|
||||||
|
|
||||||
size_t j, feat_idx = 0;
|
size_t j, feat_idx = 0;
|
||||||
@ -66,7 +51,7 @@ np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Arr
|
|||||||
return X_feat;
|
return X_feat;
|
||||||
}
|
}
|
||||||
|
|
||||||
np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y, const np::Array<float64_t>& weights) noexcept {
|
np::Array<float64_t> train_weak_clf_cpu(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y, const np::Array<float64_t>& weights) noexcept {
|
||||||
float64_t total_pos = 0.0, total_neg = 0.0;
|
float64_t total_pos = 0.0, total_neg = 0.0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
(y[i] == static_cast<uint8_t>(1) ? total_pos : total_neg) += weights[i];
|
(y[i] == static_cast<uint8_t>(1) ? total_pos : total_neg) += weights[i];
|
||||||
@ -96,69 +81,7 @@ np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::
|
|||||||
return classifiers;
|
return classifiers;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>& X_feat) noexcept {
|
||||||
* @brief Perform an indirect sort of a given array within a given bound.
|
|
||||||
*
|
|
||||||
* @tparam T Inner type of the array
|
|
||||||
* @param a Array to sort
|
|
||||||
* @param indices Array of indices to write to
|
|
||||||
* @param low lower bound to sort
|
|
||||||
* @param high higher bound to sort
|
|
||||||
*/
|
|
||||||
template<typename T>
|
|
||||||
static void argsort(const T* const a, uint16_t* const indices, size_t low, size_t high) noexcept {
|
|
||||||
const size_t total = high - low + 1;
|
|
||||||
|
|
||||||
size_t* const stack = new size_t[total]{low, high};
|
|
||||||
//size_t stack[total];
|
|
||||||
//stack[0] = l;
|
|
||||||
//stack[1] = h;
|
|
||||||
size_t top = 1;
|
|
||||||
|
|
||||||
while (top <= total) {
|
|
||||||
high = stack[top--];
|
|
||||||
low = stack[top--];
|
|
||||||
if(low >= high)
|
|
||||||
break;
|
|
||||||
|
|
||||||
const size_t p = as_partition(a, indices, low, high);
|
|
||||||
|
|
||||||
if (p - 1 > low && p - 1 < total) {
|
|
||||||
stack[++top] = low;
|
|
||||||
stack[++top] = p - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (p + 1 < high) {
|
|
||||||
stack[++top] = p + 1;
|
|
||||||
stack[++top] = high;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
delete[] stack;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Apply argsort to every column of a given 2D array.
|
|
||||||
*
|
|
||||||
* @tparam T Inner type of the array
|
|
||||||
* @param a 2D Array to sort
|
|
||||||
* @return 2D Array of indices that sort the array
|
|
||||||
*/
|
|
||||||
template<typename T>
|
|
||||||
static np::Array<uint16_t> argsort_bounded(const np::Array<T>& a, const size_t& low, const size_t& high) noexcept {
|
|
||||||
np::Array<uint16_t> indices = np::empty(a.shape);
|
|
||||||
map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; });
|
|
||||||
|
|
||||||
argsort_bounded(a, indices, low, high);
|
|
||||||
return indices;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Perform an indirect sort on each column of a given 2D array (CPU version).
|
|
||||||
*
|
|
||||||
* @param a 2D Array to sort
|
|
||||||
* @return 2D Array of indices that sort the array
|
|
||||||
*/
|
|
||||||
np::Array<uint16_t> argsort_2d(const np::Array<int32_t>& X_feat) noexcept {
|
|
||||||
const np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);
|
const np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);
|
||||||
const size_t length = np::prod(X_feat.shape);
|
const size_t length = np::prod(X_feat.shape);
|
||||||
for (size_t i = 0; i < length; i += X_feat.shape[1]) {
|
for (size_t i = 0; i < length; i += X_feat.shape[1]) {
|
||||||
@ -168,4 +91,3 @@ np::Array<uint16_t> argsort_2d(const np::Array<int32_t>& X_feat) noexcept {
|
|||||||
return indices;
|
return indices;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // GPU_BOOSTED == false
|
|
||||||
|
8
cpp/ViolaJonesCPU.hpp
Normal file
8
cpp/ViolaJonesCPU.hpp
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "data.hpp"
|
||||||
|
|
||||||
|
np::Array<uint32_t> set_integral_image_cpu(const np::Array<uint8_t>&) noexcept;
|
||||||
|
np::Array<int32_t> apply_features_cpu(const np::Array<uint8_t>&, const np::Array<uint32_t>&) noexcept;
|
||||||
|
np::Array<float64_t> train_weak_clf_cpu(const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&,
|
||||||
|
const np::Array<float64_t>&) noexcept;
|
||||||
|
np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>&) noexcept;
|
@ -1,14 +1,5 @@
|
|||||||
#include "data.hpp"
|
#include "data.hpp"
|
||||||
#include "config.hpp"
|
|
||||||
|
|
||||||
#if GPU_BOOSTED
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Prefix Sum (scan) of a given dataset.
|
|
||||||
*
|
|
||||||
* @param X Dataset of images to apply sum
|
|
||||||
* @return Scanned dataset of images
|
|
||||||
*/
|
|
||||||
static np::Array<uint32_t> __scanCPU_3d__(const np::Array<uint32_t>& X) noexcept {
|
static np::Array<uint32_t> __scanCPU_3d__(const np::Array<uint32_t>& X) noexcept {
|
||||||
np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape);
|
np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape);
|
||||||
const size_t total = np::prod(X_scan.shape);
|
const size_t total = np::prod(X_scan.shape);
|
||||||
@ -25,14 +16,6 @@ static np::Array<uint32_t> __scanCPU_3d__(const np::Array<uint32_t>& X) noexcept
|
|||||||
return X_scan;
|
return X_scan;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief GPU kernel used to do a parallel prefix sum (scan).
|
|
||||||
*
|
|
||||||
* @param n Number of width blocks
|
|
||||||
* @param j Temporary sum index
|
|
||||||
* @param d_inter Temporary sums on device to add
|
|
||||||
* @param d_X Dataset of images on device to apply sum
|
|
||||||
*/
|
|
||||||
static __global__ void __kernel_scan_3d__(const uint16_t n, const uint16_t j, np::Array<uint32_t> d_inter, np::Array<uint32_t> d_X) {
|
static __global__ void __kernel_scan_3d__(const uint16_t n, const uint16_t j, np::Array<uint32_t> d_inter, np::Array<uint32_t> d_X) {
|
||||||
const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x;
|
const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y;
|
const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -77,14 +60,6 @@ static __global__ void __kernel_scan_3d__(const uint16_t n, const uint16_t j, np
|
|||||||
d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] = sA[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y];
|
d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] = sA[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief GPU kernel for parallel sum.
|
|
||||||
*
|
|
||||||
* @param d_X Dataset of images on device
|
|
||||||
* @param d_s Temporary sums to add on device
|
|
||||||
* @param n Number of width blocks
|
|
||||||
* @param m Height of a block
|
|
||||||
*/
|
|
||||||
static __global__ void __add_3d__(np::Array<uint32_t> d_X, const np::Array<uint32_t> d_s, const uint16_t n, const uint16_t m) {
|
static __global__ void __add_3d__(np::Array<uint32_t> d_X, const np::Array<uint32_t> d_s, const uint16_t n, const uint16_t m) {
|
||||||
const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x;
|
const size_t x_coor = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y;
|
const size_t y_coor = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -92,14 +67,6 @@ static __global__ void __add_3d__(np::Array<uint32_t> d_X, const np::Array<uint3
|
|||||||
d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] += d_s[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + blockIdx.x];
|
d_X[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + x_coor] += d_s[blockIdx.z * d_X.shape[1] * d_X.shape[2] + y_coor * d_X.shape[2] + blockIdx.x];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Parallel Prefix Sum (scan) of a given dataset.
|
|
||||||
*
|
|
||||||
* Read more: https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda
|
|
||||||
*
|
|
||||||
* @param X Dataset of images
|
|
||||||
* @return Scanned dataset of images
|
|
||||||
*/
|
|
||||||
static np::Array<uint32_t> __scanGPU_3d__(const np::Array<uint32_t>& X) noexcept {
|
static np::Array<uint32_t> __scanGPU_3d__(const np::Array<uint32_t>& X) noexcept {
|
||||||
np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape);
|
np::Array<uint32_t> X_scan = np::empty<uint32_t>(X.shape);
|
||||||
|
|
||||||
@ -145,12 +112,6 @@ static np::Array<uint32_t> __scanGPU_3d__(const np::Array<uint32_t>& X) noexcept
|
|||||||
return X_scan;
|
return X_scan;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief GPU kernel of the function __transpose_3d__.
|
|
||||||
*
|
|
||||||
* @param d_X Dataset of images on device
|
|
||||||
* @param d_Xt Transposed dataset of images on device
|
|
||||||
*/
|
|
||||||
static __global__ void __transpose_kernel__(const np::Array<uint32_t> d_X, np::Array<uint32_t> d_Xt) {
|
static __global__ void __transpose_kernel__(const np::Array<uint32_t> d_X, np::Array<uint32_t> d_Xt) {
|
||||||
__shared__ uint32_t temp[NB_THREADS_2D_X * NB_THREADS_2D_Y];
|
__shared__ uint32_t temp[NB_THREADS_2D_X * NB_THREADS_2D_Y];
|
||||||
|
|
||||||
@ -167,12 +128,6 @@ static __global__ void __transpose_kernel__(const np::Array<uint32_t> d_X, np::A
|
|||||||
d_Xt[blockIdx.z * d_Xt.shape[1] * d_Xt.shape[2] + x * d_X.shape[2] + y] = temp[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y];
|
d_Xt[blockIdx.z * d_Xt.shape[1] * d_Xt.shape[2] + x * d_X.shape[2] + y] = temp[threadIdx.x * NB_THREADS_2D_Y + threadIdx.y];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Transpose every images in the given dataset.
|
|
||||||
*
|
|
||||||
* @param X Dataset of images
|
|
||||||
* @return Transposed dataset of images
|
|
||||||
*/
|
|
||||||
static np::Array<uint32_t> __transpose_3d__(const np::Array<uint32_t>& X) noexcept {
|
static np::Array<uint32_t> __transpose_3d__(const np::Array<uint32_t>& X) noexcept {
|
||||||
np::Array<uint32_t> Xt = np::empty<uint32_t>({ X.shape[0], X.shape[2], X.shape[1] });
|
np::Array<uint32_t> Xt = np::empty<uint32_t>({ X.shape[0], X.shape[2], X.shape[1] });
|
||||||
|
|
||||||
@ -192,13 +147,7 @@ static np::Array<uint32_t> __transpose_3d__(const np::Array<uint32_t>& X) noexce
|
|||||||
return Xt;
|
return Xt;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
np::Array<uint32_t> set_integral_image_gpu(const np::Array<uint8_t>& X) noexcept {
|
||||||
* @brief Transform the input images in integrated images (GPU version).
|
|
||||||
*
|
|
||||||
* @param X Dataset of images
|
|
||||||
* @return Dataset of integrated images
|
|
||||||
*/
|
|
||||||
np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>& X) noexcept {
|
|
||||||
np::Array<uint32_t> X_ii = np::astype<uint32_t>(X);
|
np::Array<uint32_t> X_ii = np::astype<uint32_t>(X);
|
||||||
X_ii = __scanCPU_3d__(X_ii);
|
X_ii = __scanCPU_3d__(X_ii);
|
||||||
X_ii = __transpose_3d__(X_ii);
|
X_ii = __transpose_3d__(X_ii);
|
||||||
@ -206,17 +155,53 @@ np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>& X) noexcept {
|
|||||||
return __transpose_3d__(X_ii);
|
return __transpose_3d__(X_ii);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
static inline __device__ int16_t __compute_feature__(const np::Array<uint32_t>& d_X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept {
|
||||||
* @brief GPU kernel of the function train_weak_clf.
|
const size_t _y = y * d_X_ii.shape[1] + x;
|
||||||
*
|
const size_t _yh = _y + h * d_X_ii.shape[1];
|
||||||
* @param d_classifiers Weak classifiers on device to train
|
return d_X_ii[j + _yh + w] + d_X_ii[j + _y] - d_X_ii[j + _yh] - d_X_ii[j + _y + w];
|
||||||
* @param d_y Labels of the features on device
|
}
|
||||||
* @param d_X_feat Feature images dataset on device
|
|
||||||
* @param d_X_feat_argsort Sorted indexes of the integrated features on device
|
static __global__ void __apply_feature_kernel__(int32_t* d_X_feat, const np::Array<uint8_t> d_feats, const np::Array<uint32_t> d_X_ii) {
|
||||||
* @param d_weights Weights of the features on device
|
size_t i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
* @param total_pos Total of positive labels in the dataset
|
size_t j = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
* @param total_neg Total of negative labels in the dataset
|
|
||||||
*/
|
if (i >= d_feats.shape[0] || j >= d_X_ii.shape[0])
|
||||||
|
return;
|
||||||
|
|
||||||
|
const size_t k = i * d_X_ii.shape[0] + j;
|
||||||
|
i *= np::prod(d_feats.shape, 1);
|
||||||
|
j *= np::prod(d_X_ii.shape, 1);
|
||||||
|
const int16_t p1 = __compute_feature__(d_X_ii, j, d_feats[i + 0], d_feats[i + 1], d_feats[i + 2], d_feats[i + 3]);
|
||||||
|
const int16_t p2 = __compute_feature__(d_X_ii, j, d_feats[i + 4], d_feats[i + 5], d_feats[i + 6], d_feats[i + 7]);
|
||||||
|
const int16_t n1 = __compute_feature__(d_X_ii, j, d_feats[i + 8], d_feats[i + 9], d_feats[i + 10], d_feats[i + 11]);
|
||||||
|
const int16_t n2 = __compute_feature__(d_X_ii, j, d_feats[i + 12], d_feats[i + 13], d_feats[i + 14], d_feats[i + 15]);
|
||||||
|
d_X_feat[k] = static_cast<int32_t>(p1 + p2) - static_cast<int32_t>(n1 + n2);
|
||||||
|
}
|
||||||
|
|
||||||
|
np::Array<int32_t> apply_features_gpu(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
|
||||||
|
const np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
|
||||||
|
int32_t* d_X_feat;
|
||||||
|
|
||||||
|
_print_cuda_error_("malloc d_X_feat", cudaMalloc(&d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t)));
|
||||||
|
np::Array<uint32_t> d_X_ii = copyToDevice<uint32_t>("X_ii", X_ii);
|
||||||
|
np::Array<uint8_t> d_feats = copyToDevice<uint8_t>("feats", feats);
|
||||||
|
|
||||||
|
const size_t dimX = static_cast<size_t>(std::ceil(static_cast<float64_t>(feats.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_X)));
|
||||||
|
const size_t dimY = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_ii.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_Y)));
|
||||||
|
const dim3 dimGrid(dimX, dimY);
|
||||||
|
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
|
||||||
|
__apply_feature_kernel__<<<dimGrid, dimBlock>>>(d_X_feat, d_feats, d_X_ii);
|
||||||
|
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
||||||
|
|
||||||
|
_print_cuda_error_("memcpy X_feat", cudaMemcpy(X_feat.data, d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t), cudaMemcpyDeviceToHost));
|
||||||
|
|
||||||
|
_print_cuda_error_("free d_X_feat", cudaFree(d_X_feat));
|
||||||
|
cudaFree("free d_feats", d_feats);
|
||||||
|
cudaFree("free d_X_11", d_X_ii);
|
||||||
|
|
||||||
|
return X_feat;
|
||||||
|
}
|
||||||
|
|
||||||
static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifiers, const np::Array<uint8_t> d_y,
|
static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifiers, const np::Array<uint8_t> d_y,
|
||||||
const np::Array<int32_t> d_X_feat, const np::Array<uint16_t> d_X_feat_argsort,
|
const np::Array<int32_t> d_X_feat, const np::Array<uint16_t> d_X_feat_argsort,
|
||||||
const np::Array<float64_t> d_weights, const float64_t total_pos, const float64_t total_neg) {
|
const np::Array<float64_t> d_weights, const float64_t total_pos, const float64_t total_neg) {
|
||||||
@ -225,7 +210,7 @@ static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifi
|
|||||||
i += threadIdx.x * blockDim.y * blockDim.z;
|
i += threadIdx.x * blockDim.y * blockDim.z;
|
||||||
i += threadIdx.y * blockDim.z;
|
i += threadIdx.y * blockDim.z;
|
||||||
i += threadIdx.z;
|
i += threadIdx.z;
|
||||||
|
// const size_t i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
if(i >= d_classifiers.shape[0])
|
if(i >= d_classifiers.shape[0])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -250,16 +235,7 @@ static __global__ void __train_weak_clf_kernel__(np::Array<float64_t> d_classifi
|
|||||||
d_classifiers[i * 2] = best_threshold; d_classifiers[i * 2 + 1] = best_polarity;
|
d_classifiers[i * 2] = best_threshold; d_classifiers[i * 2 + 1] = best_polarity;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
np::Array<float64_t> train_weak_clf_gpu(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y,
|
||||||
* @brief Train the weak classifiers on a given dataset (GPU version).
|
|
||||||
*
|
|
||||||
* @param X_feat Feature images dataset
|
|
||||||
* @param X_feat_argsort Sorted indexes of the integrated features
|
|
||||||
* @param y Labels of the features
|
|
||||||
* @param weights Weights of the features
|
|
||||||
* @return Trained weak classifiers
|
|
||||||
*/
|
|
||||||
np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y,
|
|
||||||
const np::Array<float64_t>& weights) noexcept {
|
const np::Array<float64_t>& weights) noexcept {
|
||||||
float64_t total_pos = 0.0, total_neg = 0.0;
|
float64_t total_pos = 0.0, total_neg = 0.0;
|
||||||
for(size_t i = 0; i < y.shape[0]; ++i)
|
for(size_t i = 0; i < y.shape[0]; ++i)
|
||||||
@ -275,6 +251,8 @@ np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::
|
|||||||
|
|
||||||
const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS_3D_X * NB_THREADS_3D_Y * NB_THREADS_3D_Z)));
|
const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS_3D_X * NB_THREADS_3D_Y * NB_THREADS_3D_Z)));
|
||||||
constexpr const dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z);
|
constexpr const dim3 dimBlock(NB_THREADS_3D_X, NB_THREADS_3D_Y, NB_THREADS_3D_Z);
|
||||||
|
// const size_t n_blocks = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS)));
|
||||||
|
// constexpr const dim3 dimBlock(NB_THREADS);
|
||||||
__train_weak_clf_kernel__<<<n_blocks, dimBlock>>>(d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg);
|
__train_weak_clf_kernel__<<<n_blocks, dimBlock>>>(d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg);
|
||||||
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
||||||
|
|
||||||
@ -289,118 +267,28 @@ np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::
|
|||||||
return classifiers;
|
return classifiers;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Compute a feature on a integrated image at a specific coordinate (GPU version).
|
|
||||||
*
|
|
||||||
* @param d_X_ii Dataset of integrated images on device
|
|
||||||
* @param j Image index in the dataset
|
|
||||||
* @param x X coordinate
|
|
||||||
* @param y Y coordinate
|
|
||||||
* @param w width of the feature
|
|
||||||
* @param h height of the feature
|
|
||||||
*/
|
|
||||||
static inline __device__ int16_t __compute_feature__(const np::Array<uint32_t>& d_X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept {
|
|
||||||
const size_t _y = y * d_X_ii.shape[1] + x;
|
|
||||||
const size_t _yh = _y + h * d_X_ii.shape[1];
|
|
||||||
return d_X_ii[j + _yh + w] + d_X_ii[j + _y] - d_X_ii[j + _yh] - d_X_ii[j + _y + w];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief GPU kernel of the function apply_features.
|
|
||||||
*
|
|
||||||
* @param d_X_feat Dataset of image features on device
|
|
||||||
* @param d_feats Features on device to apply
|
|
||||||
* @param d_X_ii Integrated image dataset on device
|
|
||||||
*/
|
|
||||||
static __global__ void __apply_feature_kernel__(int32_t* d_X_feat, const np::Array<uint8_t> d_feats, const np::Array<uint32_t> d_X_ii) {
|
|
||||||
size_t i = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
size_t j = blockIdx.y * blockDim.y + threadIdx.y;
|
|
||||||
|
|
||||||
if (i >= d_feats.shape[0] || j >= d_X_ii.shape[0])
|
|
||||||
return;
|
|
||||||
|
|
||||||
const size_t k = i * d_X_ii.shape[0] + j;
|
|
||||||
i *= np::prod(d_feats.shape, 1);
|
|
||||||
j *= np::prod(d_X_ii.shape, 1);
|
|
||||||
const int16_t p1 = __compute_feature__(d_X_ii, j, d_feats[i + 0], d_feats[i + 1], d_feats[i + 2], d_feats[i + 3]);
|
|
||||||
const int16_t p2 = __compute_feature__(d_X_ii, j, d_feats[i + 4], d_feats[i + 5], d_feats[i + 6], d_feats[i + 7]);
|
|
||||||
const int16_t n1 = __compute_feature__(d_X_ii, j, d_feats[i + 8], d_feats[i + 9], d_feats[i + 10], d_feats[i + 11]);
|
|
||||||
const int16_t n2 = __compute_feature__(d_X_ii, j, d_feats[i + 12], d_feats[i + 13], d_feats[i + 14], d_feats[i + 15]);
|
|
||||||
d_X_feat[k] = static_cast<int32_t>(p1 + p2) - static_cast<int32_t>(n1 + n2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Apply the features on a integrated image dataset (GPU version).
|
|
||||||
*
|
|
||||||
* @param feats Features to apply
|
|
||||||
* @param X_ii Integrated image dataset
|
|
||||||
* @return Applied features
|
|
||||||
*/
|
|
||||||
np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
|
|
||||||
const np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
|
|
||||||
int32_t* d_X_feat = nullptr;
|
|
||||||
|
|
||||||
_print_cuda_error_("malloc d_X_feat", cudaMalloc(&d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t)));
|
|
||||||
np::Array<uint32_t> d_X_ii = copyToDevice<uint32_t>("X_ii", X_ii);
|
|
||||||
np::Array<uint8_t> d_feats = copyToDevice<uint8_t>("feats", feats);
|
|
||||||
|
|
||||||
const size_t dimX = static_cast<size_t>(std::ceil(static_cast<float64_t>(feats.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_X)));
|
|
||||||
const size_t dimY = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_ii.shape[0]) / static_cast<float64_t>(NB_THREADS_2D_Y)));
|
|
||||||
const dim3 dimGrid(dimX, dimY);
|
|
||||||
constexpr const dim3 dimBlock(NB_THREADS_2D_X, NB_THREADS_2D_Y);
|
|
||||||
__apply_feature_kernel__<<<dimGrid, dimBlock>>>(d_X_feat, d_feats, d_X_ii);
|
|
||||||
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
|
||||||
|
|
||||||
_print_cuda_error_("memcpy X_feat", cudaMemcpy(X_feat.data, d_X_feat, np::prod(X_feat.shape) * sizeof(int32_t), cudaMemcpyDeviceToHost));
|
|
||||||
|
|
||||||
_print_cuda_error_("free d_X_feat", cudaFree(d_X_feat));
|
|
||||||
cudaFree("free d_feats", d_feats);
|
|
||||||
cudaFree("free d_X_11", d_X_ii);
|
|
||||||
|
|
||||||
return X_feat;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Partition of the argsort algorithm.
|
|
||||||
*
|
|
||||||
* @tparam T Inner type of the array
|
|
||||||
* @param d_a Array on device to sort
|
|
||||||
* @param d_indices Array of indices on device to write to
|
|
||||||
* @param low lower bound to sort
|
|
||||||
* @param high higher bound to sort
|
|
||||||
* @return Last index sorted
|
|
||||||
*/
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
__device__ inline static int32_t _as_partition_(const T* d_a, uint16_t* const d_indices, const size_t low, const size_t high) noexcept {
|
__device__ inline static int32_t as_partition_gpu(const T* a, uint16_t* indices, const size_t l, const size_t h) noexcept {
|
||||||
int32_t i = low - 1;
|
int32_t i = l - 1;
|
||||||
for (int32_t j = low; j <= high; ++j)
|
for (int32_t j = l; j <= h; ++j)
|
||||||
if (d_a[d_indices[j]] < d_a[d_indices[high]])
|
if (a[indices[j]] < a[indices[h]])
|
||||||
swap(&d_indices[++i], &d_indices[j]);
|
swap(&indices[++i], &indices[j]);
|
||||||
swap(&d_indices[++i], &d_indices[high]);
|
swap(&indices[++i], &indices[h]);
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Cuda kernel to perform an indirect sort of a given array within a given bound.
|
|
||||||
*
|
|
||||||
* @tparam T Inner type of the array
|
|
||||||
* @param d_a Array on device to sort
|
|
||||||
* @param d_indices Array of indices on device to write to
|
|
||||||
* @param low lower bound to sort
|
|
||||||
* @param high higher bound to sort
|
|
||||||
*/
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
__device__ void argsort_kernel(const T* d_a, uint16_t* const d_indices, size_t low, size_t high) noexcept {
|
__device__ void argsort_gpu(const T* a, uint16_t* indices, const size_t l, const size_t h) noexcept {
|
||||||
const size_t total = high - low + 1;
|
const size_t total = h - l + 1;
|
||||||
|
|
||||||
//int32_t* stack = new int32_t[total]{low, high};
|
//int32_t* stack = new int32_t[total]{l, h};
|
||||||
//int32_t stack[total];
|
//int32_t stack[total];
|
||||||
int32_t stack[6977];
|
int32_t stack[6977];
|
||||||
//int32_t stack[1<<16];
|
//int32_t stack[1<<16];
|
||||||
stack[0] = low;
|
stack[0] = l;
|
||||||
stack[1] = high;
|
stack[1] = h;
|
||||||
|
|
||||||
size_t top = 1;
|
size_t top = 1, low = l, high = h;
|
||||||
|
|
||||||
while (top <= total) {
|
while (top <= total) {
|
||||||
high = stack[top--];
|
high = stack[top--];
|
||||||
@ -408,7 +296,7 @@ __device__ void argsort_kernel(const T* d_a, uint16_t* const d_indices, size_t l
|
|||||||
if(low >= high)
|
if(low >= high)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
const int32_t p = _as_partition_(d_a, d_indices, low, high);
|
const int32_t p = as_partition_gpu(a, indices, low, high);
|
||||||
|
|
||||||
if (p - 1 > low && p - 1 < total) {
|
if (p - 1 > low && p - 1 < total) {
|
||||||
stack[++top] = low;
|
stack[++top] = low;
|
||||||
@ -423,49 +311,42 @@ __device__ void argsort_kernel(const T* d_a, uint16_t* const d_indices, size_t l
|
|||||||
//delete[] stack;
|
//delete[] stack;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Cuda kernel where argsort is applied to every column of a given 2D array.
|
|
||||||
*
|
|
||||||
* @tparam T Inner type of the array
|
|
||||||
* @param d_a 2D Array on device to sort
|
|
||||||
* @param d_indices 2D Array of indices on device to write to
|
|
||||||
*/
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
__global__ void argsort_bounded(const np::Array<T> d_a, uint16_t* const d_indices){
|
__global__ void argsort_bounded_gpu(const np::Array<T> a, uint16_t* indices){
|
||||||
const size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
|
const size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
if (idx >= d_a.shape[0])
|
if (idx >= a.shape[0])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for(size_t y = 0; y < d_a.shape[1]; ++y) d_indices[idx * d_a.shape[1] + y] = y;
|
for(size_t y = 0; y < a.shape[1]; ++y) indices[idx * a.shape[1] + y] = y;
|
||||||
argsort_kernel(&d_a[idx * d_a.shape[1]], &d_indices[idx * d_a.shape[1]], 0, d_a.shape[1] - 1);
|
argsort_gpu(&a[idx * a.shape[1]], &indices[idx * a.shape[1]], 0, a.shape[1] - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
np::Array<uint16_t> argsort_2d_gpu(const np::Array<int32_t>& X_feat) noexcept {
|
||||||
* @brief Perform an indirect sort on each column of a given 2D array (GPU version).
|
const np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);
|
||||||
*
|
|
||||||
* @param a 2D Array to sort
|
|
||||||
* @return 2D Array of indices that sort the array
|
|
||||||
*/
|
|
||||||
np::Array<uint16_t> argsort_2d(const np::Array<int32_t>& a) noexcept {
|
|
||||||
const np::Array<uint16_t> indices = np::empty<uint16_t>(a.shape);
|
|
||||||
|
|
||||||
uint16_t* d_indices = nullptr;
|
uint16_t* d_indices;
|
||||||
const size_t indices_size = np::prod(indices.shape) * sizeof(uint16_t);
|
const size_t indices_size = np::prod(indices.shape) * sizeof(uint16_t);
|
||||||
|
|
||||||
np::Array<int32_t> d_a = copyToDevice<int32_t>("X_feat", a);
|
np::Array<int32_t> d_X_feat = copyToDevice<int32_t>("X_feat", X_feat);
|
||||||
_print_cuda_error_("malloc d_indices", cudaMalloc(&d_indices, indices_size));
|
_print_cuda_error_("malloc d_indices", cudaMalloc(&d_indices, indices_size));
|
||||||
|
|
||||||
const size_t dimGrid = static_cast<size_t>(std::ceil(static_cast<float64_t>(a.shape[0]) / static_cast<float64_t>(NB_THREADS)));
|
const size_t dimGrid = static_cast<size_t>(std::ceil(static_cast<float64_t>(X_feat.shape[0]) / static_cast<float64_t>(NB_THREADS)));
|
||||||
const dim3 dimBlock(NB_THREADS);
|
const dim3 dimBlock(NB_THREADS);
|
||||||
argsort_bounded<<<dimGrid, dimBlock>>>(d_a, d_indices);
|
argsort_bounded_gpu<<<dimGrid, dimBlock>>>(d_X_feat, d_indices);
|
||||||
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
_print_cuda_error_("synchronize", cudaDeviceSynchronize());
|
||||||
|
|
||||||
_print_cuda_error_("memcpy d_indices", cudaMemcpy(indices.data, d_indices, indices_size, cudaMemcpyDeviceToHost));
|
_print_cuda_error_("memcpy d_indices", cudaMemcpy(indices.data, d_indices, indices_size, cudaMemcpyDeviceToHost));
|
||||||
|
|
||||||
cudaFree("free d_a", d_a);
|
cudaFree("free d_X_feat", d_X_feat);
|
||||||
_print_cuda_error_("free d_indices", cudaFree(d_indices));
|
_print_cuda_error_("free d_indices", cudaFree(d_indices));
|
||||||
|
|
||||||
return indices;
|
return indices;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // GPU_BOOSTED
|
__host__ __device__
|
||||||
|
size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept {
|
||||||
|
size_t result = shape[offset];
|
||||||
|
for(size_t i = 1 + offset; i < shape.length; ++i)
|
||||||
|
result *= shape[i];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
8
cpp/ViolaJonesGPU.hpp
Normal file
8
cpp/ViolaJonesGPU.hpp
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "data.hpp"
|
||||||
|
|
||||||
|
np::Array<uint32_t> set_integral_image_gpu(const np::Array<uint8_t>&) noexcept;
|
||||||
|
np::Array<int32_t> apply_features_gpu(const np::Array<uint8_t>&, const np::Array<uint32_t>&) noexcept;
|
||||||
|
np::Array<float64_t> train_weak_clf_gpu(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y,
|
||||||
|
const np::Array<float64_t>& weights) noexcept;
|
||||||
|
np::Array<uint16_t> argsort_2d_gpu(const np::Array<int32_t>& X_feat) noexcept;
|
@ -1,39 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
#include "data.hpp"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Transform the input images in integrated images.
|
|
||||||
*
|
|
||||||
* @param X Dataset of images
|
|
||||||
* @return Dataset of integrated images
|
|
||||||
*/
|
|
||||||
np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>&) noexcept;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Apply the features on a integrated image dataset.
|
|
||||||
*
|
|
||||||
* @param feats Features to apply
|
|
||||||
* @param X_ii Integrated image dataset
|
|
||||||
* @return Applied features
|
|
||||||
*/
|
|
||||||
np::Array<int32_t> apply_features(const np::Array<uint8_t>&, const np::Array<uint32_t>&) noexcept;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Train the weak classifiers on a given dataset.
|
|
||||||
*
|
|
||||||
* @param X_feat Feature images dataset
|
|
||||||
* @param X_feat_argsort Sorted indexes of the integrated features
|
|
||||||
* @param y Labels of the features
|
|
||||||
* @param weights Weights of the features
|
|
||||||
* @return Trained weak classifiers
|
|
||||||
*/
|
|
||||||
np::Array<float64_t> train_weak_clf(const np::Array<int32_t>&, const np::Array<uint16_t>&, const np::Array<uint8_t>&,
|
|
||||||
const np::Array<float64_t>&) noexcept;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Perform an indirect sort on each column of a given 2D array
|
|
||||||
*
|
|
||||||
* @param a 2D Array to sort
|
|
||||||
* @return 2D Array of indices that sort the array
|
|
||||||
*/
|
|
||||||
np::Array<uint16_t> argsort_2d(const np::Array<int32_t>&) noexcept;
|
|
85
cpp/data.cpp
85
cpp/data.cpp
@ -1,7 +1,9 @@
|
|||||||
#include "data.hpp"
|
#include "data.hpp"
|
||||||
|
//#include "toolbox.hpp"
|
||||||
|
//#include <cstring>
|
||||||
|
|
||||||
int32_t print(const np::Shape& shape) noexcept {
|
int print(const np::Shape& shape) noexcept {
|
||||||
int32_t num_written = 0;
|
int num_written = 0;
|
||||||
num_written += printf("(");
|
num_written += printf("(");
|
||||||
if (shape.length > 1) {
|
if (shape.length > 1) {
|
||||||
const size_t length = shape.length - 1;
|
const size_t length = shape.length - 1;
|
||||||
@ -15,12 +17,20 @@ int32_t print(const np::Shape& shape) noexcept {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
int32_t print(const np::Array<T>& array, const char* const format) noexcept {
|
int print(const np::Array<T>& array, const char* format) noexcept {
|
||||||
|
//printf("[");
|
||||||
|
//const size_t length = np::prod(array.shape);
|
||||||
|
//for(size_t i = 0; i < length - 1; ++i)
|
||||||
|
// //std::cout << array[i] << " ";
|
||||||
|
// printf("%f ", array[i]);
|
||||||
|
////std::cout << array[array.shape[0] - 1] << "]\n";
|
||||||
|
//printf("%f]\n", array[length - 1]);
|
||||||
|
|
||||||
char format_space[BUFFER_SIZE] = { 0 };
|
char format_space[BUFFER_SIZE] = { 0 };
|
||||||
snprintf(format_space, BUFFER_SIZE,"%s ", format);
|
sprintf(format_space, "%s ", format);
|
||||||
char format_close[BUFFER_SIZE] = { 0 };
|
char format_close[BUFFER_SIZE] = { 0 };
|
||||||
snprintf(format_close, BUFFER_SIZE,"%s]\n", format);
|
sprintf(format_close, "%s]\n", format);
|
||||||
int32_t num_written = 0;
|
int num_written = 0;
|
||||||
|
|
||||||
if (array.shape.length == 1) {
|
if (array.shape.length == 1) {
|
||||||
const size_t max = array.shape[0] - 1;
|
const size_t max = array.shape[0] - 1;
|
||||||
@ -43,16 +53,16 @@ int32_t print(const np::Array<T>& array, const char* const format) noexcept {
|
|||||||
return num_written;
|
return num_written;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t print(const np::Array<uint8_t>& array) noexcept {
|
int print(const np::Array<uint8_t>& array) noexcept {
|
||||||
return print(array, "%hu");
|
return print(array, "%hu");
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t print(const np::Array<float64_t>& array) noexcept {
|
int print(const np::Array<float64_t>& array) noexcept {
|
||||||
return print(array, "%f");
|
return print(array, "%f");
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t print_feat(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept {
|
int print_feat(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept {
|
||||||
int32_t num_written = 0;
|
int num_written = 0;
|
||||||
num_written += printf("[");
|
num_written += printf("[");
|
||||||
const size_t feat_size = np::prod(array.shape, 1);
|
const size_t feat_size = np::prod(array.shape, 1);
|
||||||
const size_t offset = slice.x * feat_size;
|
const size_t offset = slice.x * feat_size;
|
||||||
@ -64,10 +74,10 @@ int32_t print_feat(const np::Array<uint8_t>& array, const np::Slice& slice) noex
|
|||||||
return num_written;
|
return num_written;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t print(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept {
|
int print(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept {
|
||||||
int32_t num_written = 0;
|
int num_written = 0;
|
||||||
if (array.shape.length == 1) {
|
if (array.shape.length == 1) {
|
||||||
const size_t max = slice.y - 1;
|
const size_t max = slice.y - 1; //std::min(slice.y, array.shape[0] - 1);
|
||||||
num_written += printf("[");
|
num_written += printf("[");
|
||||||
for (size_t i = slice.x; i < max; ++i)
|
for (size_t i = slice.x; i < max; ++i)
|
||||||
num_written += printf("%hu ", array[i]);
|
num_written += printf("%hu ", array[i]);
|
||||||
@ -87,10 +97,10 @@ int32_t print(const np::Array<uint8_t>& array, const np::Slice& slice) noexcept
|
|||||||
return num_written;
|
return num_written;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t print(const np::Array<uint32_t>& array, const np::Slice& slice) noexcept {
|
int print(const np::Array<uint32_t>& array, const np::Slice& slice) noexcept {
|
||||||
int32_t num_written = 0;
|
int num_written = 0;
|
||||||
if (array.shape.length == 1) {
|
if (array.shape.length == 1) {
|
||||||
const size_t max = slice.y - 1;
|
const size_t max = slice.y - 1; //std::min(slice.y, array.shape[0] - 1);
|
||||||
num_written += printf("[");
|
num_written += printf("[");
|
||||||
for (size_t i = slice.x; i < max; ++i)
|
for (size_t i = slice.x; i < max; ++i)
|
||||||
num_written += printf("%iu ", array[i]);
|
num_written += printf("%iu ", array[i]);
|
||||||
@ -105,35 +115,37 @@ int32_t print(const np::Array<uint32_t>& array, const np::Slice& slice) noexcept
|
|||||||
num_written += printf("%5i ", array[k + i * array.shape[1] + j]);
|
num_written += printf("%5i ", array[k + i * array.shape[1] + j]);
|
||||||
num_written += printf("]\n");
|
num_written += printf("]\n");
|
||||||
}
|
}
|
||||||
num_written += printf("]");
|
num_written += print("]");
|
||||||
}
|
}
|
||||||
return num_written;
|
return num_written;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t print(const np::Array<int32_t>& array, const np::Slice& slice) noexcept {
|
int print(const np::Array<int32_t>& array, const np::Slice& slice) noexcept {
|
||||||
int32_t num_written = 0;
|
int num_written = 0;
|
||||||
num_written += printf("[");
|
num_written += printf("[");
|
||||||
|
//size_t k = slice.x * array.shape[1] * array.shape[2] + slice.y * array.shape[2] + slice.z;
|
||||||
size_t k = slice.x * array.shape[1];
|
size_t k = slice.x * array.shape[1];
|
||||||
for (size_t i = k; i < k + (slice.y - slice.x); ++i) {
|
for (size_t i = k; i < k + (slice.y - slice.x); ++i) {
|
||||||
num_written += printf("%5i ", array[i]);
|
num_written += printf("%5i ", array[i]);
|
||||||
}
|
}
|
||||||
num_written += printf("]");
|
num_written += print("]");
|
||||||
return num_written;
|
return num_written;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t print(const np::Array<uint16_t>& array, const np::Slice& slice) noexcept {
|
int print(const np::Array<uint16_t>& array, const np::Slice& slice) noexcept {
|
||||||
int32_t num_written = 0;
|
int num_written = 0;
|
||||||
num_written += printf("[");
|
num_written += printf("[");
|
||||||
|
//size_t k = slice.x * array.shape[1] * array.shape[2] + slice.y * array.shape[2] + slice.z;
|
||||||
size_t k = slice.x * array.shape[1];
|
size_t k = slice.x * array.shape[1];
|
||||||
for (size_t i = k; i < k + (slice.y - slice.x); ++i) {
|
for (size_t i = k; i < k + (slice.y - slice.x); ++i) {
|
||||||
num_written += printf("%5hu ", array[i]);
|
num_written += printf("%5hu ", array[i]);
|
||||||
}
|
}
|
||||||
num_written += printf("]");
|
num_written += print("]");
|
||||||
return num_written;
|
return num_written;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline np::Array<uint8_t> load_set(const char* const set_name) {
|
static inline np::Array<uint8_t> load_set(const char* set_name) {
|
||||||
FILE* const file = fopen(set_name, "rb");
|
FILE* file = fopen(set_name, "rb");
|
||||||
if (file == NULL) {
|
if (file == NULL) {
|
||||||
print_error_file(set_name);
|
print_error_file(set_name);
|
||||||
throw;
|
throw;
|
||||||
@ -144,7 +156,7 @@ static inline np::Array<uint8_t> load_set(const char* const set_name) {
|
|||||||
fclose(file);
|
fclose(file);
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
size_t* const dims = new size_t[3]();
|
size_t* dims = new size_t[3]();
|
||||||
if (!sscanf(meta, "%lu %lu %lu", &dims[0], &dims[1], &dims[2])) {
|
if (!sscanf(meta, "%lu %lu %lu", &dims[0], &dims[1], &dims[2])) {
|
||||||
print_error_file(set_name);
|
print_error_file(set_name);
|
||||||
fclose(file);
|
fclose(file);
|
||||||
@ -155,12 +167,13 @@ static inline np::Array<uint8_t> load_set(const char* const set_name) {
|
|||||||
|
|
||||||
const size_t size = np::prod(a.shape);
|
const size_t size = np::prod(a.shape);
|
||||||
size_t i = 0, j = 0;
|
size_t i = 0, j = 0;
|
||||||
int32_t c;
|
int c;
|
||||||
char buff[STRING_INT_SIZE] = { 0 };
|
char buff[STRING_INT_SIZE] = { 0 };
|
||||||
while ((c = fgetc(file)) != EOF && i < size) {
|
while ((c = fgetc(file)) != EOF && i < size) {
|
||||||
if (c == ' ' || c == '\n') {
|
if (c == ' ' || c == '\n') {
|
||||||
buff[j] = '\0';
|
buff[j] = '\0';
|
||||||
a[i++] = static_cast<uint8_t>(atoi(buff));
|
a[i++] = static_cast<uint8_t>(atoi(buff));
|
||||||
|
//memset(buff, 0, STRING_INT_SIZE);
|
||||||
j = 0;
|
j = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -178,20 +191,22 @@ static inline np::Array<uint8_t> load_set(const char* const set_name) {
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
std::array<np::Array<uint8_t>, 4> load_datasets() {
|
||||||
* @brief Load the datasets.
|
|
||||||
*
|
|
||||||
* @return Array containing X_train, y_trait, X_test, y_test
|
|
||||||
*/
|
|
||||||
std::array<np::Array<uint8_t>, 4> load_datasets(void) {
|
|
||||||
return {
|
return {
|
||||||
load_set(DATA_DIR "/X_train.bin"), load_set(DATA_DIR "/y_train.bin"),
|
load_set(DATA_DIR "/X_train.bin"), load_set(DATA_DIR "/y_train.bin"),
|
||||||
load_set(DATA_DIR "/X_test.bin"), load_set(DATA_DIR "/y_test.bin")
|
load_set(DATA_DIR "/X_test.bin"), load_set(DATA_DIR "/y_test.bin")
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_error_file(const char* const file_dir) noexcept {
|
void print_error_file(const char* file_dir) noexcept {
|
||||||
const char* const buff = strerror(errno);
|
const char* buff = strerror(errno);
|
||||||
fprintf(stderr, "Can't open %s, error code = %d : %s\n", file_dir, errno, buff);
|
fprintf(stderr, "Can't open %s, error code = %d : %s\n", file_dir, errno, buff);
|
||||||
// delete buff;
|
// delete buff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept {
|
||||||
|
// size_t result = shape[offset];
|
||||||
|
// for(size_t i = 1 + offset; i < shape.length; ++i)
|
||||||
|
// result *= shape[i];
|
||||||
|
// return result;
|
||||||
|
//}
|
||||||
|
335
cpp/data.hpp
335
cpp/data.hpp
@ -4,7 +4,7 @@
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <stdint.h>
|
#include <memory>
|
||||||
#include "config.hpp"
|
#include "config.hpp"
|
||||||
|
|
||||||
#define BUFFER_SIZE 256
|
#define BUFFER_SIZE 256
|
||||||
@ -19,6 +19,15 @@ typedef float float32_t;
|
|||||||
typedef double float64_t;
|
typedef double float64_t;
|
||||||
typedef long double float128_t;
|
typedef long double float128_t;
|
||||||
|
|
||||||
|
__host__ __device__
|
||||||
|
constexpr inline int print(const char* str) noexcept {
|
||||||
|
return printf("%s\n", str);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int print(const std::string& s) noexcept {
|
||||||
|
return printf("%s\n", s.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
namespace np {
|
namespace np {
|
||||||
constexpr const float64_t inf = std::numeric_limits<float64_t>::infinity();
|
constexpr const float64_t inf = std::numeric_limits<float64_t>::infinity();
|
||||||
|
|
||||||
@ -35,16 +44,16 @@ namespace np {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Shape(void) noexcept {
|
Shape() noexcept {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Shape created (default)\n");
|
// print("Shape created (default)");
|
||||||
#endif
|
// #endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Shape(const size_t& length, size_t* const data) noexcept : length(length), data(data), refcount(new size_t(1)) {
|
Shape(const size_t& length, size_t* data) noexcept : length(length), data(data), refcount(new size_t(1)) {
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Shape created (raw)\n");
|
//print("Shape created (raw)");
|
||||||
for(size_t i = 0; i < length; ++i)
|
for(size_t i = 0; i < length; ++i)
|
||||||
total *= data[i];
|
total *= data[i];
|
||||||
#endif
|
#endif
|
||||||
@ -52,10 +61,10 @@ namespace np {
|
|||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
|
Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Shape created (initializer)\n");
|
// print("Shape created (initializer)");
|
||||||
#endif
|
// #endif
|
||||||
const size_t* const begin = dims.begin();
|
const size_t* begin = dims.begin();
|
||||||
for(size_t i = 0; i < length; ++i){
|
for(size_t i = 0; i < length; ++i){
|
||||||
data[i] = begin[i];
|
data[i] = begin[i];
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
@ -67,49 +76,52 @@ namespace np {
|
|||||||
__host__ __device__
|
__host__ __device__
|
||||||
Shape(const Shape& shape) noexcept {
|
Shape(const Shape& shape) noexcept {
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Shape created (copy)\n");
|
print("Shape created (copy)");
|
||||||
#endif
|
#endif
|
||||||
if (data != nullptr && data != shape.data){
|
if (data != nullptr && data != shape.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former shape deleted (copy)\n");
|
print("Former shape deleted (copy)");
|
||||||
#endif
|
#endif
|
||||||
delete[] data;
|
delete[] data;
|
||||||
}
|
}
|
||||||
if (refcount != nullptr && refcount != shape.refcount){
|
if (refcount != nullptr && refcount != shape.refcount){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former shape refcount freed (copy)\n");
|
print("Former shape refcount freed (copy)");
|
||||||
#endif
|
#endif
|
||||||
delete refcount;
|
delete refcount;
|
||||||
}
|
}
|
||||||
length = shape.length;
|
length = shape.length;
|
||||||
|
|
||||||
|
//data = new size_t[length];
|
||||||
|
//memcpy(data, shape.data, length * sizeof(size_t));
|
||||||
|
//refcount = new size_t;
|
||||||
|
//memcpy(refcount, shape.refcount, sizeof(size_t));
|
||||||
|
|
||||||
data = shape.data;
|
data = shape.data;
|
||||||
refcount = shape.refcount;
|
refcount = shape.refcount;
|
||||||
|
|
||||||
if (refcount != nullptr)
|
if (refcount != nullptr)
|
||||||
(*refcount)++;
|
(*refcount)++;
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
else
|
else
|
||||||
printf("Moved shape has null refcount\n");
|
print("Moved shape has null refcount");
|
||||||
#endif
|
|
||||||
#if __DEBUG
|
|
||||||
total = shape.total;
|
total = shape.total;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Shape(Shape&& shape) noexcept {
|
Shape(Shape&& shape) noexcept {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Shape created (move)\n");
|
// print("Shape created (move));
|
||||||
#endif
|
// #endif
|
||||||
if (data != nullptr && data != shape.data){
|
if (data != nullptr && data != shape.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former shape deleted (move)\n");
|
print("Former shape deleted (move)");
|
||||||
#endif
|
#endif
|
||||||
delete[] data;
|
delete[] data;
|
||||||
}
|
}
|
||||||
if (refcount != nullptr && refcount != shape.refcount){
|
if (refcount != nullptr && refcount != shape.refcount){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former shape refcount freed (move)\n");
|
print("Former shape refcount freed (move)");
|
||||||
#endif
|
#endif
|
||||||
delete refcount;
|
delete refcount;
|
||||||
}
|
}
|
||||||
@ -127,29 +139,29 @@ namespace np {
|
|||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
~Shape(void) noexcept {
|
~Shape() noexcept {
|
||||||
if(refcount == nullptr){
|
if(refcount == nullptr){
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Shape refcount freed more than once\n");
|
// print("Shape refcount freed more than once");
|
||||||
#endif
|
// #endif
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
--(*refcount);
|
--(*refcount);
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Shape destructed : %lu\n", *refcount);
|
// printf("Shape destructed : %lu\n", *refcount);
|
||||||
#endif
|
// #endif
|
||||||
if(*refcount == 0){
|
if(*refcount == 0){
|
||||||
if (data != nullptr){
|
if (data != nullptr){
|
||||||
delete[] data;
|
delete[] data;
|
||||||
data = nullptr;
|
data = nullptr;
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Shape freeing ...\n");
|
// print("Shape freeing ...");
|
||||||
#endif
|
// #endif
|
||||||
}
|
}
|
||||||
#if __DEBUG
|
//#if __DEBUG
|
||||||
else
|
else
|
||||||
printf("Shape freed more than once : %lu\n", *refcount);
|
printf("Shape freed more than once : %lu\n", *refcount);
|
||||||
#endif
|
//#endif
|
||||||
delete refcount;
|
delete refcount;
|
||||||
refcount = nullptr;
|
refcount = nullptr;
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
@ -161,29 +173,34 @@ namespace np {
|
|||||||
__host__ __device__
|
__host__ __device__
|
||||||
Shape& operator=(const Shape& shape) noexcept {
|
Shape& operator=(const Shape& shape) noexcept {
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Shape created (assign copy)\n");
|
print("Shape created (assign copy)");
|
||||||
#endif
|
#endif
|
||||||
if (data != nullptr && data != shape.data){
|
if (data != nullptr && data != shape.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former shape deleted (assign copy)\n");
|
print("Former shape deleted (assign copy)");
|
||||||
#endif
|
#endif
|
||||||
delete[] data;
|
delete[] data;
|
||||||
}
|
}
|
||||||
if (refcount != nullptr && refcount != shape.refcount){
|
if (refcount != nullptr && refcount != shape.refcount){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former shape refcount freed (assign copy)\n");
|
print("Former shape refcount freed (assign copy)");
|
||||||
#endif
|
#endif
|
||||||
delete refcount;
|
delete refcount;
|
||||||
}
|
}
|
||||||
length = shape.length;
|
length = shape.length;
|
||||||
|
|
||||||
|
// data = new size_t[length];
|
||||||
|
// memcpy(data, shape.data, length * sizeof(size_t));
|
||||||
|
// refcount = new size_t;
|
||||||
|
// memcpy(refcount, shape.refcount, sizeof(size_t));
|
||||||
|
|
||||||
data = shape.data;
|
data = shape.data;
|
||||||
refcount = shape.refcount;
|
refcount = shape.refcount;
|
||||||
|
|
||||||
if (refcount != nullptr)
|
if (refcount != nullptr)
|
||||||
(*refcount)++;
|
(*refcount)++;
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
else
|
else
|
||||||
printf("Assigned copy shape has null refcount\n");
|
printf("Assigned copy shape has null refcount");
|
||||||
total = shape.total;
|
total = shape.total;
|
||||||
#endif
|
#endif
|
||||||
return *this;
|
return *this;
|
||||||
@ -191,18 +208,18 @@ namespace np {
|
|||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Shape& operator=(Shape&& shape) noexcept {
|
Shape& operator=(Shape&& shape) noexcept {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Shape created (assign move)\n");
|
// print("Shape created (assign move)");
|
||||||
#endif
|
// #endif
|
||||||
if (data != nullptr && data != shape.data){
|
if (data != nullptr && data != shape.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former shape deleted (assign move)\n");
|
print("Former shape deleted (assign move)");
|
||||||
#endif
|
#endif
|
||||||
delete[] data;
|
delete[] data;
|
||||||
}
|
}
|
||||||
if (refcount != nullptr && refcount != shape.refcount){
|
if (refcount != nullptr && refcount != shape.refcount){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former shape refcount freed (assign move)\n");
|
print("Former shape refcount freed (assign move)");
|
||||||
#endif
|
#endif
|
||||||
delete refcount;
|
delete refcount;
|
||||||
}
|
}
|
||||||
@ -210,9 +227,9 @@ namespace np {
|
|||||||
data = shape.data;
|
data = shape.data;
|
||||||
refcount = shape.refcount;
|
refcount = shape.refcount;
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
if (refcount == nullptr)
|
|
||||||
printf("Assigned copy shape has null refcount\n");
|
|
||||||
total = shape.total;
|
total = shape.total;
|
||||||
|
if (refcount == nullptr)
|
||||||
|
print("Assigned copy shape has null refcount");
|
||||||
shape.total = 1;
|
shape.total = 1;
|
||||||
#endif
|
#endif
|
||||||
shape.length = 0;
|
shape.length = 0;
|
||||||
@ -263,57 +280,62 @@ namespace np {
|
|||||||
size_t* refcount = nullptr;
|
size_t* refcount = nullptr;
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array(void) noexcept {
|
Array() noexcept {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Array created (default)\n");
|
// print("Array created (default)");
|
||||||
#endif
|
// #endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array(const Shape& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
Array(const Shape& shape, T* data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Array created (raw, copy shape)\n");
|
// print("Array created (raw, copy shape)");
|
||||||
#endif
|
// #endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Array created (raw empty, copy shape)\n");
|
// print("Array created (raw empty, copy shape)");
|
||||||
#endif
|
// #endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array(Shape&& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
Array(Shape&& shape, T* data) noexcept : shape(std::move(shape)), data(data), refcount(new size_t(1)) {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Array created (raw, move shape)\n");
|
// print("Array created (raw, move shape)");
|
||||||
#endif
|
// #endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array(Shape&& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
Array(Shape&& shape) noexcept : shape(std::move(shape)), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Array created (raw empty, move shape)\n");
|
// print("Array created (raw empty, move shape)");
|
||||||
#endif
|
// #endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array(const Array& array) noexcept : shape(array.shape) {
|
Array(const Array& array) noexcept : shape(array.shape) {
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Array created (copy)\n");
|
print("Array created (copy)");
|
||||||
#endif
|
#endif
|
||||||
if (data != nullptr && data != array.data){
|
if (data != nullptr && data != array.data){
|
||||||
#if __DEBUG
|
#ifdef __debug
|
||||||
printf("Former array deleted (copy)\n");
|
print("Former array deleted (move)");
|
||||||
#endif
|
#endif
|
||||||
delete[] data;
|
delete[] data;
|
||||||
}
|
}
|
||||||
if (refcount != nullptr && refcount != array.refcount){
|
if (refcount != nullptr && refcount != array.refcount){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former array refcount freed (copy)\n");
|
print("Former array refcount freed (move)");
|
||||||
#endif
|
#endif
|
||||||
delete refcount;
|
delete refcount;
|
||||||
}
|
}
|
||||||
|
// const size_t size = np::prod(shape);
|
||||||
|
// data = new T[size];
|
||||||
|
// memcpy(data, array.data, size);
|
||||||
|
// refcount = new size_t;
|
||||||
|
// memcpy(refcount, array.refcount, sizeof(size_t));
|
||||||
|
|
||||||
data = array.data;
|
data = array.data;
|
||||||
refcount = array.refcount;
|
refcount = array.refcount;
|
||||||
@ -321,27 +343,28 @@ namespace np {
|
|||||||
(*refcount)++;
|
(*refcount)++;
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
else
|
else
|
||||||
printf("Moved array has null refcount\n");
|
print("Moved array has null refcount");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array(Array&& array) noexcept : shape(std::move(array.shape)) {
|
Array(Array&& array) noexcept {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Array created (move)\n");
|
// print("Array created (move)");
|
||||||
#endif
|
// #endif
|
||||||
if (data != nullptr && data != array.data){
|
if (data != nullptr && data != array.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former array deleted (move)\n");
|
print("Former array deleted (move)");
|
||||||
#endif
|
#endif
|
||||||
delete[] data;
|
delete[] data;
|
||||||
}
|
}
|
||||||
if (refcount != nullptr && refcount != array.refcount){
|
if (refcount != nullptr && refcount != array.refcount){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former array refcount freed (move)\n");
|
print("Former array refcount freed (move)");
|
||||||
#endif
|
#endif
|
||||||
delete refcount;
|
delete refcount;
|
||||||
}
|
}
|
||||||
|
shape = std::move(array.shape);
|
||||||
data = array.data;
|
data = array.data;
|
||||||
refcount = array.refcount;
|
refcount = array.refcount;
|
||||||
|
|
||||||
@ -350,24 +373,24 @@ namespace np {
|
|||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
~Array(void) noexcept {
|
~Array() noexcept {
|
||||||
if(refcount == nullptr){
|
if(refcount == nullptr){
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Array refcount freed more than once\n");
|
// print("Array refcount freed more than once");
|
||||||
#endif
|
// #endif
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
--(*refcount);
|
--(*refcount);
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Array destructed : %lu\n", *refcount);
|
// printf("Array destructed : %lu\n", *refcount);
|
||||||
#endif
|
// #endif
|
||||||
if(*refcount == 0){
|
if(*refcount == 0){
|
||||||
if (data != nullptr){
|
if (data != nullptr){
|
||||||
delete[] data;
|
delete[] data;
|
||||||
data = nullptr;
|
data = nullptr;
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Array freeing ...\n");
|
// print("Array freeing ...");
|
||||||
#endif
|
// #endif
|
||||||
}
|
}
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
else
|
else
|
||||||
@ -381,47 +404,53 @@ namespace np {
|
|||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array& operator=(const Array& array) noexcept {
|
Array& operator=(const Array& array) noexcept {
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Array created (assign copy)\n");
|
print("Array created (assign copy)");
|
||||||
#endif
|
#endif
|
||||||
if (data != nullptr && data != array.data){
|
if (data != nullptr && data != array.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former array deleted (assign copy)\n");
|
print("Former array deleted (assign copy)");
|
||||||
#endif
|
#endif
|
||||||
delete[] data;
|
delete[] data;
|
||||||
}
|
}
|
||||||
if (refcount != nullptr && refcount != array.refcount){
|
if (refcount != nullptr && refcount != array.refcount){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former array refcount freed (assign copy)\n");
|
print("Former array refcount freed (assign copy)");
|
||||||
#endif
|
#endif
|
||||||
delete refcount;
|
delete refcount;
|
||||||
}
|
}
|
||||||
shape = array.shape;
|
shape = array.shape;
|
||||||
|
|
||||||
|
// const size_t size = np::prod(shape) * sizeof(T);
|
||||||
|
// data = new T[size];
|
||||||
|
// memcpy(data, array.data, size);
|
||||||
|
// refcount = new size_t;
|
||||||
|
// memcpy(refcount, array.refcount, sizeof(size_t));
|
||||||
|
|
||||||
data = array.data;
|
data = array.data;
|
||||||
refcount = array.refcount;
|
refcount = array.refcount;
|
||||||
|
|
||||||
if (refcount != nullptr)
|
if (refcount != nullptr)
|
||||||
(*refcount)++;
|
(*refcount)++;
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
else
|
else
|
||||||
printf("Assigned array has null refcount\n");
|
print("Assigned array has null refcount");
|
||||||
#endif
|
#endif
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
Array& operator=(Array&& array) noexcept {
|
Array& operator=(Array&& array) noexcept {
|
||||||
#if __DEBUG
|
// #if __DEBUG
|
||||||
printf("Array created (assign move)\n");
|
// print("Array created (assign move)");
|
||||||
#endif
|
// #endif
|
||||||
if (data != nullptr && data != array.data){
|
if (data != nullptr && data != array.data){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former array deleted (assign move)\n");
|
print("Former array deleted (assign move)");
|
||||||
#endif
|
#endif
|
||||||
delete[] data;
|
delete[] data;
|
||||||
}
|
}
|
||||||
if (refcount != nullptr && refcount != array.refcount){
|
if (refcount != nullptr && refcount != array.refcount){
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Former array refcount freed (assign move)\n");
|
print("Former array refcount freed (assign move)");
|
||||||
#endif
|
#endif
|
||||||
delete refcount;
|
delete refcount;
|
||||||
}
|
}
|
||||||
@ -456,39 +485,35 @@ namespace np {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
inline Array<T> empty(Shape&& shape) noexcept {
|
Array<T> empty(Shape&& shape) noexcept {
|
||||||
return Array<T>(shape);
|
return { std::move(shape), new T[np::prod(shape)] };
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
inline Array<T> empty(const Shape& shape) noexcept {
|
Array<T> empty(const Shape& shape) noexcept {
|
||||||
return Array<T>(shape);
|
return { std::move(shape), new T[np::prod(shape)] };
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
inline Array<T> empty(const std::initializer_list<size_t>& dims) noexcept {
|
Array<T> empty(const std::initializer_list<size_t>& dims) noexcept {
|
||||||
return Array<T>(dims);
|
const Shape shape(dims);
|
||||||
|
return { std::move(shape), new T[np::prod(shape)] };
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
Array<T> zeros(Shape&& shape) noexcept {
|
Array<T> zeros(Shape&& shape) noexcept {
|
||||||
Array<T> res(shape);
|
return { std::move(shape), new T[np::prod(shape)]{0} };
|
||||||
memset(res.data, 0, sizeof(T) * np::prod(res.shape));
|
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
Array<T> zeros(const Shape& shape) noexcept {
|
Array<T> zeros(const Shape& shape) noexcept {
|
||||||
Array<T> res(shape);
|
return { std::move(shape), new T[np::prod(shape)]{0} };
|
||||||
memset(res.data, 0, sizeof(T) * np::prod(res.shape));
|
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
Array<T> zeros(const std::initializer_list<size_t>& dims) noexcept {
|
Array<T> zeros(const std::initializer_list<size_t>& dims) noexcept {
|
||||||
Array<T> res(dims);
|
const Shape shape(dims);
|
||||||
memset(res.data, 0, sizeof(T) * np::prod(res.shape));
|
return { std::move(shape), new T[np::prod(shape)]{0} };
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@ -748,7 +773,7 @@ constexpr np::Array<T>& map(np::Array<T>& a, const std::function<T(const size_t&
|
|||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
__host__ __device__
|
__host__ __device__
|
||||||
constexpr inline static void swap(T* const a, T* const b) noexcept {
|
constexpr inline static void swap(T* a, T* b) noexcept {
|
||||||
if (a == b) return;
|
if (a == b) return;
|
||||||
const T temp = *a;
|
const T temp = *a;
|
||||||
*a = *b;
|
*a = *b;
|
||||||
@ -780,7 +805,7 @@ void quicksort(const np::Array<T>& a) noexcept {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static size_t as_partition(const T* const a, uint16_t* const indices, const size_t& l, const size_t& h) noexcept {
|
static size_t as_partition(const T* a, uint16_t* indices, const size_t& l, const size_t& h) noexcept {
|
||||||
size_t i = l - 1;
|
size_t i = l - 1;
|
||||||
for (size_t j = l; j <= h; ++j)
|
for (size_t j = l; j <= h; ++j)
|
||||||
if (a[indices[j]] < a[indices[h]])
|
if (a[indices[j]] < a[indices[h]])
|
||||||
@ -789,27 +814,69 @@ static size_t as_partition(const T* const a, uint16_t* const indices, const size
|
|||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::array<np::Array<uint8_t>, 4> load_datasets(void);
|
template<typename T>
|
||||||
void print_error_file(const char* const) noexcept;
|
void argsort(const T* a, uint16_t* indices, const size_t& l, const size_t& h) noexcept {
|
||||||
|
const size_t total = h - l + 1;
|
||||||
|
|
||||||
|
size_t* stack = new size_t[total]{l, h};
|
||||||
|
size_t top = 1, low = l, high = h;
|
||||||
|
|
||||||
|
while (top <= total) {
|
||||||
|
high = stack[top--];
|
||||||
|
low = stack[top--];
|
||||||
|
if(low >= high)
|
||||||
|
break;
|
||||||
|
|
||||||
|
const size_t p = as_partition(a, indices, low, high);
|
||||||
|
|
||||||
|
if (p - 1 > low && p - 1 < total) {
|
||||||
|
stack[++top] = low;
|
||||||
|
stack[++top] = p - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p + 1 < high) {
|
||||||
|
stack[++top] = p + 1;
|
||||||
|
stack[++top] = high;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete[] stack;
|
||||||
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void save(const np::Array<T>& d, const char* const filename) {
|
np::Array<uint16_t> argsort(const np::Array<T>& other, const size_t& l, const size_t& h) noexcept {
|
||||||
FILE* const output = fopen(filename, "wb");
|
np::Array<uint16_t> indices = np::empty(other.shape);
|
||||||
|
map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; });
|
||||||
|
|
||||||
|
argsort(other, indices, l, h);
|
||||||
|
return indices;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
np::Array<uint16_t> argsort(const np::Array<T>* other, const size_t& length) noexcept {
|
||||||
|
return argsort(other, 0, length - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::array<np::Array<uint8_t>, 4> load_datasets(void);
|
||||||
|
void print_error_file(const char*) noexcept;
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
void save(const np::Array<T>& d, const char* filename) {
|
||||||
|
FILE* output = fopen(filename, "wb");
|
||||||
if (output == NULL) {
|
if (output == NULL) {
|
||||||
print_error_file(filename);
|
print_error_file(filename);
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
assert(d.shape.refcount != 0);
|
assert(d.shape.refcount != 0);//, "Refcount shape is zero !!");
|
||||||
fwrite(&d.shape.length, sizeof(size_t), 1, output);
|
fwrite(&d.shape.length, sizeof(size_t), 1, output);
|
||||||
fwrite(d.shape.data, sizeof(size_t), d.shape.length, output);
|
fwrite(d.shape.data, sizeof(size_t), d.shape.length, output);
|
||||||
assert(d.refcount != 0);
|
assert(d.refcount != 0);//, "Refcount array is zero !!");
|
||||||
fwrite(d.data, sizeof(T), np::prod(d.shape), output);
|
fwrite(d.data, sizeof(T), np::prod(d.shape), output);
|
||||||
fclose(output);
|
fclose(output);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
np::Array<T> load(const char* const filename) {
|
np::Array<T> load(const char* filename) {
|
||||||
FILE* const input = fopen(filename, "rb");
|
FILE* input = fopen(filename, "rb");
|
||||||
if (input == NULL) {
|
if (input == NULL) {
|
||||||
print_error_file(filename);
|
print_error_file(filename);
|
||||||
throw;
|
throw;
|
||||||
@ -820,7 +887,7 @@ np::Array<T> load(const char* const filename) {
|
|||||||
fclose(input);
|
fclose(input);
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
size_t* const data = new size_t[length];
|
size_t* data = new size_t[length];
|
||||||
if(!fread(data, sizeof(size_t), length, input)){
|
if(!fread(data, sizeof(size_t), length, input)){
|
||||||
print_error_file(filename);
|
print_error_file(filename);
|
||||||
fclose(input);
|
fclose(input);
|
||||||
@ -838,7 +905,7 @@ np::Array<T> load(const char* const filename) {
|
|||||||
|
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
template<typename T>
|
template<typename T>
|
||||||
np::Array<T> copyToDevice(const char* const name, const np::Array<T>& array) noexcept {
|
np::Array<T> copyToDevice(const char* name, const np::Array<T>& array) noexcept {
|
||||||
const size_t array_size = np::prod(array.shape) * sizeof(T);
|
const size_t array_size = np::prod(array.shape) * sizeof(T);
|
||||||
const size_t shape_size = array.shape.length * sizeof(size_t);
|
const size_t shape_size = array.shape.length * sizeof(size_t);
|
||||||
np::Array<T> d_array;
|
np::Array<T> d_array;
|
||||||
@ -858,7 +925,7 @@ np::Array<T> copyToDevice(const char* const name, const np::Array<T>& array) noe
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
constexpr void cudaFree(const char* const name, np::Array<T>& array) noexcept {
|
constexpr void cudaFree(const char* name, np::Array<T>& array) noexcept {
|
||||||
//_print_cuda_error_(name, cudaFree(array.refcount));
|
//_print_cuda_error_(name, cudaFree(array.refcount));
|
||||||
//array.refcount = nullptr;
|
//array.refcount = nullptr;
|
||||||
_print_cuda_error_(name, cudaFree(array.data));
|
_print_cuda_error_(name, cudaFree(array.data));
|
||||||
@ -869,16 +936,16 @@ constexpr void cudaFree(const char* const name, np::Array<T>& array) noexcept {
|
|||||||
array.shape.data = nullptr;
|
array.shape.data = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr inline void _print_cuda_error_(const char* const name, const cudaError_t& err) noexcept {
|
constexpr inline void _print_cuda_error_(const char* name, const cudaError_t& err) noexcept {
|
||||||
if (err != cudaSuccess) fprintf(stderr, "Error: %s = %d : %s\n", name, err, cudaGetErrorString(err));
|
if (err != cudaSuccess) fprintf(stderr, "Error: %s = %d : %s\n", name, err, cudaGetErrorString(err));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int32_t print(const np::Shape&) noexcept;
|
int print(const np::Shape&) noexcept;
|
||||||
int32_t print(const np::Array<uint8_t>&) noexcept;
|
int print(const np::Array<uint8_t>&) noexcept;
|
||||||
int32_t print(const np::Array<float64_t>&) noexcept;
|
int print(const np::Array<float64_t>&) noexcept;
|
||||||
int32_t print(const np::Array<uint8_t>&, const np::Slice&) noexcept;
|
int print(const np::Array<uint8_t>&, const np::Slice&) noexcept;
|
||||||
int32_t print(const np::Array<uint32_t>&, const np::Slice&) noexcept;
|
int print(const np::Array<uint32_t>&, const np::Slice&) noexcept;
|
||||||
int32_t print(const np::Array<int32_t>&, const np::Slice&) noexcept;
|
int print(const np::Array<int32_t>&, const np::Slice&) noexcept;
|
||||||
int32_t print(const np::Array<uint16_t>&, const np::Slice&) noexcept;
|
int print(const np::Array<uint16_t>&, const np::Slice&) noexcept;
|
||||||
int32_t print_feat(const np::Array<uint8_t>&, const np::Slice&) noexcept;
|
int print_feat(const np::Array<uint8_t>&, const np::Slice&) noexcept;
|
||||||
|
@ -1,16 +0,0 @@
|
|||||||
#include "data.hpp"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Product of every elements in a given shape after a given offset.
|
|
||||||
*
|
|
||||||
* @param shape Shape to product over
|
|
||||||
* @param offset Skip offset
|
|
||||||
* @return Scalar product
|
|
||||||
*/
|
|
||||||
__host__ __device__
|
|
||||||
size_t np::prod(const np::Shape& shape, const size_t& offset) noexcept {
|
|
||||||
size_t result = shape[offset];
|
|
||||||
for(size_t i = 1 + offset; i < shape.length; ++i)
|
|
||||||
result *= shape[i];
|
|
||||||
return result;
|
|
||||||
}
|
|
@ -1,15 +0,0 @@
|
|||||||
services:
|
|
||||||
violajones-cpp:
|
|
||||||
image: saundersp/violajones-cpp
|
|
||||||
build: .
|
|
||||||
volumes:
|
|
||||||
- ./models:/home/ViolaJones/cpp/models
|
|
||||||
- ./out:/home/ViolaJones/cpp/out
|
|
||||||
- ../data:/home/ViolaJones/data
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
count: 1
|
|
||||||
capabilities: [gpu]
|
|
@ -11,7 +11,7 @@ void test_working(const size_t& length) noexcept {
|
|||||||
const size_t size = length * sizeof(size_t);
|
const size_t size = length * sizeof(size_t);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Estimating memory footprint at : %s\n", format_byte_size(2 * size).c_str());
|
print("Estimating memory footprint at : " + format_byte_size(2 * size));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length });
|
np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length });
|
||||||
@ -53,7 +53,7 @@ void test_working_2d(const size_t& N1, const size_t& N2) noexcept {
|
|||||||
const size_t size = length * sizeof(size_t);
|
const size_t size = length * sizeof(size_t);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Estimating memory footprint at : %s\n", format_byte_size(2 * size).c_str());
|
print("Estimating memory footprint at : " + format_byte_size(2 * size));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length });
|
np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length });
|
||||||
@ -96,7 +96,7 @@ void test_working_3d(const size_t& N1, const size_t& N2, const size_t& N3) noexc
|
|||||||
const size_t size = length * sizeof(size_t);
|
const size_t size = length * sizeof(size_t);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("Estimating memory footprint at : %s\n", format_byte_size(2 * size).c_str());
|
print("Estimating memory footprint at : " + format_byte_size(2 * size));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length });
|
np::Array<size_t> x = np::empty<size_t>({ length }), y = np::empty<size_t>({ length });
|
||||||
|
201
cpp/projet.cpp
201
cpp/projet.cpp
@ -1,54 +1,50 @@
|
|||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
|
namespace fs = std::filesystem;
|
||||||
#include "data.hpp"
|
#include "data.hpp"
|
||||||
#include "toolbox.hpp"
|
#include "toolbox.hpp"
|
||||||
#include "config.hpp"
|
#include "config.hpp"
|
||||||
|
#include "gpu_unit_test.hpp"
|
||||||
#include "toolbox_unit_test.hpp"
|
#include "toolbox_unit_test.hpp"
|
||||||
#include "ViolaJones.hpp"
|
#include "ViolaJones.hpp"
|
||||||
#include "ViolaJones_device.hpp"
|
#include "ViolaJonesGPU.hpp"
|
||||||
|
#include "ViolaJonesCPU.hpp"
|
||||||
|
|
||||||
#if GPU_BOOSTED
|
#if GPU_BOOSTED
|
||||||
#include "gpu_unit_test.hpp"
|
|
||||||
#define LABEL "GPU"
|
#define LABEL "GPU"
|
||||||
|
#define apply_features apply_features_gpu
|
||||||
|
#define set_integral_image set_integral_image_gpu
|
||||||
|
#define argsort_2d argsort_2d_gpu
|
||||||
#else
|
#else
|
||||||
#define LABEL "CPU"
|
#define LABEL "CPU"
|
||||||
|
#define apply_features apply_features_cpu
|
||||||
|
#define set_integral_image set_integral_image_cpu
|
||||||
|
#define argsort_2d argsort_2d_cpu
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Array<int32_t>, np::Array<uint8_t>> preprocessing() {
|
||||||
* @brief Execute the preprocessing phase
|
|
||||||
*
|
|
||||||
* The preprocessing phase consist of the following steps :
|
|
||||||
* - Load the dataset
|
|
||||||
* - Calculate features
|
|
||||||
* - Calculate integral images
|
|
||||||
* - Apply features to images
|
|
||||||
* - Calculate argsort of the featured images
|
|
||||||
*
|
|
||||||
* @return std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Array<int32_t>, np::Array<uint8_t>> Tuple containing in order : training features, training features sorted indexes, training labels, testing features, testing labels
|
|
||||||
*/
|
|
||||||
std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Array<int32_t>, np::Array<uint8_t>> preprocessing(void) {
|
|
||||||
// Creating state saver folders if they don't exist already
|
// Creating state saver folders if they don't exist already
|
||||||
if (SAVE_STATE)
|
if (SAVE_STATE)
|
||||||
for (const char* const folder_name : { "models", "out" })
|
for (const char* const folder_name : { "models", "out" })
|
||||||
std::filesystem::create_directory(folder_name);
|
fs::create_directory(folder_name);
|
||||||
|
|
||||||
const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns();
|
const std::chrono::system_clock::time_point preproc_timestamp = perf_counter_ns();
|
||||||
const std::array<int32_t, 3> preproc_gaps = { 49, -18, 29 };
|
const std::array<int32_t, 3> preproc_gaps = { 49, -18, 29 };
|
||||||
header(preproc_gaps, { "Preprocessing", "Time spent (ns)", "Formatted time spent" });
|
header({ "Preprocessing", "Time spent (ns)", "Formatted time spent" }, preproc_gaps);
|
||||||
|
|
||||||
const auto [ X_train, y_train, X_test, y_test ] = state_saver<uint8_t, 4>("Loading sets", preproc_gaps[0], { "X_train", "y_train", "X_test", "y_test" },
|
const auto [ X_train, y_train, X_test, y_test ] = state_saver<uint8_t, 4>("Loading sets", preproc_gaps[0], {"X_train", "y_train", "X_test", "y_test"},
|
||||||
FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets);
|
FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("X_train\n");
|
print("X_train");
|
||||||
print(X_train.shape);
|
print(X_train.shape);
|
||||||
print(X_train, { IDX_INSPECT });
|
print(X_train, { IDX_INSPECT });
|
||||||
printf("X_test\n");
|
print("X_test");
|
||||||
print(X_test.shape);
|
print(X_test.shape);
|
||||||
print(X_test, { IDX_INSPECT });
|
print(X_test, { IDX_INSPECT });
|
||||||
printf("y_train\n");
|
print("y_train");
|
||||||
print(y_train.shape);
|
print(y_train.shape);
|
||||||
print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||||
printf("y_test\n");
|
print("y_test");
|
||||||
print(y_test.shape);
|
print(y_test.shape);
|
||||||
print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||||
#endif
|
#endif
|
||||||
@ -57,7 +53,7 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
|||||||
FORCE_REDO, SAVE_STATE, OUT_DIR, build_features, X_train.shape[1], X_train.shape[2]);
|
FORCE_REDO, SAVE_STATE, OUT_DIR, build_features, X_train.shape[1], X_train.shape[2]);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("feats\n");
|
print("feats");
|
||||||
print(feats.shape);
|
print(feats.shape);
|
||||||
print_feat(feats, { IDX_INSPECT });
|
print_feat(feats, { IDX_INSPECT });
|
||||||
#endif
|
#endif
|
||||||
@ -68,10 +64,10 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
|||||||
FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_test);
|
FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_test);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("X_train_ii\n");
|
print("X_train_ii");
|
||||||
print(X_train_ii.shape);
|
print(X_train_ii.shape);
|
||||||
print(X_train_ii, { IDX_INSPECT });
|
print(X_train_ii, { IDX_INSPECT });
|
||||||
printf("X_test_ii\n");
|
print("X_test_ii");
|
||||||
print(X_test_ii.shape);
|
print(X_test_ii.shape);
|
||||||
print(X_test_ii, { IDX_INSPECT });
|
print(X_test_ii, { IDX_INSPECT });
|
||||||
#endif
|
#endif
|
||||||
@ -82,15 +78,16 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
|||||||
FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_test_ii);
|
FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_test_ii);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("X_train_feat\n");
|
print("X_train_feat");
|
||||||
print(X_train_feat.shape);
|
print(X_train_feat.shape);
|
||||||
print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||||
printf("X_test_feat\n");
|
print("X_test_feat");
|
||||||
print(X_test_feat.shape);
|
print(X_test_feat.shape);
|
||||||
print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// const np::Array<int32_t> indices = state_saver<int32_t>("Selecting best features", preproc_gaps[0], "indices", select_percentile, X_train_feat, d.y_train);
|
// const Array<int> indices = measure_time_save<Array<int>>("Selecting best features", "indices", select_percentile, X_train_feat, d.y_train);
|
||||||
|
// const Array<int> indices = measure_time<Array<int>>("Selecting best features", select_percentile, X_train_feat, d.y_train);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
// print_feature(indices);
|
// print_feature(indices);
|
||||||
@ -100,18 +97,18 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
|||||||
FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_train_feat);
|
FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_train_feat);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("X_train_feat_argsort\n");
|
print("X_train_feat_argsort");
|
||||||
print(X_train_feat_argsort.shape);
|
print(X_train_feat_argsort.shape);
|
||||||
print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", preproc_gaps[0], "X_test_feat_argsort_" LABEL,
|
// const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", preproc_gaps[0], "X_test_feat_argsort_" LABEL,
|
||||||
FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);
|
// FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);
|
||||||
|
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("X_test_feat_argsort\n");
|
// printf("X_test_feat_argsort\n");
|
||||||
print(X_test_feat_argsort.shape);
|
// print(X_test_feat_argsort.shape);
|
||||||
print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
// print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||||
#endif
|
#endif
|
||||||
const long long time_spent = duration_ns(perf_counter_ns() - preproc_timestamp);
|
const long long time_spent = duration_ns(perf_counter_ns() - preproc_timestamp);
|
||||||
formatted_line(preproc_gaps, "├", "┼", "─", "┤");
|
formatted_line(preproc_gaps, "├", "┼", "─", "┤");
|
||||||
@ -120,18 +117,10 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
|||||||
return { X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test };
|
return { X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Train the weak classifiers.
|
|
||||||
*
|
|
||||||
* @param X_train_feat Training images
|
|
||||||
* @param X_train_feat_argsort Sorted indexes of the training images features
|
|
||||||
* @param y_train Training labels
|
|
||||||
* @return List of trained models
|
|
||||||
*/
|
|
||||||
std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_train_feat_argsort, const np::Array<uint8_t>& y_train) noexcept {
|
std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_train_feat_argsort, const np::Array<uint8_t>& y_train) noexcept {
|
||||||
const std::chrono::system_clock::time_point training_timestamp = perf_counter_ns();
|
const std::chrono::system_clock::time_point training_timestamp = perf_counter_ns();
|
||||||
const std::array<int32_t, 3> training_gaps = { 26, -18, 29 };
|
const std::array<int32_t, 3> training_gaps = { 26, -18, 29 };
|
||||||
header(training_gaps, { "Training", "Time spent (ns)", "Formatted time spent" });
|
header({ "Training", "Time spent (ns)", "Formatted time spent" }, training_gaps);
|
||||||
|
|
||||||
std::array<std::array<np::Array<float64_t>, 2>, TS.size()> models;
|
std::array<std::array<np::Array<float64_t>, 2>, TS.size()> models;
|
||||||
|
|
||||||
@ -147,9 +136,9 @@ std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array
|
|||||||
const auto [ alphas, final_classifiers ] = state_saver<float64_t, 2>(title, training_gaps[0], { alphas_title, final_classifiers_title },
|
const auto [ alphas, final_classifiers ] = state_saver<float64_t, 2>(title, training_gaps[0], { alphas_title, final_classifiers_title },
|
||||||
FORCE_REDO, SAVE_STATE, MODEL_DIR, train_viola_jones, T, X_train_feat, X_train_feat_argsort, y_train);
|
FORCE_REDO, SAVE_STATE, MODEL_DIR, train_viola_jones, T, X_train_feat, X_train_feat_argsort, y_train);
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("alphas\n");
|
print("alphas");
|
||||||
print(alphas);
|
print(alphas);
|
||||||
printf("final_classifiers\n");
|
print("final_classifiers");
|
||||||
print(final_classifiers);
|
print(final_classifiers);
|
||||||
#endif
|
#endif
|
||||||
models[i++] = { alphas, final_classifiers };
|
models[i++] = { alphas, final_classifiers };
|
||||||
@ -162,18 +151,9 @@ std::array<std::array<np::Array<float64_t>, 2>, TS.size()> train(const np::Array
|
|||||||
return models;
|
return models;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Benchmark the trained classifiers on the training and testing sets.
|
|
||||||
*
|
|
||||||
* @param models List of trained models
|
|
||||||
* @param X_train_feat Training features
|
|
||||||
* @param y_train Training labels
|
|
||||||
* @param X_test_feat Testing features
|
|
||||||
* @param y_test Testing labels
|
|
||||||
*/
|
|
||||||
void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>, TS.size()>& models, const np::Array<int32_t>& X_train_feat, const np::Array<uint8_t>& y_train, const np::Array<int32_t>& X_test_feat, const np::Array<uint8_t>& y_test) {
|
void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>, TS.size()>& models, const np::Array<int32_t>& X_train_feat, const np::Array<uint8_t>& y_train, const np::Array<int32_t>& X_test_feat, const np::Array<uint8_t>& y_test) {
|
||||||
const std::array<int32_t, 5> testing_gaps = { 26, -19, 24, -19, 24 };
|
const std::array<int32_t, 5> testing_gaps = { 26, -19, 24, -19, 24 };
|
||||||
header(testing_gaps, { "Testing", "Time spent (ns) (E)", "Formatted time spent (E)", "Time spent (ns) (T)", "Formatted time spent (T)" });
|
header({ "Testing", "Time spent (ns) (E)", "Formatted time spent (E)", "Time spent (ns) (T)", "Formatted time spent (T)" }, testing_gaps);
|
||||||
std::array<std::array<float64_t, 8>, TS.size()> results;
|
std::array<std::array<float64_t, 8>, TS.size()> results;
|
||||||
|
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
@ -209,7 +189,7 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
|
|||||||
footer(testing_gaps);
|
footer(testing_gaps);
|
||||||
|
|
||||||
const std::array<int32_t, 9> evaluating_gaps = { 19, -7, -6, -6, -6, -7, -6, -6, -6 };
|
const std::array<int32_t, 9> evaluating_gaps = { 19, -7, -6, -6, -6, -7, -6, -6, -6 };
|
||||||
header(evaluating_gaps, { "Evaluating", "ACC (E)", "F1 (E)", "FN (E)", "FP (E)", "ACC (T)", "F1 (T)", "FN (T)", "FP (T)"});
|
header({ "Evaluating", "ACC (E)", "F1 (E)", "FN (E)", "FP (E)", "ACC (T)", "F1 (T)", "FN (T)", "FP (T)"}, evaluating_gaps);
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
for (const size_t T : TS) {
|
for (const size_t T : TS) {
|
||||||
@ -221,16 +201,10 @@ void testing_and_evaluating(const std::array<std::array<np::Array<float64_t>, 2>
|
|||||||
footer(evaluating_gaps);
|
footer(evaluating_gaps);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test whether each result is equal to the one produced by the other devices.
|
|
||||||
*
|
|
||||||
* ViolaJones is a fully deterministic algorithm, so the results should be the same regardless of the device;
|
|
||||||
* this function checks this assertion.
|
|
||||||
*/
|
|
||||||
void unit_test(void) {
|
void unit_test(void) {
|
||||||
const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
|
const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
|
||||||
const std::array<int32_t, 4> unit_gaps = { 37, -10, -18, 29};
|
const std::array<int32_t, 4> unit_gaps = { 37, -10, -18, 29};
|
||||||
header(unit_gaps, { "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent" });
|
header({ "Unit testing", "Test state", "Time spent (ns)", "Formatted time spent" }, unit_gaps);
|
||||||
|
|
||||||
char title[BUFFER_SIZE] = { 0 };
|
char title[BUFFER_SIZE] = { 0 };
|
||||||
char tmp_title[BUFFER_SIZE / 2] = { 0 };
|
char tmp_title[BUFFER_SIZE / 2] = { 0 };
|
||||||
@ -250,104 +224,81 @@ void unit_test(void) {
|
|||||||
formatted_row(unit_gaps, { title, "Failed", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
formatted_row(unit_gaps, { title, "Failed", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
||||||
};
|
};
|
||||||
|
|
||||||
for (const char* const label : { "train", "test" }) {
|
for (const char* label : { "train", "test" }) {
|
||||||
snprintf(file_cpu, BUFFER_SIZE, OUT_DIR "/X_%s_ii_CPU.bin", label);
|
sprintf(file_cpu, OUT_DIR "/X_%s_ii_CPU.bin", label);
|
||||||
snprintf(file_gpu, BUFFER_SIZE, OUT_DIR "/X_%s_ii_GPU.bin", label);
|
sprintf(file_gpu, OUT_DIR "/X_%s_ii_GPU.bin", label);
|
||||||
if (std::filesystem::exists(file_cpu) && std::filesystem::exists(file_gpu)) {
|
if (fs::exists(file_cpu) && fs::exists(file_gpu)) {
|
||||||
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_ii", label);
|
|
||||||
snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title);
|
|
||||||
test_fnc(title, [&file_cpu, &file_gpu]{
|
|
||||||
const np::Array<uint32_t> X_train_ii_cpu = load<uint32_t>(file_cpu);
|
const np::Array<uint32_t> X_train_ii_cpu = load<uint32_t>(file_cpu);
|
||||||
const np::Array<uint32_t> X_train_ii_gpu = load<uint32_t>(file_gpu);
|
const np::Array<uint32_t> X_train_ii_gpu = load<uint32_t>(file_gpu);
|
||||||
return unit_test_cpu_vs_gpu<uint32_t>(X_train_ii_cpu, X_train_ii_gpu);
|
sprintf(tmp_title, "X_%s_ii", label);
|
||||||
});
|
sprintf(title, "%-22s - CPU vs GPU", tmp_title);
|
||||||
|
test_fnc(title, [&X_train_ii_cpu, &X_train_ii_gpu]{ return unit_test_cpu_vs_gpu<uint32_t>(X_train_ii_cpu, X_train_ii_gpu); });
|
||||||
}
|
}
|
||||||
snprintf(file_cpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_CPU.bin", label);
|
|
||||||
snprintf(file_gpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_GPU.bin", label);
|
|
||||||
uint8_t feat = 0;
|
|
||||||
char file_feat[BUFFER_SIZE] = { 0 };
|
char file_feat[BUFFER_SIZE] = { 0 };
|
||||||
if (std::filesystem::exists(file_cpu)) {
|
sprintf(file_feat, OUT_DIR "/X_%s_feat_CPU.bin", label);
|
||||||
strncpy(file_feat, file_cpu, BUFFER_SIZE);
|
if (fs::exists(file_feat)) {
|
||||||
feat = 1;
|
|
||||||
} else if (std::filesystem::exists(file_gpu)) {
|
|
||||||
strncpy(file_feat, file_gpu, BUFFER_SIZE);
|
|
||||||
feat = 2;
|
|
||||||
}
|
|
||||||
if (feat != 0) {
|
|
||||||
const np::Array<int32_t> X_feat = load<int32_t>(file_feat);
|
const np::Array<int32_t> X_feat = load<int32_t>(file_feat);
|
||||||
snprintf(file_gpu, BUFFER_SIZE, feat == 1 ? OUT_DIR "/X_%s_feat_GPU.bin" : OUT_DIR "/X_%s_feat_CPU.bin", label);
|
sprintf(file_gpu, OUT_DIR "/X_%s_feat_GPU.bin", label);
|
||||||
if (std::filesystem::exists(file_gpu)) {
|
if (fs::exists(file_gpu)) {
|
||||||
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat", label);
|
const np::Array<int32_t> X_feat_gpu = load<int32_t>(file_gpu);
|
||||||
snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title);
|
sprintf(tmp_title, "X_%s_feat", label);
|
||||||
test_fnc(title, [&X_feat, &file_gpu]{
|
sprintf(title, "%-22s - CPU vs GPU", tmp_title);
|
||||||
const np::Array<int32_t> X_feat_aux = load<int32_t>(file_gpu);
|
test_fnc(title, [&X_feat, &X_feat_gpu]{ return unit_test_cpu_vs_gpu<int32_t>(X_feat, X_feat_gpu); });
|
||||||
return unit_test_cpu_vs_gpu<int32_t>(X_feat, X_feat_aux);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
snprintf(file_cpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_argsort_CPU.bin", label);
|
sprintf(file_cpu, OUT_DIR "/X_%s_feat_argsort_CPU.bin", label);
|
||||||
np::Array<uint16_t> X_feat_argsort_cpu;
|
np::Array<uint16_t> X_feat_argsort_cpu;
|
||||||
uint8_t loaded = 0;
|
uint8_t loaded = 0;
|
||||||
if (std::filesystem::exists(file_cpu)) {
|
if (fs::exists(file_cpu)) {
|
||||||
|
X_feat_argsort_cpu = std::move(load<uint16_t>(file_cpu));
|
||||||
++loaded;
|
++loaded;
|
||||||
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label);
|
sprintf(tmp_title, "X_%s_feat_argsort", label);
|
||||||
snprintf(title, BUFFER_SIZE, "%-22s - CPU argsort", tmp_title);
|
sprintf(title, "%-22s - CPU argsort", tmp_title);
|
||||||
test_fnc(title, [&X_feat, &X_feat_argsort_cpu, &file_cpu]{
|
test_fnc(title, [&X_feat, &X_feat_argsort_cpu]{ return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_cpu); });
|
||||||
X_feat_argsort_cpu = load<uint16_t>(file_cpu);
|
|
||||||
return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_cpu);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
snprintf(file_gpu, BUFFER_SIZE, OUT_DIR "/X_%s_feat_argsort_GPU.bin", label);
|
sprintf(file_gpu, OUT_DIR "/X_%s_feat_argsort_GPU.bin", label);
|
||||||
np::Array<uint16_t> X_feat_argsort_gpu;
|
np::Array<uint16_t> X_feat_argsort_gpu;
|
||||||
if (std::filesystem::exists(file_gpu)) {
|
if (fs::exists(file_gpu)) {
|
||||||
|
X_feat_argsort_gpu = std::move(load<uint16_t>(file_gpu));
|
||||||
++loaded;
|
++loaded;
|
||||||
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label);
|
sprintf(tmp_title, "X_%s_feat_argsort", label);
|
||||||
snprintf(title, BUFFER_SIZE, "%-22s - GPU argsort", tmp_title);
|
sprintf(title, "%-22s - GPU argsort", tmp_title);
|
||||||
test_fnc(title, [&X_feat, &X_feat_argsort_gpu, &file_gpu]{
|
test_fnc(title, [&X_feat, &X_feat_argsort_gpu]{ return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_gpu); });
|
||||||
X_feat_argsort_gpu = load<uint16_t>(file_gpu);
|
|
||||||
return unit_test_argsort_2d<int32_t>(X_feat, X_feat_argsort_gpu);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
if (loaded == 2){
|
if (loaded == 2){
|
||||||
snprintf(tmp_title, BUFFER_SIZE / 2, "X_%s_feat_argsort", label);
|
sprintf(tmp_title, "X_%s_feat_argsort", label);
|
||||||
snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title);
|
sprintf(title, "%-22s - CPU vs GPU", tmp_title);
|
||||||
test_fnc(title, [&X_feat_argsort_cpu, &X_feat_argsort_gpu]{ return unit_test_cpu_vs_gpu<uint16_t>(X_feat_argsort_cpu, X_feat_argsort_gpu); });
|
test_fnc(title, [&X_feat_argsort_cpu, &X_feat_argsort_gpu]{ return unit_test_cpu_vs_gpu<uint16_t>(X_feat_argsort_cpu, X_feat_argsort_gpu); });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const size_t T : TS)
|
for (const size_t T : TS)
|
||||||
for (const char* const label : { "alphas", "final_classifiers" }) {
|
for (const char* label : { "alphas", "final_classifiers" }) {
|
||||||
snprintf(file_cpu, BUFFER_SIZE, MODEL_DIR "/%s_%lu_CPU.bin", label, T);
|
sprintf(file_cpu, MODEL_DIR "/%s_%lu_CPU.bin", label, T);
|
||||||
snprintf(file_gpu, BUFFER_SIZE, MODEL_DIR "/%s_%lu_GPU.bin", label, T);
|
sprintf(file_gpu, MODEL_DIR "/%s_%lu_GPU.bin", label, T);
|
||||||
if (std::filesystem::exists(file_cpu) && std::filesystem::exists(file_gpu)){
|
if (fs::exists(file_cpu) && fs::exists(file_gpu)){
|
||||||
snprintf(tmp_title, BUFFER_SIZE / 2, "%s_%ld", label, T);
|
|
||||||
snprintf(title, BUFFER_SIZE, "%-22s - CPU vs GPU", tmp_title);
|
|
||||||
test_fnc(title, [&file_cpu, &file_gpu]{
|
|
||||||
const np::Array<float64_t> cpu = load<float64_t>(file_cpu);
|
const np::Array<float64_t> cpu = load<float64_t>(file_cpu);
|
||||||
const np::Array<float64_t> gpu = load<float64_t>(file_gpu);
|
const np::Array<float64_t> gpu = load<float64_t>(file_gpu);
|
||||||
return unit_test_cpu_vs_gpu<float64_t>(cpu, gpu);
|
sprintf(tmp_title, "%s_%ld", label, T);
|
||||||
});
|
sprintf(title, "%-22s - CPU vs GPU", tmp_title);
|
||||||
|
test_fnc(title, [&cpu, &gpu]{ return unit_test_cpu_vs_gpu<float64_t>(cpu, gpu); });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const long long time_spent = duration_ns(perf_counter_ns() - unit_timestamp);
|
const long long time_spent = duration_ns(perf_counter_ns() - unit_timestamp);
|
||||||
|
|
||||||
if (n_total == 0)
|
|
||||||
formatted_row(unit_gaps, { "Unit testing summary", "No files", thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
|
||||||
else {
|
|
||||||
snprintf(title, BUFFER_SIZE, "%ld/%ld", n_success, n_total);
|
snprintf(title, BUFFER_SIZE, "%ld/%ld", n_success, n_total);
|
||||||
|
|
||||||
formatted_line(unit_gaps, "├", "┼", "─", "┤");
|
formatted_line(unit_gaps, "├", "┼", "─", "┤");
|
||||||
formatted_row(unit_gaps, { "Unit testing summary", title, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
formatted_row(unit_gaps, { "Unit testing summary", title, thousand_sep(time_spent).c_str(), format_time_ns(time_spent).c_str() });
|
||||||
}
|
|
||||||
footer(unit_gaps);
|
footer(unit_gaps);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t main(void){
|
int main(){
|
||||||
setlocale(LC_NUMERIC, ""); // Allow proper number display
|
setlocale(LC_NUMERIC, ""); // Allow proper number display
|
||||||
|
|
||||||
const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
|
const std::chrono::system_clock::time_point unit_timestamp = perf_counter_ns();
|
||||||
const std::array<int32_t, 3> unit_gaps = { 27, -18, 29 };
|
const std::array<int32_t, 3> unit_gaps = { 27, -18, 29 };
|
||||||
header(unit_gaps, { "Unit testing", "Time spent (ns)", "Formatted time spent" });
|
header({ "Unit testing", "Time spent (ns)", "Formatted time spent" }, unit_gaps);
|
||||||
#if GPU_BOOSTED
|
#if GPU_BOOSTED
|
||||||
benchmark_function_void("Testing GPU capabilities 1D", unit_gaps[0], test_working, 50000);
|
benchmark_function_void("Testing GPU capabilities 1D", unit_gaps[0], test_working, 50000);
|
||||||
benchmark_function_void("Testing GPU capabilities 2D", unit_gaps[0], test_working_2d, 200, 500);
|
benchmark_function_void("Testing GPU capabilities 2D", unit_gaps[0], test_working_2d, 200, 500);
|
||||||
|
17
cpp/test.cpp
17
cpp/test.cpp
@ -8,22 +8,22 @@
|
|||||||
|
|
||||||
void printProgress(const float64_t& percentage) noexcept {
|
void printProgress(const float64_t& percentage) noexcept {
|
||||||
const uint64_t val = static_cast<uint64_t>(percentage * 100);
|
const uint64_t val = static_cast<uint64_t>(percentage * 100);
|
||||||
const int32_t lpad = static_cast<int32_t>(percentage * PBWIDTH);
|
const int lpad = static_cast<int>(percentage * PBWIDTH);
|
||||||
const int32_t rpad = PBWIDTH - lpad;
|
const int rpad = PBWIDTH - lpad;
|
||||||
fprintf(stderr, "%3lu%% [%.*s%*s]\r", val, lpad, PBSTR, rpad, "");
|
printf("%3lu%% [%.*s%*s]\r", val, lpad, PBSTR, rpad, "");
|
||||||
fflush(stderr);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
|
|
||||||
void clearProgress(void) noexcept {
|
void clearProgress() noexcept {
|
||||||
// Progress bar width + space before + num space + space after
|
// Progress bar width + space before + num space + space after
|
||||||
fprintf(stderr, "%*c\r", PBWIDTH + 1 + 3 + 3, ' ');
|
printf("%*c\r", PBWIDTH + 1 + 3 + 3, ' ');
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test(const uint64_t& N) noexcept {
|
void test(const uint64_t& N) noexcept {
|
||||||
#if __DEBUG
|
#if __DEBUG
|
||||||
printf("DETERMINISTIC for N=%s of %s sized %s\n", thousand_sep(N).c_str(), typeid(T).name(), format_byte_size(sizeof(T)).c_str());
|
printf("DETERMINISTIC for N=%s of %s sized %s\n", thousand_sep(N).c_str(), typeid(T).name(), format_byte_size(sizeof(T)).c_str());
|
||||||
printf("Estimating memory footprint at : %s\n", format_byte_size(3 * N * sizeof(T)).c_str());
|
print("Estimating memory footprint at : " + format_byte_size(3 * N * sizeof(T)));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
T *a = new T[N], *b = new T[N], *c = new T[N];
|
T *a = new T[N], *b = new T[N], *c = new T[N];
|
||||||
@ -45,7 +45,7 @@ void test(const uint64_t& N) noexcept {
|
|||||||
delete[] a, delete[] b, delete[] c;
|
delete[] a, delete[] b, delete[] c;
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_float(void) noexcept {
|
void test_float() noexcept {
|
||||||
std::cout << std::setprecision(1<<8);
|
std::cout << std::setprecision(1<<8);
|
||||||
const uint64_t N = static_cast<uint64_t>(1)<<28;
|
const uint64_t N = static_cast<uint64_t>(1)<<28;
|
||||||
test<float128_t>(N);
|
test<float128_t>(N);
|
||||||
@ -60,3 +60,4 @@ void test_float(void) noexcept {
|
|||||||
//printf("%.128lf\n", static_cast<float64_t>(1) / 3);
|
//printf("%.128lf\n", static_cast<float64_t>(1) / 3);
|
||||||
//printf("%.128f\n", static_cast<float>(1) / 3);
|
//printf("%.128f\n", static_cast<float>(1) / 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,17 +1,19 @@
|
|||||||
#include "toolbox.hpp"
|
#include "toolbox.hpp"
|
||||||
|
#include <numeric>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
inline static constexpr uint64_t u64(const double& n) noexcept { return static_cast<uint64_t>(n); }
|
static constexpr uint64_t u64(const double& n) noexcept { return static_cast<uint64_t>(n); }
|
||||||
|
|
||||||
static const constexpr size_t N_TIMES = 11;
|
static const constexpr size_t N_TIMES = 11;
|
||||||
static const constexpr std::array<const char*, N_TIMES> time_formats = { "ns", "us", "ms", "s", "m", "h", "j", "w", "M", "y", "c" };
|
static const constexpr std::array<const char*, N_TIMES> time_formats = { "ns", "µs", "ms", "s", "m", "h", "j", "w", "M", "y", "c" };
|
||||||
static const constexpr std::array<uint64_t, N_TIMES> time_numbers = { 1, u64(1e3), u64(1e6), u64(1e9), u64(6e10), u64(36e11), u64(864e11),
|
static const constexpr std::array<uint64_t, N_TIMES> time_numbers = { 1, u64(1e3), u64(1e6), u64(1e9), u64(6e10), u64(36e11), u64(864e11),
|
||||||
u64(6048e11), u64(26784e11), u64(31536e12), u64(31536e14) };
|
u64(6048e11), u64(26784e11), u64(31536e12), u64(31536e14) };
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Format the time in seconds in human readable format.
|
* @brief Format the time in seconds in human readable format.
|
||||||
*
|
*
|
||||||
* @param time number of seconds
|
* @param time Time in seconds
|
||||||
* @return The formatted human readable string
|
* @return std::string The formatted human readable string.
|
||||||
*/
|
*/
|
||||||
std::string format_time(uint64_t time) noexcept {
|
std::string format_time(uint64_t time) noexcept {
|
||||||
if (time == 0)
|
if (time == 0)
|
||||||
@ -19,8 +21,8 @@ std::string format_time(uint64_t time) noexcept {
|
|||||||
|
|
||||||
std::string s = "";
|
std::string s = "";
|
||||||
uint64_t res;
|
uint64_t res;
|
||||||
for (int32_t i = N_TIMES - 1; i >= 3; --i) {
|
for (int i = N_TIMES - 1; i >= 3; --i) {
|
||||||
const uint64_t time_number = time_numbers[i] / u64(1e9); // Converting nanosecond timestamp to second
|
const uint64_t time_number = time_numbers[i] / 1e9; // Converting nanosecond timestamp to second
|
||||||
if (time >= time_number) {
|
if (time >= time_number) {
|
||||||
res = time / time_number;
|
res = time / time_number;
|
||||||
time %= time_number;
|
time %= time_number;
|
||||||
@ -28,7 +30,7 @@ std::string format_time(uint64_t time) noexcept {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove trailing character
|
if (s.back() == ' ')
|
||||||
s.pop_back();
|
s.pop_back();
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
@ -38,7 +40,7 @@ std::string format_time(uint64_t time) noexcept {
|
|||||||
* @brief Format the time in nanoseconds in human readable format.
|
* @brief Format the time in nanoseconds in human readable format.
|
||||||
*
|
*
|
||||||
* @param time Time in nanoseconds
|
* @param time Time in nanoseconds
|
||||||
* @return std::string The formatted human readable string
|
* @return std::string The formatted human readable string.
|
||||||
*/
|
*/
|
||||||
std::string format_time_ns(uint64_t time) noexcept {
|
std::string format_time_ns(uint64_t time) noexcept {
|
||||||
if (time == 0)
|
if (time == 0)
|
||||||
@ -46,7 +48,7 @@ std::string format_time_ns(uint64_t time) noexcept {
|
|||||||
|
|
||||||
std::string s = "";
|
std::string s = "";
|
||||||
uint64_t res;
|
uint64_t res;
|
||||||
for (int32_t i = N_TIMES - 1; i >= 0; --i) {
|
for (int i = N_TIMES - 1; i >= 0; --i) {
|
||||||
if (time >= time_numbers[i]) {
|
if (time >= time_numbers[i]) {
|
||||||
res = time / time_numbers[i];
|
res = time / time_numbers[i];
|
||||||
time %= time_numbers[i];
|
time %= time_numbers[i];
|
||||||
@ -54,7 +56,7 @@ std::string format_time_ns(uint64_t time) noexcept {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove trailing character
|
if (s.back() == ' ')
|
||||||
s.pop_back();
|
s.pop_back();
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
@ -69,7 +71,7 @@ static const constexpr uint64_t total_bytes = u64(1)<<(10 * (N_BYTES - 1));
|
|||||||
* See more : https://en.wikipedia.org/wiki/JEDEC_memory_standards
|
* See more : https://en.wikipedia.org/wiki/JEDEC_memory_standards
|
||||||
*
|
*
|
||||||
* @param bytes Number of bytes
|
* @param bytes Number of bytes
|
||||||
* @return JEDEC compliant formatted number of bytes
|
* @return std::string JEDEC compliant formatted number of bytes
|
||||||
*/
|
*/
|
||||||
std::string format_byte_size(uint64_t bytes) noexcept {
|
std::string format_byte_size(uint64_t bytes) noexcept {
|
||||||
if (bytes == 0)
|
if (bytes == 0)
|
||||||
@ -93,13 +95,6 @@ std::string format_byte_size(uint64_t bytes) noexcept {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Format a number with a separator (i.e. 1000 as 1,000)
|
|
||||||
*
|
|
||||||
* @param k number to format
|
|
||||||
* @param separator Character used between each thousand
|
|
||||||
* @return Formatted number
|
|
||||||
*/
|
|
||||||
std::string thousand_sep(uint64_t k, const char& separator) noexcept {
|
std::string thousand_sep(uint64_t k, const char& separator) noexcept {
|
||||||
const std::string n = std::to_string(k);
|
const std::string n = std::to_string(k);
|
||||||
const uint64_t st_size = n.length() + (n.length() - 1) / 3;
|
const uint64_t st_size = n.length() + (n.length() - 1) / 3;
|
||||||
@ -116,3 +111,4 @@ std::string thousand_sep(uint64_t k, const char& separator) noexcept {
|
|||||||
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,13 +3,6 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Print a formatted row of titles with gaps, separated by a separator.
|
|
||||||
*
|
|
||||||
* @param gaps List of size gaps
|
|
||||||
* @param titles List of titles
|
|
||||||
* @param separator Separator character between each gap
|
|
||||||
*/
|
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
constexpr void formatted_row(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles,
|
constexpr void formatted_row(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles,
|
||||||
const char* const separator = "│") noexcept {
|
const char* const separator = "│") noexcept {
|
||||||
@ -18,19 +11,10 @@ constexpr void formatted_row(const std::array<int32_t, N>& gaps, const std::arra
|
|||||||
printf("%s\n", separator);
|
printf("%s\n", separator);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Print a formatted line of repeated characters.
|
|
||||||
*
|
|
||||||
* @param gaps List of size gaps
|
|
||||||
* @param left Character on the left
|
|
||||||
* @param middle Character between each separator
|
|
||||||
* @param separator Separator character between each gap
|
|
||||||
* @param right Character on the right
|
|
||||||
*/
|
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* const left, const char* const middle,
|
constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* const right, const char* const middle,
|
||||||
const char* const separator, const char* const right) noexcept {
|
const char* const separator, const char* const left) noexcept {
|
||||||
printf("%s", left);
|
printf("%s", right);
|
||||||
for(size_t i = 0; i < N; ++i){
|
for(size_t i = 0; i < N; ++i){
|
||||||
for(int32_t j = std::abs(gaps[i]) + 2; j > 0; --j)
|
for(int32_t j = std::abs(gaps[i]) + 2; j > 0; --j)
|
||||||
printf("%s", separator);
|
printf("%s", separator);
|
||||||
@ -38,27 +22,16 @@ constexpr void formatted_line(const std::array<int32_t, N>& gaps, const char* co
|
|||||||
printf("%s", middle);
|
printf("%s", middle);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("%s\n", right);
|
printf("%s\n", left);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Print a formatted header with the given titles and sizes.
|
|
||||||
*
|
|
||||||
* @param gaps List of size gaps
|
|
||||||
* @param titles List of titles
|
|
||||||
*/
|
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
constexpr void header(const std::array<int32_t, N>& gaps, const std::array<const char* const, N>& titles) noexcept {
|
constexpr void header(const std::array<const char* const, N>& titles, const std::array<int32_t, N>& gaps) noexcept {
|
||||||
formatted_line(gaps, "┌", "┬", "─", "┐");
|
formatted_line(gaps, "┌", "┬", "─", "┐");
|
||||||
formatted_row(gaps, titles);
|
formatted_row(gaps, titles);
|
||||||
formatted_line(gaps, "├", "┼", "─", "┤");
|
formatted_line(gaps, "├", "┼", "─", "┤");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Print a formatted footer with the given sizes.
|
|
||||||
*
|
|
||||||
* @param gaps List of size gaps
|
|
||||||
*/
|
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept {
|
constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept {
|
||||||
formatted_line(gaps, "└", "┴", "─", "┘");
|
formatted_line(gaps, "└", "┴", "─", "┘");
|
||||||
@ -67,36 +40,7 @@ constexpr inline void footer(const std::array<int32_t, N>& gaps) noexcept {
|
|||||||
#define duration_ns(a) std::chrono::duration_cast<std::chrono::nanoseconds>(a).count()
|
#define duration_ns(a) std::chrono::duration_cast<std::chrono::nanoseconds>(a).count()
|
||||||
#define perf_counter_ns() std::chrono::high_resolution_clock::now()
|
#define perf_counter_ns() std::chrono::high_resolution_clock::now()
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Format the time in seconds in human readable format.
|
|
||||||
*
|
|
||||||
* @param time number of seconds
|
|
||||||
* @return The formatted human readable string
|
|
||||||
*/
|
|
||||||
std::string format_time(uint64_t) noexcept;
|
std::string format_time(uint64_t) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Format the time in nanoseconds in human readable format.
|
|
||||||
*
|
|
||||||
* @param time Time in nanoseconds
|
|
||||||
* @return std::string The formatted human readable string
|
|
||||||
*/
|
|
||||||
std::string format_time_ns(uint64_t) noexcept;
|
std::string format_time_ns(uint64_t) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Convert a number of bytes to JEDEC standard form.
|
|
||||||
* See more : https://en.wikipedia.org/wiki/JEDEC_memory_standards
|
|
||||||
*
|
|
||||||
* @param bytes Number of bytes
|
|
||||||
* @return JEDEC compliant formatted number of bytes
|
|
||||||
*/
|
|
||||||
std::string format_byte_size(uint64_t) noexcept;
|
std::string format_byte_size(uint64_t) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Format a number with a separator (i.e. 1000 as 1,000)
|
|
||||||
*
|
|
||||||
* @param k number to format
|
|
||||||
* @param separator used between each thousand
|
|
||||||
* @return Formatted number
|
|
||||||
*/
|
|
||||||
std::string thousand_sep(uint64_t, const char& = ',') noexcept;
|
std::string thousand_sep(uint64_t, const char& = ',') noexcept;
|
||||||
|
@ -2,25 +2,14 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test if a given result is equal to the expected one and log the result
|
|
||||||
*
|
|
||||||
* @tparam T type of returning values
|
|
||||||
* @param name of the unit test
|
|
||||||
* @param expected result of the function call
|
|
||||||
* @param result of the function
|
|
||||||
*/
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static void Assert(const char* const name, const T& expected, const T& result) noexcept {
|
void Assert(const char* name, const T& expected, const T& result) noexcept {
|
||||||
if(expected != result){
|
if(expected != result){
|
||||||
std::cerr << "For test named " << name << " Expected '" << expected << "' but got '" << result << "' instead\n";
|
std::cerr << "For test named " << name << " Expected '" << expected << "' but got '" << result << "' instead\n";
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test suite for the format_byte_size output
|
|
||||||
*/
|
|
||||||
void format_byte_size_test(void) noexcept {
|
void format_byte_size_test(void) noexcept {
|
||||||
Assert("format_byte_size null", std::string("0B"), format_byte_size(static_cast<uint64_t>(0)));
|
Assert("format_byte_size null", std::string("0B"), format_byte_size(static_cast<uint64_t>(0)));
|
||||||
Assert("format_byte_size byte", std::string("1B"), format_byte_size(static_cast<uint64_t>(1)));
|
Assert("format_byte_size byte", std::string("1B"), format_byte_size(static_cast<uint64_t>(1)));
|
||||||
@ -37,9 +26,6 @@ void format_byte_size_test(void) noexcept {
|
|||||||
Assert("format_byte_size max", std::string("15EB 1023PB 1023TB 1023GB 1023MB 1023KB 1023B"), format_byte_size(static_cast<uint64_t>(-1)));
|
Assert("format_byte_size max", std::string("15EB 1023PB 1023TB 1023GB 1023MB 1023KB 1023B"), format_byte_size(static_cast<uint64_t>(-1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test suite for the format_time output
|
|
||||||
*/
|
|
||||||
void format_time_test(void) noexcept {
|
void format_time_test(void) noexcept {
|
||||||
// https://en.wikipedia.org/wiki/Unit_of_time
|
// https://en.wikipedia.org/wiki/Unit_of_time
|
||||||
Assert("format_time null", std::string("0s"), format_time(static_cast<uint64_t>(0)));
|
Assert("format_time null", std::string("0s"), format_time(static_cast<uint64_t>(0)));
|
||||||
@ -94,15 +80,12 @@ void format_time_test(void) noexcept {
|
|||||||
Assert("format_time max", std::string("5849424173c 55y 3w 5j 7h 15s"), format_time(static_cast<uint64_t>(-1)));
|
Assert("format_time max", std::string("5849424173c 55y 3w 5j 7h 15s"), format_time(static_cast<uint64_t>(-1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test suite for the format_time_ns output
|
|
||||||
*/
|
|
||||||
void format_time_ns_test(void) noexcept {
|
void format_time_ns_test(void) noexcept {
|
||||||
// https://en.wikipedia.org/wiki/Unit_of_time
|
// https://en.wikipedia.org/wiki/Unit_of_time
|
||||||
Assert("format_time_ns null", std::string("0ns"), format_time_ns(static_cast<uint64_t>(0)));
|
Assert("format_time_ns null", std::string("0ns"), format_time_ns(static_cast<uint64_t>(0)));
|
||||||
Assert("format_time_ns nanosecond", std::string("1ns"), format_time_ns(static_cast<uint64_t>(1)));
|
Assert("format_time_ns nanosecond", std::string("1ns"), format_time_ns(static_cast<uint64_t>(1)));
|
||||||
Assert("format_time_ns shake", std::string("10ns"), format_time_ns(static_cast<uint64_t>(10)));
|
Assert("format_time_ns shake", std::string("10ns"), format_time_ns(static_cast<uint64_t>(10)));
|
||||||
Assert("format_time_ns microsecond", std::string("1us"), format_time_ns(static_cast<uint64_t>(1e3)));
|
Assert("format_time_ns microsecond", std::string("1µs"), format_time_ns(static_cast<uint64_t>(1e3)));
|
||||||
Assert("format_time_ns millisecond", std::string("1ms"), format_time_ns(static_cast<uint64_t>(1e6)));
|
Assert("format_time_ns millisecond", std::string("1ms"), format_time_ns(static_cast<uint64_t>(1e6)));
|
||||||
Assert("format_time_ns centisecond", std::string("10ms"), format_time_ns(static_cast<uint64_t>(1e7)));
|
Assert("format_time_ns centisecond", std::string("10ms"), format_time_ns(static_cast<uint64_t>(1e7)));
|
||||||
Assert("format_time_ns decisecond", std::string("100ms"), format_time_ns(static_cast<uint64_t>(1e8)));
|
Assert("format_time_ns decisecond", std::string("100ms"), format_time_ns(static_cast<uint64_t>(1e8)));
|
||||||
@ -129,7 +112,7 @@ void format_time_ns_test(void) noexcept {
|
|||||||
Assert("format_time_ns year", std::string("1y"), format_time_ns(static_cast<uint64_t>(31536e12)));
|
Assert("format_time_ns year", std::string("1y"), format_time_ns(static_cast<uint64_t>(31536e12)));
|
||||||
Assert("format_time_ns tropical year", std::string("1y 5h 48m 45s 216ms"), format_time_ns(static_cast<uint64_t>(31556925216e6)));
|
Assert("format_time_ns tropical year", std::string("1y 5h 48m 45s 216ms"), format_time_ns(static_cast<uint64_t>(31556925216e6)));
|
||||||
Assert("format_time_ns gregorian year", std::string("1y 5h 49m 12s"), format_time_ns(static_cast<uint64_t>(31556952e9)));
|
Assert("format_time_ns gregorian year", std::string("1y 5h 49m 12s"), format_time_ns(static_cast<uint64_t>(31556952e9)));
|
||||||
Assert("format_time_ns sidereal year", std::string("1y 6h 9m 9s 763ms 545us 600ns"), format_time_ns(static_cast<uint64_t>(315581497635456e2)));
|
Assert("format_time_ns sidereal year", std::string("1y 6h 9m 9s 763ms 545µs 600ns"), format_time_ns(static_cast<uint64_t>(315581497635456e2)));
|
||||||
Assert("format_time_ns leap year", std::string("1y 1j"), format_time_ns(static_cast<uint64_t>(316224e11)));
|
Assert("format_time_ns leap year", std::string("1y 1j"), format_time_ns(static_cast<uint64_t>(316224e11)));
|
||||||
Assert("format_time_ns olympiad", std::string("4y"), format_time_ns(static_cast<uint64_t>(126144e12)));
|
Assert("format_time_ns olympiad", std::string("4y"), format_time_ns(static_cast<uint64_t>(126144e12)));
|
||||||
Assert("format_time_ns lusturm", std::string("5y"), format_time_ns(static_cast<uint64_t>(15768e13)));
|
Assert("format_time_ns lusturm", std::string("5y"), format_time_ns(static_cast<uint64_t>(15768e13)));
|
||||||
@ -154,12 +137,9 @@ void format_time_ns_test(void) noexcept {
|
|||||||
//Assert("format_time_ns ronnasecond", std::string(""), format_time_ns(static_cast<uint64_t>(1e36)));
|
//Assert("format_time_ns ronnasecond", std::string(""), format_time_ns(static_cast<uint64_t>(1e36)));
|
||||||
//Assert("format_time_ns quettasecond", std::string(""), format_time_ns(static_cast<uint64_t>(1e39)));
|
//Assert("format_time_ns quettasecond", std::string(""), format_time_ns(static_cast<uint64_t>(1e39)));
|
||||||
// uint64_t_MAX == 2**64 - 1 == 18446744073709551615I64u == -1
|
// uint64_t_MAX == 2**64 - 1 == 18446744073709551615I64u == -1
|
||||||
Assert("format_time_ns max", std::string("5c 84y 11M 2j 23h 34m 33s 709ms 551us 615ns"), format_time_ns(static_cast<uint64_t>(-1)));
|
Assert("format_time_ns max", std::string("5c 84y 11M 2j 23h 34m 33s 709ms 551µs 615ns"), format_time_ns(static_cast<uint64_t>(-1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test suite for the thousand_sep output
|
|
||||||
*/
|
|
||||||
void thousand_sep_test(void) noexcept {
|
void thousand_sep_test(void) noexcept {
|
||||||
// https://en.wikipedia.org/wiki/Names_of_large_numbers
|
// https://en.wikipedia.org/wiki/Names_of_large_numbers
|
||||||
Assert("thousand_sep null", std::string("0"), thousand_sep(static_cast<uint64_t>(0)));
|
Assert("thousand_sep null", std::string("0"), thousand_sep(static_cast<uint64_t>(0)));
|
||||||
@ -202,3 +182,4 @@ void thousand_sep_test(void) noexcept {
|
|||||||
// uint64_t_MAX == 2**64 - 1 == 18446744073709551615I64u == -1
|
// uint64_t_MAX == 2**64 - 1 == 18446744073709551615I64u == -1
|
||||||
Assert("thousand_sep max", std::string("18,446,744,073,709,551,615"), thousand_sep(static_cast<uint64_t>(-1)));
|
Assert("thousand_sep max", std::string("18,446,744,073,709,551,615"), thousand_sep(static_cast<uint64_t>(-1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,21 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test suite for the format_byte_size output
|
|
||||||
*/
|
|
||||||
void format_byte_size_test(void) noexcept;
|
void format_byte_size_test(void) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test suite for the format_time output
|
|
||||||
*/
|
|
||||||
void format_time_test(void) noexcept;
|
void format_time_test(void) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test suite for the format_time_ns output
|
|
||||||
*/
|
|
||||||
void format_time_ns_test(void) noexcept;
|
void format_time_ns_test(void) noexcept;
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test suite for the thousand_sep output
|
|
||||||
*/
|
|
||||||
void thousand_sep_test(void) noexcept;
|
void thousand_sep_test(void) noexcept;
|
||||||
|
@ -1,19 +0,0 @@
|
|||||||
services:
|
|
||||||
downloader:
|
|
||||||
extends:
|
|
||||||
file: ./downloader/docker-compose.yaml
|
|
||||||
service: downloader
|
|
||||||
violajones-cpp:
|
|
||||||
extends:
|
|
||||||
file: ./cpp/docker-compose.yaml
|
|
||||||
service: violajones-cpp
|
|
||||||
depends_on:
|
|
||||||
downloader:
|
|
||||||
condition: service_completed_successfully
|
|
||||||
violajones-python:
|
|
||||||
extends:
|
|
||||||
file: ./python/docker-compose.yaml
|
|
||||||
service: violajones-python
|
|
||||||
depends_on:
|
|
||||||
downloader:
|
|
||||||
condition: service_completed_successfully
|
|
42
download_data.sh
Executable file
42
download_data.sh
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# Exit if any of the commands doesn't exit with code 0
|
||||||
|
set -e
|
||||||
|
|
||||||
|
EXEC_DIR=$1
|
||||||
|
test -z $EXEC_DIR && EXEC_DIR=.
|
||||||
|
DATA_LOCATION=$EXEC_DIR/data
|
||||||
|
mkdir -p $DATA_LOCATION
|
||||||
|
|
||||||
|
if [ ! -f $DATA_LOCATION/X_train.bin ] || [ ! -f $DATA_LOCATION/X_test.bin ] \
|
||||||
|
|| [ ! -f $DATA_LOCATION/y_train.bin ] || [ ! -f $DATA_LOCATION/y_test.bin ]; then
|
||||||
|
#if true; then
|
||||||
|
if [ ! -f $DATA_LOCATION/faces.tar.gz ]; then
|
||||||
|
echo 'Downloading raw dataset'
|
||||||
|
curl -o $DATA_LOCATION/faces.tar.gz http://www.ai.mit.edu/courses/6.899/lectures/faces.tar.gz
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo 'Extracting raw files'
|
||||||
|
tar xzf $DATA_LOCATION/faces.tar.gz -C $DATA_LOCATION
|
||||||
|
rm $DATA_LOCATION/README
|
||||||
|
rm $DATA_LOCATION/svm.*
|
||||||
|
|
||||||
|
echo 'Extracting raw train set'
|
||||||
|
tar xzf $DATA_LOCATION/face.train.tar.gz -C $DATA_LOCATION
|
||||||
|
rm $DATA_LOCATION/face.train.tar.gz
|
||||||
|
|
||||||
|
echo 'Extracting raw test set'
|
||||||
|
tar xzf $DATA_LOCATION/face.test.tar.gz -C $DATA_LOCATION
|
||||||
|
rm $DATA_LOCATION/face.test.tar.gz
|
||||||
|
|
||||||
|
echo 'Converting raw dataset to bin file'
|
||||||
|
source $EXEC_DIR/python/activate.sh $EXEC_DIR
|
||||||
|
python $EXEC_DIR/python/convert_dataset.py $DATA_LOCATION
|
||||||
|
|
||||||
|
echo 'Removing leftovers'
|
||||||
|
rm -rf $DATA_LOCATION/train
|
||||||
|
rm -rf $DATA_LOCATION/test
|
||||||
|
|
||||||
|
echo 'Done !'
|
||||||
|
fi
|
@ -1,11 +0,0 @@
|
|||||||
FROM alpine:3.19.1
|
|
||||||
|
|
||||||
RUN apk add --no-cache curl=8.5.0-r0 python3=3.11.9-r0 && rm -rf /var/cache/apk*
|
|
||||||
|
|
||||||
WORKDIR /home/ViolaJones/downloader
|
|
||||||
COPY requirements.txt activate.sh ./
|
|
||||||
RUN ./activate.sh
|
|
||||||
|
|
||||||
COPY download_data.sh convert_dataset.py ./
|
|
||||||
|
|
||||||
CMD ["./download_data.sh"]
|
|
@ -1,27 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
|
|
||||||
# Exit if any of the commands doesn't exit with code 0
|
|
||||||
set -e
|
|
||||||
|
|
||||||
test -z "$EXEC_DIR" && EXEC_DIR=.
|
|
||||||
test -z "$VENV_PATH" && VENV_PATH="$EXEC_DIR/venv"
|
|
||||||
|
|
||||||
activate(){
|
|
||||||
if [ ! -d "$VENV_PATH" ]; then
|
|
||||||
echo 'Creating python virtual environnement'
|
|
||||||
python -m venv "$VENV_PATH"
|
|
||||||
echo 'Activating virtual environnement'
|
|
||||||
activate
|
|
||||||
echo 'Updating base pip packages'
|
|
||||||
python -m pip install -U setuptools pip
|
|
||||||
echo 'Installing requirements'
|
|
||||||
pip install -r requirements.txt
|
|
||||||
elif [ -f "$VENV_PATH"/Scripts/activate ]; then . "$VENV_PATH"/Scripts/activate
|
|
||||||
elif [ -f "$VENV_PATH"/bin/activate ]; then . "$VENV_PATH"/bin/activate
|
|
||||||
else
|
|
||||||
echo 'Python virtual environnement not detected'
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
activate
|
|
@ -1,60 +0,0 @@
|
|||||||
from io import BufferedReader
|
|
||||||
from tqdm import tqdm
|
|
||||||
from functools import partial
|
|
||||||
from sys import argv
|
|
||||||
import numpy as np
|
|
||||||
from os import path, listdir
|
|
||||||
|
|
||||||
# Induce determinism
|
|
||||||
np.random.seed(133742)
|
|
||||||
|
|
||||||
# Makes the "leave" argument default to False
|
|
||||||
tqdm = partial(tqdm, leave = False)
|
|
||||||
|
|
||||||
def read_pgm(pgm_file: BufferedReader) -> np.ndarray:
|
|
||||||
"""Read the data of a PGM file
|
|
||||||
|
|
||||||
Args:
|
|
||||||
pgm_file (BufferedReader): PGM File
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
np.ndarray: PGM data
|
|
||||||
"""
|
|
||||||
assert (f := pgm_file.readline()) == b'P5\n', f"Incorrect file format: {f}"
|
|
||||||
(width, height) = [int(i) for i in pgm_file.readline().split()]
|
|
||||||
assert width > 0 and height > 0, f"Incorrect dimensions: {width}x{height}"
|
|
||||||
assert (depth := int(pgm_file.readline())) < 256, f"Incorrect depth: {depth}"
|
|
||||||
|
|
||||||
buff = np.empty(height * width, dtype = np.uint8)
|
|
||||||
for i in range(buff.shape[0]):
|
|
||||||
buff[i] = ord(pgm_file.read(1))
|
|
||||||
return buff.reshape((height, width))
|
|
||||||
|
|
||||||
def __main__(data_path: str) -> None:
|
|
||||||
"""Read the data of every PGM file and output it in data files
|
|
||||||
|
|
||||||
Args:
|
|
||||||
data_path (str): Path of the PGM files
|
|
||||||
"""
|
|
||||||
for set_name in tqdm(["train", "test"], desc = "set name"):
|
|
||||||
X, y = [], []
|
|
||||||
for y_i, label in enumerate(tqdm(["non-face", "face"], desc = "label")):
|
|
||||||
for filename in tqdm(listdir(f"{data_path}/{set_name}/{label}"), desc = "Reading pgm file"):
|
|
||||||
with open(f"{data_path}/{set_name}/{label}/{filename}", "rb") as face:
|
|
||||||
X.append(read_pgm(face))
|
|
||||||
y.append(y_i)
|
|
||||||
|
|
||||||
X, y = np.asarray(X), np.asarray(y)
|
|
||||||
idx = np.random.permutation(y.shape[0])
|
|
||||||
X, y = X[idx], y[idx]
|
|
||||||
|
|
||||||
for org, s in tqdm(zip("Xy", [X, y]), desc = f"Writing {set_name}"):
|
|
||||||
with open(f"{data_path}/{org}_{set_name}.bin", "w") as out:
|
|
||||||
out.write(f'{str(s.shape)[1:-1].replace(",", "")}\n')
|
|
||||||
raw = s.ravel()
|
|
||||||
for s_i in tqdm(raw[:-1], desc = f"Writing {org}"):
|
|
||||||
out.write(f"{s_i} ")
|
|
||||||
out.write(str(raw[-1]))
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
__main__(argv[1]) if len(argv) == 2 else print(f"Usage: python {__file__[__file__.rfind(path.sep) + 1:]} ./data_location")
|
|
@ -1,6 +0,0 @@
|
|||||||
services:
|
|
||||||
downloader:
|
|
||||||
image: saundersp/violajones-downloader
|
|
||||||
build: .
|
|
||||||
volumes:
|
|
||||||
- ../data:/home/ViolaJones/data
|
|
@ -1,38 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
|
|
||||||
# Exit if any of the commands doesn't exit with code 0
|
|
||||||
set -e
|
|
||||||
|
|
||||||
test -z "$EXEC_DIR" && EXEC_DIR=.
|
|
||||||
DATA_PATH="$EXEC_DIR/../data"
|
|
||||||
test ! -d "$DATA_PATH" && mkdir -v "$DATA_PATH"
|
|
||||||
|
|
||||||
if [ ! -f "$DATA_PATH"/X_train.bin ] || [ ! -f "$DATA_PATH"/X_test.bin ] \
|
|
||||||
|| [ ! -f "$DATA_PATH"/y_train.bin ] || [ ! -f "$DATA_PATH"/y_test.bin ]; then
|
|
||||||
if [ ! -f "$DATA_PATH"/faces.tar.gz ]; then
|
|
||||||
echo 'Downloading raw dataset'
|
|
||||||
curl -o "$DATA_PATH"/faces.tar.gz http://www.ai.mit.edu/courses/6.899/lectures/faces.tar.gz
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo 'Extracting raw files'
|
|
||||||
tar xvzf "$DATA_PATH"/faces.tar.gz -C "$DATA_PATH"
|
|
||||||
rm -v "$DATA_PATH"/README "$DATA_PATH"/svm.*
|
|
||||||
|
|
||||||
echo 'Extracting raw train set'
|
|
||||||
tar xvzf "$DATA_PATH"/face.train.tar.gz -C "$DATA_PATH"
|
|
||||||
rm -v "$DATA_PATH"/face.train.tar.gz
|
|
||||||
|
|
||||||
echo 'Extracting raw test set'
|
|
||||||
tar xvzf "$DATA_PATH"/face.test.tar.gz -C "$DATA_PATH"
|
|
||||||
rm -v "$DATA_PATH"/face.test.tar.gz
|
|
||||||
|
|
||||||
echo 'Converting raw dataset to bin file'
|
|
||||||
export EXEC_DIR
|
|
||||||
. "$EXEC_DIR"/activate.sh
|
|
||||||
python "$EXEC_DIR"/convert_dataset.py "$DATA_PATH"
|
|
||||||
|
|
||||||
echo 'Removing leftovers'
|
|
||||||
rm -rvf "$DATA_PATH"/train "$DATA_PATH"/test
|
|
||||||
|
|
||||||
echo 'Done !'
|
|
||||||
fi
|
|
@ -1,2 +0,0 @@
|
|||||||
numpy==1.26.4
|
|
||||||
tqdm==4.66.2
|
|
@ -1,12 +0,0 @@
|
|||||||
FROM nvidia/cuda:12.4.1-devel-ubi9 as builder
|
|
||||||
|
|
||||||
RUN dnf install -y python3.11-3.11.5-1.el9_3 && dnf clean all
|
|
||||||
RUN ln -s /usr/bin/python3 /usr/bin/python
|
|
||||||
|
|
||||||
WORKDIR /home/ViolaJones/python
|
|
||||||
COPY Makefile activate.sh requirements.txt ./
|
|
||||||
RUN make venv
|
|
||||||
COPY *.py ./
|
|
||||||
|
|
||||||
ENTRYPOINT ["make"]
|
|
||||||
CMD ["start"]
|
|
@ -1,85 +1,34 @@
|
|||||||
MODELS_DIR := models
|
DATA := ../data/X_train.bin ../data/X_test.bin ../data/y_train.bin ../data/y_test.bin
|
||||||
OUT_DIR := out
|
|
||||||
DATA_PATH := ../data
|
|
||||||
DATA := $(DATA_PATH)/X_train.bin $(DATA_PATH)/X_test.bin $(DATA_PATH)/y_train.bin $(DATA_PATH)/y_test.bin
|
|
||||||
|
|
||||||
.PHONY: all
|
.PHONY: all start reset
|
||||||
all: venv
|
|
||||||
|
|
||||||
$(DATA):
|
all: ${DATA}
|
||||||
@echo 'Missing $(DATA) files, use downloader first' && exit 1
|
|
||||||
|
${DATA}:
|
||||||
|
@bash ../download_data.sh ..
|
||||||
|
|
||||||
.PHONY: venv
|
|
||||||
venv:
|
venv:
|
||||||
@sh -c '. ./activate.sh'
|
@bash -c 'source activate.sh'
|
||||||
|
|
||||||
.PHONY: start
|
start: ${DATA} venv
|
||||||
start: $(DATA) | venv check-python-works
|
@bash -c 'source activate.sh && python projet.py'
|
||||||
@sh -c '. ./activate.sh && python projet.py'
|
|
||||||
|
|
||||||
.PHONY: debug
|
reset:
|
||||||
debug: $(DATA) | venv check-python-works check-pudb-works
|
@echo Deleting generated states and models
|
||||||
|
@rm -rf out/* models/* | true
|
||||||
|
|
||||||
|
debug:
|
||||||
@bash -c 'source activate.sh && pudb projet.py'
|
@bash -c 'source activate.sh && pudb projet.py'
|
||||||
|
|
||||||
.PHONY: profile
|
profile:
|
||||||
profile: $(DATA) | venv check-python-works check-gprof2dot-works check-dot-works
|
@bash -c 'source activate.sh && python -m cProfile -o prof.out projet.py && gprof2dot -f pstats prof.out | dot -Tpng -o output.png'
|
||||||
@bash -c 'source activate.sh && python -m cProfile -o prof.out projet.py && gprof2dot -f pstats prof.out | dot -T png -o output.png'
|
|
||||||
|
|
||||||
.PHONY: log
|
mrproper: reset
|
||||||
log: $(DATA) reset | venv
|
@rm -r __pycache__ venv
|
||||||
@sed -i 's/GPU_BOOSTED: Final = False/GPU_BOOSTED: Final = True/;s/COMPILE_WITH_C: Final = False/COMPILE_WITH_C: Final = True/' config.py
|
|
||||||
@echo 'Logging GPU'
|
|
||||||
@make -s start > log_gpu
|
|
||||||
@sed -i 's/GPU_BOOSTED: Final = True/GPU_BOOSTED: Final = False/' config.py
|
|
||||||
@echo 'Logging CPU'
|
|
||||||
@make -s start > log_cpu
|
|
||||||
@sed -i 's/GPU_BOOSTED: Final = False/GPU_BOOSTED: Final = True/;s/COMPILE_WITH_C: Final = True/COMPILE_WITH_C: Final = False/' config.py
|
|
||||||
@echo 'Logging PGPU'
|
|
||||||
@make -s start > log_pgpu
|
|
||||||
@sed -i 's/GPU_BOOSTED: Final = True/GPU_BOOSTED: Final = False/' config.py
|
|
||||||
@echo 'Logging PY'
|
|
||||||
@make -s start > log_py
|
|
||||||
@echo 'Cleaning up'
|
|
||||||
@make -s reset
|
|
||||||
|
|
||||||
.PHONY: reset
|
test:
|
||||||
reset:
|
@bash -c 'source activate.sh && ls out | sed s/.pkl// | xargs -n1 python test_diff.py out'
|
||||||
@echo 'Deleting generated states and models'
|
@bash -c 'source activate.sh && ls models | sed s/.pkl// | xargs -n1 python test_diff.py models'
|
||||||
@rm -frv $(OUT_DIR)/* $(MODELS_DIR)/*
|
|
||||||
#@ln -sv /mnt/pierre_stuffs/ViolaJones/python/models .
|
|
||||||
#@ln -sv /mnt/pierre_stuffs/ViolaJones/python/out .
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
@rm -fv log_gpu log_cpu log_gpu log_py
|
|
||||||
|
|
||||||
.PHONY: mrproper
|
|
||||||
mrproper: clean
|
|
||||||
@rm -rfv __pycache__ venv
|
|
||||||
|
|
||||||
.PHONY: help
|
|
||||||
help:
|
help:
|
||||||
@echo "Available targets:"
|
@echo "all start reset mrproper help"
|
||||||
@echo "\tall: alias for start, (default target)"
|
|
||||||
@echo "\tvenv: Create python virtual environnement."
|
|
||||||
@echo "\tstart: Start the ViolaJones algorithm, require data beforehand downloaded by the downloader."
|
|
||||||
@echo "\tdebug: Debug the ViolaJones algorithm, require data beforehand downloaded by the downloader."
|
|
||||||
@echo "\tprofile: Profile the ViolaJones algorithm functions timestamps, require data beforehand downloaded by the downloader."
|
|
||||||
@echo "\treset: Will delete any saved models and processed data made by ViolaJones."
|
|
||||||
@echo "\tmrproper: Will remove cpp binary files. Will execute reset target beforehand."
|
|
||||||
|
|
||||||
.PHONY: check-python-works
|
|
||||||
check-python-works:
|
|
||||||
@python --version >/dev/null 2>&1 || (echo 'Please install Python.' && exit 1)
|
|
||||||
|
|
||||||
.PHONY: check-pudb-works
|
|
||||||
check-pudb-works:
|
|
||||||
@pudb --version >/dev/null 2>&1 || (echo 'Please install pudb.' && exit 1)
|
|
||||||
|
|
||||||
.PHONY: check-gprof2dot-works
|
|
||||||
check-gprof2dot-works:
|
|
||||||
@gprof2dot --help >/dev/null 2>&1 || (echo 'Please install gprof2dot.' && exit 1)
|
|
||||||
|
|
||||||
.PHONY: check-dot-works
|
|
||||||
check-dot-works:
|
|
||||||
@dot --version >/dev/null 2>&1 || (echo 'Please install dot from graphviz.' && exit 1)
|
|
||||||
|
@ -18,13 +18,13 @@ else:
|
|||||||
|
|
||||||
@njit('uint8[:, :, :, :](uint16, uint16)')
|
@njit('uint8[:, :, :, :](uint16, uint16)')
|
||||||
def build_features(width: int, height: int) -> np.ndarray:
|
def build_features(width: int, height: int) -> np.ndarray:
|
||||||
"""Initialize the features based on the input shape.
|
"""Initialize the features base on the input shape.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
shape (Tuple[int, int]): Shape of the image (Width, Height)
|
shape (Tuple[int, int]): Shape of the image (Width, Height).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: The initialized features
|
np.ndarray: The initialized features.
|
||||||
"""
|
"""
|
||||||
feats = []
|
feats = []
|
||||||
empty = (0, 0, 0, 0)
|
empty = (0, 0, 0, 0)
|
||||||
@ -63,10 +63,10 @@ def init_weights(y_train: np.ndarray) -> np.ndarray:
|
|||||||
"""Initialize the weights of the weak classifiers based on the training labels.
|
"""Initialize the weights of the weak classifiers based on the training labels.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
y_train (np.ndarray): Training labels
|
y_train (np.ndarray): Training labels.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: The initialized weights
|
np.ndarray: The initialized weights.
|
||||||
"""
|
"""
|
||||||
weights = np.empty_like(y_train, dtype = np.float64)
|
weights = np.empty_like(y_train, dtype = np.float64)
|
||||||
t = y_train.sum()
|
t = y_train.sum()
|
||||||
@ -79,48 +79,26 @@ def classify_weak_clf(x_feat_i: np.ndarray, threshold: int, polarity: int) -> np
|
|||||||
"""Classify the integrated features based on polarity and threshold.
|
"""Classify the integrated features based on polarity and threshold.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
x_feat_i (np.ndarray): Integrated features
|
x_feat_i (np.ndarray): Integrated features.
|
||||||
threshold (int): Trained threshold
|
threshold (int): Trained threshold.
|
||||||
polarity (int): Trained polarity
|
polarity (int): Trained polarity.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Classified features
|
np.ndarray: Classified features.
|
||||||
"""
|
"""
|
||||||
res = np.zeros_like(x_feat_i, dtype = np.int8)
|
res = np.zeros_like(x_feat_i, dtype = np.int8)
|
||||||
res[polarity * x_feat_i < polarity * threshold] = 1
|
res[polarity * x_feat_i < polarity * threshold] = 1
|
||||||
return res
|
return res
|
||||||
|
|
||||||
@njit('uint8[:](float64[:], int32[:, :], int32[:, :])')
|
|
||||||
def classify_viola_jones(alphas: np.ndarray, classifiers: np.ndarray, X_feat: np.ndarray) -> np.ndarray:
|
|
||||||
"""Classify the trained classifiers on the given features.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
alphas (np.ndarray): Trained alphas
|
|
||||||
classifiers (np.ndarray): Trained classifiers
|
|
||||||
X_feat (np.ndarray): Integrated features
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
np.ndarray: Classification results
|
|
||||||
"""
|
|
||||||
total = np.zeros(X_feat.shape[1], dtype = np.float64)
|
|
||||||
|
|
||||||
for i, alpha in enumerate(tqdm_iter(alphas, "Classifying ViolaJones")):
|
|
||||||
(j, threshold, polarity) = classifiers[i]
|
|
||||||
total += alpha * classify_weak_clf(X_feat[j], threshold, polarity)
|
|
||||||
|
|
||||||
y_pred = np.zeros(X_feat.shape[1], dtype = np.uint8)
|
|
||||||
y_pred[total >= 0.5 * np.sum(alphas)] = 1
|
|
||||||
return y_pred
|
|
||||||
|
|
||||||
@njit('Tuple((int32, float64, float64[:]))(int32[:, :], float64[:], int32[:, :], uint8[:])')
|
@njit('Tuple((int32, float64, float64[:]))(int32[:, :], float64[:], int32[:, :], uint8[:])')
|
||||||
def select_best(classifiers: np.ndarray, weights: np.ndarray, X_feat: np.ndarray, y: np.ndarray) -> Tuple[int, float, np.ndarray]:
|
def select_best(classifiers: np.ndarray, weights: np.ndarray, X_feat: np.ndarray, y: np.ndarray) -> Tuple[int, float, np.ndarray]:
|
||||||
"""Select the best classifier given their predictions.
|
"""Select the best classifier given theirs predictions.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
classifiers (np.ndarray): The weak classifiers
|
classifiers (np.ndarray): The weak classifiers.
|
||||||
weights (np.ndarray): Trained weights of each classifiers
|
weights (np.ndarray): Trained weights of each classifiers.
|
||||||
X_feat (np.ndarray): Integrated features
|
X_feat (np.ndarray): Integrated features.
|
||||||
y (np.ndarray): Features labels
|
y (np.ndarray): Features labels.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[int, float, np.ndarray]: Index of the best classifier, the best error and the best accuracy
|
Tuple[int, float, np.ndarray]: Index of the best classifier, the best error and the best accuracy
|
||||||
@ -138,13 +116,13 @@ def train_viola_jones(T: int, X_feat: np.ndarray, X_feat_argsort: np.ndarray, y:
|
|||||||
"""Train the weak classifiers.
|
"""Train the weak classifiers.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
T (int): Number of weak classifiers
|
T (int): Number of weak classifiers.
|
||||||
X_feat (np.ndarray): Integrated features
|
X_feat (np.ndarray): Integrated features.
|
||||||
X_feat_argsort (np.ndarray): Sorted indexes of the integrated features
|
X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
|
||||||
y (np.ndarray): Features labels
|
y (np.ndarray): Features labels.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[np.ndarray, np.ndarray]: List of trained alphas and the list of the final classifiers
|
Tuple[np.ndarray, np.ndarray]: List of trained alphas and the list of the final classifiers.
|
||||||
"""
|
"""
|
||||||
weights = init_weights(y)
|
weights = init_weights(y)
|
||||||
alphas, final_classifier = np.empty(T, dtype = np.float64), np.empty((T, 3), dtype = np.int32)
|
alphas, final_classifier = np.empty(T, dtype = np.float64), np.empty((T, 3), dtype = np.int32)
|
||||||
@ -161,22 +139,44 @@ def train_viola_jones(T: int, X_feat: np.ndarray, X_feat_argsort: np.ndarray, y:
|
|||||||
|
|
||||||
return alphas, final_classifier
|
return alphas, final_classifier
|
||||||
|
|
||||||
#@njit
|
@njit('uint8[:](float64[:], int32[:, :], int32[:, :])')
|
||||||
#def get_best_anova_features(X: np.ndarray, y: np.ndarray) -> np.ndarray:
|
def classify_viola_jones(alphas: np.ndarray, classifiers: np.ndarray, X_feat: np.ndarray) -> np.ndarray:
|
||||||
# #SelectPercentile(f_classif, percentile = 10).fit(X, y).get_support(indices = True)
|
"""Classify the trained classifiers on the given features.
|
||||||
# classes = [X.T[y == 0].astype(np.float64), X.T[y == 1].astype(np.float64)]
|
|
||||||
# n_samples_per_class = np.asarray([classes[0].shape[0], classes[1].shape[0]])
|
Args:
|
||||||
# n_samples = classes[0].shape[0] + classes[1].shape[0]
|
alphas (np.ndarray): Trained alphas.
|
||||||
# ss_all_data = (classes[0] ** 2).sum(axis = 0) + (classes[1] ** 2).sum(axis = 0)
|
classifiers (np.ndarray): Trained classifiers.
|
||||||
# sums_classes = [np.asarray(classes[0].sum(axis = 0)), np.asarray(classes[1].sum(axis = 0))]
|
X_feat (np.ndarray): Integrated features.
|
||||||
# sq_of_sums_all_data = (sums_classes[0] + sums_classes[1]) ** 2
|
|
||||||
# sq_of_sums_args = [sums_classes[0] ** 2, sums_classes[1] ** 2]
|
Returns:
|
||||||
# ss_tot = ss_all_data - sq_of_sums_all_data / n_samples
|
np.ndarray: Classification results.
|
||||||
#
|
"""
|
||||||
# sqd_sum_bw_n = sq_of_sums_args[0] / n_samples_per_class[0] + \
|
total = np.zeros(X_feat.shape[1], dtype = np.float64)
|
||||||
# sq_of_sums_args[1] / n_samples_per_class[1] - sq_of_sums_all_data / n_samples
|
|
||||||
# ss_wn = ss_tot - sqd_sum_bw_n
|
for i, alpha in enumerate(tqdm_iter(alphas, "Classifying ViolaJones")):
|
||||||
# df_wn = n_samples - 2
|
(j, threshold, polarity) = classifiers[i]
|
||||||
# msw = ss_wn / df_wn
|
total += alpha * classify_weak_clf(X_feat[j], threshold, polarity)
|
||||||
# f_values = sqd_sum_bw_n / msw
|
|
||||||
# return np.sort(np.argsort(f_values)[::-1][: int(np.ceil(X.shape[0] / 10.0))])
|
y_pred = np.zeros(X_feat.shape[1], dtype = np.uint8)
|
||||||
|
y_pred[total >= 0.5 * np.sum(alphas)] = 1
|
||||||
|
return y_pred
|
||||||
|
|
||||||
|
@njit
|
||||||
|
def get_best_anova_features(X: np.ndarray, y: np.ndarray) -> np.ndarray:
|
||||||
|
#SelectPercentile(f_classif, percentile = 10).fit(X, y).get_support(indices = True)
|
||||||
|
classes = [X.T[y == 0].astype(np.float64), X.T[y == 1].astype(np.float64)]
|
||||||
|
n_samples_per_class = np.asarray([classes[0].shape[0], classes[1].shape[0]])
|
||||||
|
n_samples = classes[0].shape[0] + classes[1].shape[0]
|
||||||
|
ss_alldata = (classes[0] ** 2).sum(axis = 0) + (classes[1] ** 2).sum(axis = 0)
|
||||||
|
sums_classes = [np.asarray(classes[0].sum(axis = 0)), np.asarray(classes[1].sum(axis = 0))]
|
||||||
|
sq_of_sums_alldata = (sums_classes[0] + sums_classes[1]) ** 2
|
||||||
|
sq_of_sums_args = [sums_classes[0] ** 2, sums_classes[1] ** 2]
|
||||||
|
ss_tot = ss_alldata - sq_of_sums_alldata / n_samples
|
||||||
|
|
||||||
|
sqd_sum_bw_n = sq_of_sums_args[0] / n_samples_per_class[0] + \
|
||||||
|
sq_of_sums_args[1] / n_samples_per_class[1] - sq_of_sums_alldata / n_samples
|
||||||
|
ss_wn = ss_tot - sqd_sum_bw_n
|
||||||
|
df_wn = n_samples - 2
|
||||||
|
msw = ss_wn / df_wn
|
||||||
|
f_values = sqd_sum_bw_n / msw
|
||||||
|
return np.sort(np.argsort(f_values)[::-1][: int(np.ceil(X.shape[0] / 10.0))])
|
||||||
|
@ -18,10 +18,10 @@ def set_integral_image(X: np.ndarray) -> np.ndarray:
|
|||||||
"""Transform the input images in integrated images (CPU version).
|
"""Transform the input images in integrated images (CPU version).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X (np.ndarray): Dataset of images
|
X (np.ndarray): Dataset of images.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Dataset of integrated images
|
np.ndarray: Dataset of integrated images.
|
||||||
"""
|
"""
|
||||||
X_ii = np.empty_like(X, dtype = np.uint32)
|
X_ii = np.empty_like(X, dtype = np.uint32)
|
||||||
for i, Xi in enumerate(tqdm_iter(X, "Applying integral image")):
|
for i, Xi in enumerate(tqdm_iter(X, "Applying integral image")):
|
||||||
@ -34,18 +34,59 @@ def set_integral_image(X: np.ndarray) -> np.ndarray:
|
|||||||
X_ii[i] = ii
|
X_ii[i] = ii
|
||||||
return X_ii
|
return X_ii
|
||||||
|
|
||||||
|
@njit('uint32(uint32[:, :], int16, int16, int16, int16)')
|
||||||
|
def __compute_feature__(ii: np.ndarray, x: int, y: int, w: int, h: int) -> int:
|
||||||
|
"""Compute a feature on an integrated image at a specific coordinate (CPU version).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ii (np.ndarray): Integrated image.
|
||||||
|
x (int): X coordinate.
|
||||||
|
y (int): Y coordinate.
|
||||||
|
w (int): width of the feature.
|
||||||
|
h (int): height of the feature.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
int: Computed feature.
|
||||||
|
"""
|
||||||
|
return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]
|
||||||
|
|
||||||
|
@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])')
|
||||||
|
def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
|
||||||
|
"""Apply the features on a integrated image dataset (CPU version).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
feats (np.ndarray): Features to apply.
|
||||||
|
X_ii (np.ndarray): Integrated image dataset.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
np.ndarray: Applied features.
|
||||||
|
"""
|
||||||
|
X_feat = np.empty((feats.shape[0], X_ii.shape[0]), dtype = np.int32)
|
||||||
|
|
||||||
|
for i, (p, n) in enumerate(tqdm_iter(feats, "Applying features")):
|
||||||
|
for j, x_i in enumerate(X_ii):
|
||||||
|
p_x, p_y, p_w, p_h = p[0]
|
||||||
|
p1_x, p1_y, p1_w, p1_h = p[1]
|
||||||
|
n_x, n_y, n_w, n_h = n[0]
|
||||||
|
n1_x, n1_y, n1_w, n1_h = n[1]
|
||||||
|
p1 = __compute_feature__(x_i, p_x, p_y, p_w, p_h) + __compute_feature__(x_i, p1_x, p1_y, p1_w, p1_h)
|
||||||
|
n1 = __compute_feature__(x_i, n_x, n_y, n_w, n_h) + __compute_feature__(x_i, n1_x, n1_y, n1_w, n1_h)
|
||||||
|
X_feat[i, j] = int32(p1) - int32(n1)
|
||||||
|
|
||||||
|
return X_feat
|
||||||
|
|
||||||
@njit('int32[:, :](int32[:, :], uint16[:, :], uint8[:], float64[:])')
|
@njit('int32[:, :](int32[:, :], uint16[:, :], uint8[:], float64[:])')
|
||||||
def train_weak_clf(X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray, weights: np.ndarray) -> np.ndarray:
|
def train_weak_clf(X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray, weights: np.ndarray) -> np.ndarray:
|
||||||
"""Train the weak classifiers on a given dataset (CPU version).
|
"""Train the weak classifiers on a given dataset (CPU version).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X_feat (np.ndarray): Feature images dataset
|
X_feat (np.ndarray): Feature images dataset.
|
||||||
X_feat_argsort (np.ndarray): Sorted indexes of the integrated features
|
X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
|
||||||
y (np.ndarray): Labels of the features
|
y (np.ndarray): Labels of the features.
|
||||||
weights (np.ndarray): Weights of the features
|
weights (np.ndarray): Weights of the features.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Trained weak classifiers
|
np.ndarray: Trained weak classifiers.
|
||||||
"""
|
"""
|
||||||
total_pos, total_neg = weights[y == 1].sum(), weights[y == 0].sum()
|
total_pos, total_neg = weights[y == 1].sum(), weights[y == 0].sum()
|
||||||
|
|
||||||
@ -71,85 +112,29 @@ def train_weak_clf(X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray
|
|||||||
classifiers[i] = (best_threshold, best_polarity)
|
classifiers[i] = (best_threshold, best_polarity)
|
||||||
return classifiers
|
return classifiers
|
||||||
|
|
||||||
@njit('uint32(uint32[:, :], int16, int16, int16, int16)')
|
|
||||||
def __compute_feature__(ii: np.ndarray, x: int, y: int, w: int, h: int) -> int:
|
|
||||||
"""Compute a feature on an integrated image at a specific coordinate (CPU version).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
ii (np.ndarray): Integrated image
|
|
||||||
x (int): X coordinate
|
|
||||||
y (int): Y coordinate
|
|
||||||
w (int): width of the feature
|
|
||||||
h (int): height of the feature
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
int: Computed feature
|
|
||||||
"""
|
|
||||||
return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]
|
|
||||||
|
|
||||||
@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])')
|
|
||||||
def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
|
|
||||||
"""Apply the features on a integrated image dataset (CPU version).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
feats (np.ndarray): Features to apply
|
|
||||||
X_ii (np.ndarray): Integrated image dataset
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
np.ndarray: Applied features
|
|
||||||
"""
|
|
||||||
X_feat = np.empty((feats.shape[0], X_ii.shape[0]), dtype = np.int32)
|
|
||||||
|
|
||||||
for i, (p, n) in enumerate(tqdm_iter(feats, "Applying features")):
|
|
||||||
for j, x_i in enumerate(X_ii):
|
|
||||||
p_x, p_y, p_w, p_h = p[0]
|
|
||||||
p1_x, p1_y, p1_w, p1_h = p[1]
|
|
||||||
n_x, n_y, n_w, n_h = n[0]
|
|
||||||
n1_x, n1_y, n1_w, n1_h = n[1]
|
|
||||||
p1 = __compute_feature__(x_i, p_x, p_y, p_w, p_h) + __compute_feature__(x_i, p1_x, p1_y, p1_w, p1_h)
|
|
||||||
n1 = __compute_feature__(x_i, n_x, n_y, n_w, n_h) + __compute_feature__(x_i, n1_x, n1_y, n1_w, n1_h)
|
|
||||||
X_feat[i, j] = int32(p1) - int32(n1)
|
|
||||||
|
|
||||||
return X_feat
|
|
||||||
|
|
||||||
@njit('int32(int32[:], uint16[:], int32, int32)')
|
@njit('int32(int32[:], uint16[:], int32, int32)')
|
||||||
def _as_partition_(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int) -> int:
|
def as_partition(a: np.ndarray, indices: np.ndarray, l: int, h: int) -> int:
|
||||||
"""Partition of the argsort algorithm.
|
i = l - 1
|
||||||
|
j = l
|
||||||
Args:
|
for j in range(l, h + 1):
|
||||||
d_a (np.ndarray): Array on device to sort
|
if a[indices[j]] < a[indices[h]]:
|
||||||
d_indices (np.ndarray): Array of indices on device to write to
|
|
||||||
low (int): lower bound to sort
|
|
||||||
high (int): higher bound to sort
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
int: Last index sorted
|
|
||||||
"""
|
|
||||||
i, j = low - 1, low
|
|
||||||
for j in range(low, high + 1):
|
|
||||||
if d_a[d_indices[j]] < d_a[d_indices[high]]:
|
|
||||||
i += 1
|
i += 1
|
||||||
d_indices[i], d_indices[j] = d_indices[j], d_indices[i]
|
indices[i], indices[j] = indices[j], indices[i]
|
||||||
|
|
||||||
i += 1
|
i += 1
|
||||||
d_indices[i], d_indices[j] = d_indices[j], d_indices[i]
|
indices[i], indices[j] = indices[j], indices[i]
|
||||||
return i
|
return i
|
||||||
|
|
||||||
@njit('void(int32[:], uint16[:], int32, int32)')
|
@njit('void(int32[:], uint16[:], int32, int32)')
|
||||||
def argsort_bounded(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int) -> None:
|
def argsort_bounded(a: np.ndarray, indices: np.ndarray, l: int, h: int):
|
||||||
"""Perform an indirect sort of a given array within a given bound.
|
total = h - l + 1;
|
||||||
|
|
||||||
Args:
|
|
||||||
d_a (np.ndarray): Array to sort
|
|
||||||
d_indices (np.ndarray): Array of indices to write to
|
|
||||||
low (int): lower bound to sort
|
|
||||||
high (int): higher bound to sort
|
|
||||||
"""
|
|
||||||
total = high - low + 1
|
|
||||||
stack = np.empty((total,), dtype = np.int32)
|
stack = np.empty((total,), dtype = np.int32)
|
||||||
stack[0] = low
|
stack[0] = l
|
||||||
stack[1] = high
|
stack[1] = h
|
||||||
top = 1
|
top = 1;
|
||||||
|
|
||||||
|
low = l
|
||||||
|
high = h
|
||||||
|
|
||||||
while top >= 0:
|
while top >= 0:
|
||||||
high = stack[top]
|
high = stack[top]
|
||||||
@ -158,32 +143,24 @@ def argsort_bounded(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int)
|
|||||||
top -= 1
|
top -= 1
|
||||||
|
|
||||||
if low >= high:
|
if low >= high:
|
||||||
break
|
break;
|
||||||
|
|
||||||
p = _as_partition_(d_a, d_indices, low, high)
|
p = as_partition(a, indices, low, high);
|
||||||
|
|
||||||
if p - 1 > low:
|
if p - 1 > low:
|
||||||
top += 1
|
top += 1
|
||||||
stack[top] = low
|
stack[top] = low;
|
||||||
top += 1
|
top += 1
|
||||||
stack[top] = p - 1
|
stack[top] = p - 1;
|
||||||
|
|
||||||
if p + 1 < high:
|
if p + 1 < high:
|
||||||
top += 1
|
top += 1
|
||||||
stack[top] = p + 1
|
stack[top] = p + 1;
|
||||||
top += 1
|
top += 1
|
||||||
stack[top] = high
|
stack[top] = high;
|
||||||
|
|
||||||
@njit('uint16[:, :](int32[:, :])')
|
@njit('uint16[:, :](int32[:, :])')
|
||||||
def argsort_2d(X_feat: np.ndarray) -> np.ndarray:
|
def argsort(X_feat: np.ndarray) -> np.ndarray:
|
||||||
"""Perform an indirect sort of a given array.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
X_feat (np.ndarray): Array to sort
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
np.ndarray: Array of indices that sort the array
|
|
||||||
"""
|
|
||||||
indices = np.empty_like(X_feat, dtype = np.uint16)
|
indices = np.empty_like(X_feat, dtype = np.uint16)
|
||||||
indices[:, :] = np.arange(indices.shape[1])
|
indices[:, :] = np.arange(indices.shape[1])
|
||||||
for i in tqdm_iter(range(X_feat.shape[0]), "argsort"):
|
for i in tqdm_iter(range(X_feat.shape[0]), "argsort"):
|
||||||
|
@ -12,10 +12,10 @@ def __scanCPU_3d__(X: np.ndarray) -> np.ndarray:
|
|||||||
"""Prefix Sum (scan) of a given dataset.
|
"""Prefix Sum (scan) of a given dataset.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X (np.ndarray): Dataset of images to apply sum
|
X (np.ndarray): Dataset of images to apply sum.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Scanned dataset of images
|
np.ndarray: Scanned dataset of images.
|
||||||
"""
|
"""
|
||||||
for x in range(X.shape[0]):
|
for x in range(X.shape[0]):
|
||||||
for y in range(X.shape[1]):
|
for y in range(X.shape[1]):
|
||||||
@ -30,10 +30,10 @@ def __kernel_scan_3d__(n: int, j: int, d_inter: np.ndarray, d_a: np.ndarray) ->
|
|||||||
"""GPU kernel used to do a parallel prefix sum (scan).
|
"""GPU kernel used to do a parallel prefix sum (scan).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
n (int): Number of width blocks
|
n (int):
|
||||||
j (int): Temporary sum index
|
j (int): [description]
|
||||||
d_inter (np.ndarray): Temporary sums on device to add
|
d_inter (np.ndarray): [description]
|
||||||
d_a (np.ndarray): Dataset of images on device to apply sum
|
d_a (np.ndarray): [description]
|
||||||
"""
|
"""
|
||||||
x_coor, y_coor = cuda.grid(2)
|
x_coor, y_coor = cuda.grid(2)
|
||||||
|
|
||||||
@ -76,10 +76,10 @@ def __add_3d__(d_X: np.ndarray, d_s: np.ndarray, n: int, m: int) -> None:
|
|||||||
"""GPU kernel for parallel sum.
|
"""GPU kernel for parallel sum.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
d_X (np.ndarray): Dataset of images on device
|
d_X (np.ndarray): Dataset of images.
|
||||||
d_s (np.ndarray): Temporary sums on device to add
|
d_s (np.ndarray): Temporary sums to add.
|
||||||
n (int): Number of width blocks
|
n (int): Number of width blocks.
|
||||||
m (int): Height of a block
|
m (int): Height of a block.
|
||||||
"""
|
"""
|
||||||
x_coor, y_coor = cuda.grid(2)
|
x_coor, y_coor = cuda.grid(2)
|
||||||
if x_coor < n and y_coor < m:
|
if x_coor < n and y_coor < m:
|
||||||
@ -91,10 +91,10 @@ def __scanGPU_3d__(X: np.ndarray) -> np.ndarray:
|
|||||||
Read more: https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda
|
Read more: https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X (np.ndarray): Dataset of images
|
X (np.ndarray): Dataset of images.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Scanned dataset of images
|
np.ndarray: Scanned dataset of images.
|
||||||
"""
|
"""
|
||||||
k, height, n = X.shape
|
k, height, n = X.shape
|
||||||
n_block_x, n_block_y = np.ceil(np.divide(X.shape[1:], NB_THREADS_2D)).astype(np.uint64)
|
n_block_x, n_block_y = np.ceil(np.divide(X.shape[1:], NB_THREADS_2D)).astype(np.uint64)
|
||||||
@ -131,10 +131,10 @@ def __transpose_kernel__(d_X: np.ndarray, d_Xt: np.ndarray) -> None:
|
|||||||
"""GPU kernel of the function __transpose_3d__.
|
"""GPU kernel of the function __transpose_3d__.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
d_X (np.ndarray): Dataset of images on device
|
d_X (np.ndarray): Dataset of images.
|
||||||
d_Xt(np.ndarray): Transposed dataset of images
|
d_Xt(np.ndarray): Transposed dataset of images.
|
||||||
width (int): Width of each images in the dataset
|
width (int): Width of each images in the dataset.
|
||||||
height (int): Height of each images in the dataset
|
height (int): Height of each images in the dataset.
|
||||||
"""
|
"""
|
||||||
temp = cuda.shared.array(NB_THREADS_2D, dtype = uint32)
|
temp = cuda.shared.array(NB_THREADS_2D, dtype = uint32)
|
||||||
|
|
||||||
@ -152,10 +152,10 @@ def __transpose_3d__(X: np.ndarray) -> np.ndarray:
|
|||||||
"""Transpose every images in the given dataset.
|
"""Transpose every images in the given dataset.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X (np.ndarray): Dataset of images
|
X (np.ndarray): Dataset of images.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Transposed dataset of images
|
np.ndarray: Transposed dataset of images.
|
||||||
"""
|
"""
|
||||||
n_block_x, n_block_z = np.ceil(np.divide(X.shape[1:], NB_THREADS_2D)).astype(np.uint64)
|
n_block_x, n_block_z = np.ceil(np.divide(X.shape[1:], NB_THREADS_2D)).astype(np.uint64)
|
||||||
d_X = cuda.to_device(X)
|
d_X = cuda.to_device(X)
|
||||||
@ -167,10 +167,10 @@ def set_integral_image(X: np.ndarray) -> np.ndarray:
|
|||||||
"""Transform the input images in integrated images (GPU version).
|
"""Transform the input images in integrated images (GPU version).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X (np.ndarray): Dataset of images
|
X (np.ndarray): Dataset of images.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Dataset of integrated images
|
np.ndarray: Dataset of integrated images.
|
||||||
"""
|
"""
|
||||||
X = X.astype(np.uint32)
|
X = X.astype(np.uint32)
|
||||||
X = __scanGPU_3d__(X)
|
X = __scanGPU_3d__(X)
|
||||||
@ -184,13 +184,13 @@ def __train_weak_clf_kernel__(d_classifiers: np.ndarray, d_y: np.ndarray, d_X_fe
|
|||||||
"""GPU kernel of the function train_weak_clf.
|
"""GPU kernel of the function train_weak_clf.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
d_classifiers (np.ndarray): Weak classifiers on device to train
|
d_classifiers (np.ndarray): Weak classifiers to train.
|
||||||
d_y (np.ndarray): Labels of the features on device
|
d_y (np.ndarray): Labels of the features.
|
||||||
d_X_feat (np.ndarray): Feature images dataset on device
|
d_X_feat (np.ndarray): Feature images dataset.
|
||||||
d_X_feat_argsort (np.ndarray): Sorted indexes of the integrated features on device
|
d_X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
|
||||||
d_weights (np.ndarray): Weights of the features on device
|
d_weights (np.ndarray): Weights of the features.
|
||||||
total_pos (float): Total of positive labels in the dataset
|
total_pos (float): Total of positive labels in the dataset.
|
||||||
total_neg (float): Total of negative labels in the dataset
|
total_neg (float): Total of negative labels in the dataset.
|
||||||
"""
|
"""
|
||||||
i = cuda.blockIdx.x * cuda.blockDim.x * cuda.blockDim.y * cuda.blockDim.z
|
i = cuda.blockIdx.x * cuda.blockDim.x * cuda.blockDim.y * cuda.blockDim.z
|
||||||
i += cuda.threadIdx.x * cuda.blockDim.y * cuda.blockDim.z
|
i += cuda.threadIdx.x * cuda.blockDim.y * cuda.blockDim.z
|
||||||
@ -224,13 +224,13 @@ def train_weak_clf(X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray
|
|||||||
"""Train the weak classifiers on a given dataset (GPU version).
|
"""Train the weak classifiers on a given dataset (GPU version).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X_feat (np.ndarray): Feature images dataset
|
X_feat (np.ndarray): Feature images dataset.
|
||||||
X_feat_argsort (np.ndarray): Sorted indexes of the integrated features
|
X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
|
||||||
y (np.ndarray): Labels of the features
|
y (np.ndarray): Labels of the features.
|
||||||
weights (np.ndarray): Weights of the features
|
weights (np.ndarray): Weights of the features.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Trained weak classifiers
|
np.ndarray: Trained weak classifiers.
|
||||||
"""
|
"""
|
||||||
total_pos, total_neg = weights[y == 1].sum(), weights[y == 0].sum()
|
total_pos, total_neg = weights[y == 1].sum(), weights[y == 0].sum()
|
||||||
d_classifiers = cuda.to_device(np.empty((X_feat.shape[0], 2), dtype = np.int32))
|
d_classifiers = cuda.to_device(np.empty((X_feat.shape[0], 2), dtype = np.int32))
|
||||||
@ -247,52 +247,52 @@ def __compute_feature__(ii: np.ndarray, x: int, y: int, w: int, h: int) -> int:
|
|||||||
"""Compute a feature on an integrated image at a specific coordinate (GPU version).
|
"""Compute a feature on an integrated image at a specific coordinate (GPU version).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
ii (np.ndarray): Integrated image
|
ii (np.ndarray): Integrated image.
|
||||||
x (int): X coordinate
|
x (int): X coordinate.
|
||||||
y (int): Y coordinate
|
y (int): Y coordinate.
|
||||||
w (int): width of the feature
|
w (int): width of the feature.
|
||||||
h (int): height of the feature
|
h (int): height of the feature.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
int: Computed feature
|
int: Computed feature.
|
||||||
"""
|
"""
|
||||||
return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]
|
return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]
|
||||||
|
|
||||||
@cuda.jit('void(int32[:, :], uint8[:, :, :, :], uint32[:, :, :])')
|
@cuda.jit('void(int32[:, :], uint8[:, :, :, :], uint32[:, :, :])')
|
||||||
def __apply_feature_kernel__(d_X_feat: np.ndarray, d_feats: np.ndarray, d_X_ii: np.ndarray) -> None:
|
def __apply_feature_kernel__(X_feat: np.ndarray, feats: np.ndarray, X_ii: np.ndarray) -> None:
|
||||||
"""GPU kernel of the function apply_features.
|
"""GPU kernel of the function apply_features.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
d_X_feat (np.ndarray): Feature images dataset on device
|
X_feat (np.ndarray): Feature images dataset.
|
||||||
d_feats (np.ndarray): Features on device to apply
|
feats (np.ndarray): Features to apply.
|
||||||
d_X_ii (np.ndarray): Integrated image dataset on device
|
X_ii (np.ndarray): Integrated image dataset.
|
||||||
n (int): Number of features
|
n (int): Number of features.
|
||||||
m (int): Number of images of the dataset
|
m (int): Number of images of the dataset.
|
||||||
"""
|
"""
|
||||||
x, y = cuda.grid(2)
|
x, y = cuda.grid(2)
|
||||||
if x >= d_feats.shape[0] or y >= d_X_ii.shape[0]:
|
if x >= feats.shape[0] or y >= X_ii.shape[0]:
|
||||||
return
|
return
|
||||||
|
|
||||||
p_x, p_y, p_w, p_h = d_feats[x, 0, 0]
|
p_x, p_y, p_w, p_h = feats[x, 0, 0]
|
||||||
p1_x, p1_y, p1_w, p1_h = d_feats[x, 0, 1]
|
p1_x, p1_y, p1_w, p1_h = feats[x, 0, 1]
|
||||||
n_x, n_y, n_w, n_h = d_feats[x, 1, 0]
|
n_x, n_y, n_w, n_h = feats[x, 1, 0]
|
||||||
n1_x, n1_y, n1_w, n1_h = d_feats[x, 1, 1]
|
n1_x, n1_y, n1_w, n1_h = feats[x, 1, 1]
|
||||||
sP = __compute_feature__(d_X_ii[y], p_x, p_y, p_w, p_h) + \
|
sP = __compute_feature__(X_ii[y], p_x, p_y, p_w, p_h) + \
|
||||||
__compute_feature__(d_X_ii[y], p1_x, p1_y, p1_w, p1_h)
|
__compute_feature__(X_ii[y], p1_x, p1_y, p1_w, p1_h)
|
||||||
sN = __compute_feature__(d_X_ii[y], n_x, n_y, n_w, n_h) + \
|
sN = __compute_feature__(X_ii[y], n_x, n_y, n_w, n_h) + \
|
||||||
__compute_feature__(d_X_ii[y], n1_x, n1_y, n1_w, n1_h)
|
__compute_feature__(X_ii[y], n1_x, n1_y, n1_w, n1_h)
|
||||||
d_X_feat[x, y] = sP - sN
|
X_feat[x, y] = sP - sN
|
||||||
|
|
||||||
#@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])')
|
#@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])')
|
||||||
def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
|
def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
|
||||||
"""Apply the features on a integrated image dataset (GPU version).
|
"""Apply the features on a integrated image dataset (GPU version).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
feats (np.ndarray): Features to apply
|
feats (np.ndarray): Features to apply.
|
||||||
X_ii (np.ndarray): Integrated image dataset
|
X_ii (np.ndarray): Integrated image dataset.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Applied features
|
np.ndarray: Applied features.
|
||||||
"""
|
"""
|
||||||
d_X_feat = cuda.to_device(np.empty((feats.shape[0], X_ii.shape[0]), dtype = np.int32))
|
d_X_feat = cuda.to_device(np.empty((feats.shape[0], X_ii.shape[0]), dtype = np.int32))
|
||||||
d_feats = cuda.to_device(feats)
|
d_feats = cuda.to_device(feats)
|
||||||
@ -303,44 +303,28 @@ def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
|
|||||||
return d_X_feat.copy_to_host()
|
return d_X_feat.copy_to_host()
|
||||||
|
|
||||||
@cuda.jit('int32(int32[:], uint16[:], int32, int32)', device = True)
|
@cuda.jit('int32(int32[:], uint16[:], int32, int32)', device = True)
|
||||||
def _as_partition_(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int) -> int:
|
def as_partition(a: np.ndarray, indices: np.ndarray, l: int, h: int) -> int:
|
||||||
"""Partition of the argsort algorithm.
|
i = l - 1
|
||||||
|
j = l
|
||||||
Args:
|
for j in range(l, h + 1):
|
||||||
d_a (np.ndarray): Array on device to sort
|
if a[indices[j]] < a[indices[h]]:
|
||||||
d_indices (np.ndarray): Array of indices on device to write to
|
|
||||||
low (int): lower bound to sort
|
|
||||||
high (int): higher bound to sort
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
int: Last index sorted
|
|
||||||
"""
|
|
||||||
i = low - 1
|
|
||||||
j = low
|
|
||||||
for j in range(low, high + 1):
|
|
||||||
if d_a[d_indices[j]] < d_a[d_indices[high]]:
|
|
||||||
i += 1
|
i += 1
|
||||||
d_indices[i], d_indices[j] = d_indices[j], d_indices[i]
|
indices[i], indices[j] = indices[j], indices[i]
|
||||||
|
|
||||||
i += 1
|
i += 1
|
||||||
d_indices[i], d_indices[j] = d_indices[j], d_indices[i]
|
indices[i], indices[j] = indices[j], indices[i]
|
||||||
return i
|
return i
|
||||||
|
|
||||||
@cuda.jit('void(int32[:], uint16[:], int32, int32)', device = True)
|
@cuda.jit('void(int32[:], uint16[:], int32, int32)', device = True)
|
||||||
def argsort_bounded(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int) -> None:
|
def argsort_bounded(a: np.ndarray, indices: np.ndarray, l: int, h: int) -> None:
|
||||||
"""Perform an indirect sort of a given array within a given bound.
|
#total = h - l + 1;
|
||||||
|
|
||||||
Args:
|
|
||||||
d_a (np.ndarray): Array on device to sort
|
|
||||||
d_indices (np.ndarray): Array of indices on device to write to
|
|
||||||
low (int): lower bound to sort
|
|
||||||
high (int): higher bound to sort
|
|
||||||
"""
|
|
||||||
#total = high - low + 1;
|
|
||||||
stack = cuda.local.array(6977, int32)
|
stack = cuda.local.array(6977, int32)
|
||||||
stack[0] = low
|
stack[0] = l
|
||||||
stack[1] = high
|
stack[1] = h
|
||||||
top = 1
|
top = 1;
|
||||||
|
|
||||||
|
low = l
|
||||||
|
high = h
|
||||||
|
|
||||||
while top >= 0:
|
while top >= 0:
|
||||||
high = stack[top]
|
high = stack[top]
|
||||||
@ -349,50 +333,35 @@ def argsort_bounded(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int)
|
|||||||
top -= 1
|
top -= 1
|
||||||
|
|
||||||
if low >= high:
|
if low >= high:
|
||||||
break
|
break;
|
||||||
|
|
||||||
p = _as_partition_(d_a, d_indices, low, high)
|
p = as_partition(a, indices, low, high);
|
||||||
|
|
||||||
if p - 1 > low:
|
if p - 1 > low:
|
||||||
top += 1
|
top += 1
|
||||||
stack[top] = low
|
stack[top] = low;
|
||||||
top += 1
|
top += 1
|
||||||
stack[top] = p - 1
|
stack[top] = p - 1;
|
||||||
|
|
||||||
if p + 1 < high:
|
if p + 1 < high:
|
||||||
top += 1
|
top += 1
|
||||||
stack[top] = p + 1
|
stack[top] = p + 1;
|
||||||
top += 1
|
top += 1
|
||||||
stack[top] = high
|
stack[top] = high;
|
||||||
|
|
||||||
@cuda.jit('void(int32[:, :], uint16[:, :])')
|
@cuda.jit('void(int32[:, :], uint16[:, :])')
|
||||||
def argsort_flatter(d_a: np.ndarray, d_indices: np.ndarray) -> None:
|
def argsort_flatter(X_feat: np.ndarray, indices: np.ndarray) -> None:
|
||||||
# TODO Finish doxygen
|
|
||||||
"""Cuda kernel where argsort is applied to every column of a given 2D array.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
d_a (np.ndarray): 2D Array on device to sort
|
|
||||||
d_indices (np.ndarray): 2D Array of indices on device to write to
|
|
||||||
"""
|
|
||||||
i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
|
i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
|
||||||
if i < d_a.shape[0]:
|
if i < X_feat.shape[0]:
|
||||||
for j in range(d_indices.shape[1]):
|
for j in range(indices.shape[1]):
|
||||||
d_indices[i, j] = j
|
indices[i, j] = j
|
||||||
argsort_bounded(d_a[i], d_indices[i], 0, d_a.shape[1] - 1)
|
argsort_bounded(X_feat[i], indices[i], 0, X_feat.shape[1] - 1)
|
||||||
|
|
||||||
def argsort_2d(a: np.ndarray) -> np.ndarray:
|
def argsort(X_feat: np.ndarray) -> np.ndarray:
|
||||||
"""Perform an indirect sort on each column of a given 2D array
|
indices = np.empty_like(X_feat, dtype = np.uint16)
|
||||||
|
n_blocks = int(np.ceil(np.divide(X_feat.shape[0], NB_THREADS)))
|
||||||
Args:
|
d_X_feat = cuda.to_device(X_feat)
|
||||||
a (np.ndarray): 2D Array to sort
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
np.ndarray: 2D Array of indices that sort the array
|
|
||||||
"""
|
|
||||||
indices = np.empty_like(a, dtype = np.uint16)
|
|
||||||
n_blocks = int(np.ceil(np.divide(a.shape[0], NB_THREADS)))
|
|
||||||
d_a = cuda.to_device(a)
|
|
||||||
d_indices = cuda.to_device(indices)
|
d_indices = cuda.to_device(indices)
|
||||||
argsort_flatter[n_blocks, NB_THREADS](d_a, d_indices)
|
argsort_flatter[n_blocks, NB_THREADS](d_X_feat, d_indices)
|
||||||
cuda.synchronize()
|
cuda.synchronize()
|
||||||
return d_indices.copy_to_host()
|
return d_indices.copy_to_host()
|
||||||
|
@ -3,8 +3,9 @@
|
|||||||
# Exit if any of the command doesn't exit with code 0
|
# Exit if any of the command doesn't exit with code 0
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
test -z "$EXEC_DIR" && EXEC_DIR=.
|
EXEC_DIR=$1
|
||||||
test -z "$VENV_PATH" && VENV_PATH="$EXEC_DIR/venv"
|
test -z "$EXEC_DIR" && EXEC_DIR=..
|
||||||
|
VENV_PATH=$EXEC_DIR/python/venv
|
||||||
|
|
||||||
activate(){
|
activate(){
|
||||||
if [ ! -d "$VENV_PATH" ]; then
|
if [ ! -d "$VENV_PATH" ]; then
|
||||||
@ -15,9 +16,9 @@ activate(){
|
|||||||
echo 'Updating base pip packages'
|
echo 'Updating base pip packages'
|
||||||
python -m pip install -U setuptools pip
|
python -m pip install -U setuptools pip
|
||||||
echo 'Installing requirements'
|
echo 'Installing requirements'
|
||||||
pip install -r requirements.txt
|
pip install -r "$EXEC_DIR"/python/requirements.txt
|
||||||
elif [ -f "$VENV_PATH"/Scripts/activate ]; then . "$VENV_PATH"/Scripts/activate
|
elif [ -f "$VENV_PATH"/Scripts/activate ]; then source "$VENV_PATH"/Scripts/activate
|
||||||
elif [ -f "$VENV_PATH"/bin/activate ]; then . "$VENV_PATH"/bin/activate
|
elif [ -f "$VENV_PATH"/bin/activate ]; then source "$VENV_PATH"/bin/activate
|
||||||
else
|
else
|
||||||
echo 'Python virtual environnement not detected'
|
echo 'Python virtual environnement not detected'
|
||||||
exit 1
|
exit 1
|
||||||
|
@ -1,29 +1,29 @@
|
|||||||
from toolbox import pickle_multi_loader, format_time_ns, unit_test_argsort_2d, header, footer, formatted_line, formatted_row
|
from toolbox import picke_multi_loader, format_time_ns, unit_test_argsort_2d
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
from time import perf_counter_ns
|
from time import perf_counter_ns
|
||||||
from sys import stderr
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from config import OUT_DIR, DATA_DIR, __DEBUG
|
from config import OUT_DIR, DATA_DIR, __DEBUG
|
||||||
|
|
||||||
def unit_test(TS: List[int], labels: List[str] = ['CPU', 'GPU', 'PY', 'PGPU'], tol: float = 1e-8) -> None:
|
def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU", "PY", "PGPU"], tol: float = 1e-8) -> None:
|
||||||
"""Test if the each result is equals to other devices.
|
"""Test if the each result is equals to other devices.
|
||||||
|
|
||||||
Given ViolaJones is a fully deterministic algorithm. The results, regardless the device, should be the same
|
Given ViolaJones is a deterministic algorithm, the results no matter the device should be the same
|
||||||
(given the floating point fluctuations), this function check this assertion.
|
(given the floating point fluctuations), this function check this assertion.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
TS (List[int]): Number of trained weak classifiers
|
TS (List[int]): Number of trained weak classifiers.
|
||||||
labels (List[str], optional): List of the trained device names. Defaults to ['CPU', 'GPU', 'PY', 'PGPU'] (see config.py for more info)
|
labels (List[str], optional): List of the trained device names. Defaults to ["CPU", "GPU", "PY", "PGPU"] (see config.py for more info).
|
||||||
tol (float, optional): Float difference tolerance. Defaults to 1e-8
|
tol (float, optional): Float difference tolerance. Defaults to 1e-8.
|
||||||
"""
|
"""
|
||||||
if len(labels) < 2:
|
if len(labels) < 2:
|
||||||
return print('Not enough devices to test')
|
return print("Not enough devices to test")
|
||||||
|
|
||||||
unit_gaps = [37, -10, -18, 29]
|
print(f"\n| {'Unit testing':<37} | {'Test state':<10} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |")
|
||||||
header(unit_gaps, ['Unit testing', 'Test state', 'Time spent (ns)', 'Formatted time spent'])
|
print(f"|{'-'*39}|{'-'*12}|{'-'*20}|{'-'*31}|")
|
||||||
|
|
||||||
unit_timestamp = perf_counter_ns()
|
fnc_s = perf_counter_ns()
|
||||||
n_total, n_success = 0, 0
|
n_total = 0
|
||||||
|
n_success = 0
|
||||||
|
|
||||||
def test_fnc(title, fnc):
|
def test_fnc(title, fnc):
|
||||||
nonlocal n_total, n_success
|
nonlocal n_total, n_success
|
||||||
@ -32,104 +32,96 @@ def unit_test(TS: List[int], labels: List[str] = ['CPU', 'GPU', 'PY', 'PGPU'], t
|
|||||||
state = fnc()
|
state = fnc()
|
||||||
e = perf_counter_ns() - s
|
e = perf_counter_ns() - s
|
||||||
if state:
|
if state:
|
||||||
formatted_row(unit_gaps, [title, 'Passed', f'{e:,}', format_time_ns(e)])
|
print(f"| {title:<37} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||||
n_success += 1
|
n_success += 1
|
||||||
else:
|
else:
|
||||||
formatted_row(unit_gaps, [title, 'Failed', f'{e:,}', format_time_ns(e)])
|
print(f"| {title:<37} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||||
|
|
||||||
for set_name in ['train', 'test']:
|
for set_name in ["train", "test"]:
|
||||||
for filename in ['ii', 'feat']:
|
for filename in ["ii", "feat"]:
|
||||||
title = f'X_{set_name}_{filename}'
|
title = f"X_{set_name}_{filename}"
|
||||||
print(f'{filename}...', file = stderr, end = '\r')
|
print(f"{filename}...", end = "\r")
|
||||||
bs = pickle_multi_loader([f'{title}_{label}' for label in labels], OUT_DIR)
|
bs = picke_multi_loader([f"{title}_{label}" for label in labels], OUT_DIR)
|
||||||
|
|
||||||
for i, (b1, l1) in enumerate(zip(bs, labels)):
|
for i, (b1, l1) in enumerate(zip(bs, labels)):
|
||||||
if b1 is None:
|
if b1 is None:
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
formatted_row(unit_gaps, [f'{title:<22} - {l1:<12}', 'Skipped', 'None', 'None'])
|
print(f"| {title:<22} - {l1:<12} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||||
continue
|
continue
|
||||||
for j, (b2, l2) in enumerate(zip(bs, labels)):
|
for j, (b2, l2) in enumerate(zip(bs, labels)):
|
||||||
if i >= j:
|
if i >= j:
|
||||||
continue
|
continue
|
||||||
if b2 is None:
|
if b2 is None:
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
formatted_row(unit_gaps, [f'{title:<22} - {l1:<4} vs {l2:<4}', 'Skipped', 'None', 'None'])
|
print(f"| {title:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||||
continue
|
continue
|
||||||
test_fnc(f'{title:<22} - {l1:<4} vs {l2:<4}', lambda: np.abs(b1 - b2).mean() < tol)
|
test_fnc(f"{title:<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol)
|
||||||
|
|
||||||
title = f'X_{set_name}_feat_argsort'
|
title = f"X_{set_name}_feat_argsort"
|
||||||
print(f'Loading {title}...', file = stderr, end = '\r')
|
print(f"Loading {title}...", end = "\r")
|
||||||
feat = None
|
feat = None
|
||||||
#indices = pickle_multi_loader(['indices'], OUT_DIR)[0]
|
|
||||||
bs = []
|
bs = []
|
||||||
for label in labels:
|
for label in labels:
|
||||||
if feat is None:
|
if feat is None:
|
||||||
feat_tmp = pickle_multi_loader([f'X_{set_name}_feat_{label}'], OUT_DIR)[0]
|
feat_tmp = picke_multi_loader([f"X_{set_name}_feat_{label}"], OUT_DIR)[0]
|
||||||
if feat_tmp is not None:
|
if feat_tmp is not None:
|
||||||
#feat = feat_tmp[indices]
|
|
||||||
feat = feat_tmp
|
feat = feat_tmp
|
||||||
bs.append(pickle_multi_loader([f'{title}_{label}'], OUT_DIR)[0])
|
bs.append(picke_multi_loader([f"{title}_{label}"], OUT_DIR)[0])
|
||||||
|
|
||||||
for i, (b1, l1) in enumerate(zip(bs, labels)):
|
for i, (b1, l1) in enumerate(zip(bs, labels)):
|
||||||
if b1 is None:
|
if b1 is None:
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
formatted_row(unit_gaps, [f'{title:<22} - {l1:<12}', 'Skipped', 'None', 'None'])
|
print(f"| {title:<22} - {l1:<12} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||||
continue
|
continue
|
||||||
if feat is not None:
|
if feat is not None:
|
||||||
test_fnc(f'{title:<22} - {l1:<4} argsort', lambda: unit_test_argsort_2d(feat, b1))
|
test_fnc(f"{title:<22} - {l1:<4} argsort", lambda: unit_test_argsort_2d(feat, b1))
|
||||||
|
|
||||||
for j, (b2, l2) in enumerate(zip(bs, labels)):
|
for j, (b2, l2) in enumerate(zip(bs, labels)):
|
||||||
if i >= j:
|
if i >= j:
|
||||||
continue
|
continue
|
||||||
if b2 is None:
|
if b2 is None:
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
formatted_row(unit_gaps, [f'{title:<22} - {l1:<4} vs {l2:<4}', 'Skipped', 'None', 'None'])
|
print(f"| {title:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||||
continue
|
continue
|
||||||
test_fnc(f'{title:<22} - {l1:<4} vs {l2:<4}', lambda: np.abs(b1 - b2).mean() < tol)
|
test_fnc(f"{title:<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol)
|
||||||
|
|
||||||
for T in TS:
|
for T in TS:
|
||||||
for filename in ['alphas', 'final_classifiers']:
|
for filename in ["alphas", "final_classifiers"]:
|
||||||
print(f'{filename}_{T}...', file = stderr, end = '\r')
|
print(f"{filename}_{T}...", end = "\r")
|
||||||
bs = pickle_multi_loader([f'{filename}_{T}_{label}' for label in labels])
|
bs = picke_multi_loader([f"{filename}_{T}_{label}" for label in labels])
|
||||||
|
|
||||||
for i, (b1, l1) in enumerate(zip(bs, labels)):
|
for i, (b1, l1) in enumerate(zip(bs, labels)):
|
||||||
if b1 is None:
|
if b1 is None:
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
formatted_row(unit_gaps, [f"{filename + '_' + str(T):<22} - {l1:<12}", 'Skipped', 'None', 'None'])
|
print(f"| {filename + '_' + str(T):<22} - {l1:<12} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||||
continue
|
continue
|
||||||
for j, (b2, l2) in enumerate(zip(bs, labels)):
|
for j, (b2, l2) in enumerate(zip(bs, labels)):
|
||||||
if i >= j:
|
if i >= j:
|
||||||
continue
|
continue
|
||||||
if b2 is None:
|
if b2 is None:
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
formatted_row(unit_gaps, [f"{filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4}", 'Skipped', 'None', 'None'])
|
print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||||
continue
|
continue
|
||||||
test_fnc(f"{filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol)
|
test_fnc(f"{filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol)
|
||||||
|
|
||||||
time_spent = perf_counter_ns() - unit_timestamp
|
print(f"|{'-'*39}|{'-'*12}|{'-'*20}|{'-'*31}|")
|
||||||
|
e = perf_counter_ns() - fnc_s
|
||||||
if n_total == 0:
|
print(f"| {'Unit testing summary':<37} | {str(n_success) + '/' + str(n_total):>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||||
formatted_row(unit_gaps, ['Unit testing summary', 'No files', f'{time_spent:,}', format_time_ns(time_spent)])
|
|
||||||
else:
|
|
||||||
formatted_line(unit_gaps, '├', '┼', '─', '┤')
|
|
||||||
formatted_row(unit_gaps, ['Unit testing summary', f'{n_success}/{n_total}', f'{time_spent:,}', format_time_ns(time_spent)])
|
|
||||||
|
|
||||||
footer(unit_gaps)
|
|
||||||
|
|
||||||
def load_datasets(data_dir: str = DATA_DIR) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
def load_datasets(data_dir: str = DATA_DIR) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
||||||
"""Load the datasets.
|
"""Load the datasets.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data_dir (str, optional): [description]. Defaults to DATA_DIR (see config.py)
|
data_dir (str, optional): [description]. Defaults to DATA_DIR (see config.py).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: X_train, y_train, X_test, y_test
|
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: [description]
|
||||||
"""
|
"""
|
||||||
bytes_to_int_list = lambda b: list(map(int, b.rstrip().split(' ')))
|
bytes_to_int_list = lambda b: list(map(int, b.rstrip().split(" ")))
|
||||||
|
|
||||||
def load(set_name: str) -> np.ndarray:
|
def load(set_name: str) -> np.ndarray:
|
||||||
with open(f'{data_dir}/{set_name}.bin', 'r') as f:
|
with open(f"{data_dir}/{set_name}.bin", "r") as f:
|
||||||
shape = bytes_to_int_list(f.readline())
|
shape = bytes_to_int_list(f.readline())
|
||||||
return np.asarray(bytes_to_int_list(f.readline()), dtype = np.uint8).reshape(shape)
|
return np.asarray(bytes_to_int_list(f.readline()), dtype = np.uint8).reshape(shape)
|
||||||
|
|
||||||
return load('X_train'), load('y_train'), load('X_test'), load('y_test')
|
return load("X_train"), load("y_train"), load("X_test"), load("y_test")
|
||||||
|
@ -5,9 +5,6 @@ from sys import argv
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from os import path, listdir
|
from os import path, listdir
|
||||||
|
|
||||||
# Induce determinism
|
|
||||||
np.random.seed(133742)
|
|
||||||
|
|
||||||
# Makes the "leave" argument default to False
|
# Makes the "leave" argument default to False
|
||||||
tqdm = partial(tqdm, leave = False)
|
tqdm = partial(tqdm, leave = False)
|
||||||
|
|
||||||
@ -45,8 +42,8 @@ def __main__(data_path: str) -> None:
|
|||||||
y.append(y_i)
|
y.append(y_i)
|
||||||
|
|
||||||
X, y = np.asarray(X), np.asarray(y)
|
X, y = np.asarray(X), np.asarray(y)
|
||||||
idx = np.random.permutation(y.shape[0])
|
# idx = np.random.permutation(y.shape[0])
|
||||||
X, y = X[idx], y[idx]
|
# X, y = X[idx], y[idx]
|
||||||
|
|
||||||
for org, s in tqdm(zip("Xy", [X, y]), desc = f"Writing {set_name}"):
|
for org, s in tqdm(zip("Xy", [X, y]), desc = f"Writing {set_name}"):
|
||||||
with open(f"{data_path}/{org}_{set_name}.bin", "w") as out:
|
with open(f"{data_path}/{org}_{set_name}.bin", "w") as out:
|
||||||
|
@ -2,14 +2,6 @@ from typing import Callable, Iterable, Union, Any
|
|||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
def njit(f: Union[Callable, str] = None, *args, **kwargs) -> Callable:
|
def njit(f: Union[Callable, str] = None, *args, **kwargs) -> Callable:
|
||||||
"""Wrapper for optional numba's njit decorator
|
|
||||||
|
|
||||||
Args:
|
|
||||||
f (Union[Callable, str], optional): Function to wrap with numba. Defaults to None.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Callable: Wrapped function.
|
|
||||||
"""
|
|
||||||
def decorator(func: Callable) -> Any:
|
def decorator(func: Callable) -> Any:
|
||||||
return func
|
return func
|
||||||
|
|
||||||
@ -18,13 +10,4 @@ def njit(f: Union[Callable, str] = None, *args, **kwargs) -> Callable:
|
|||||||
return decorator
|
return decorator
|
||||||
|
|
||||||
def tqdm_iter(iter: Iterable, desc: str):
|
def tqdm_iter(iter: Iterable, desc: str):
|
||||||
"""Wrapper for optional tqdm iterator progress bar.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
iter (Iterable): Object to iterate over.
|
|
||||||
desc (str): Description written to stdout.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
_type_: Wrapped iterator.
|
|
||||||
"""
|
|
||||||
return tqdm(iter, leave = False, desc = desc)
|
return tqdm(iter, leave = False, desc = desc)
|
@ -1,15 +0,0 @@
|
|||||||
services:
|
|
||||||
violajones-python:
|
|
||||||
image: saundersp/violajones-python
|
|
||||||
build: .
|
|
||||||
volumes:
|
|
||||||
- ./models:/home/ViolaJones/python/models
|
|
||||||
- ./out:/home/ViolaJones/python/out
|
|
||||||
- ../data:/home/ViolaJones/data
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
count: 1
|
|
||||||
capabilities: [gpu]
|
|
112
python/projet.py
112
python/projet.py
@ -2,15 +2,14 @@
|
|||||||
# Author: @saundersp
|
# Author: @saundersp
|
||||||
|
|
||||||
from ViolaJones import train_viola_jones, classify_viola_jones
|
from ViolaJones import train_viola_jones, classify_viola_jones
|
||||||
#from toolbox import state_saver, pickle_multi_loader, format_time_ns, benchmark_function, unit_test_argsort_2d
|
from toolbox import state_saver, picke_multi_loader, format_time_ns, benchmark_function, unit_test_argsort_2d
|
||||||
from toolbox import state_saver, format_time_ns, benchmark_function, unit_test_argsort_2d
|
from toolbox_unit_test import format_time_ns_test
|
||||||
from toolbox import header, footer, formatted_row, formatted_line
|
from toolbox import header, footer, formatted_row, formatted_line
|
||||||
from toolbox_unit_test import format_time_test, format_time_ns_test
|
|
||||||
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
|
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
|
||||||
#from sklearn.feature_selection import SelectPercentile, f_classif
|
from sklearn.feature_selection import SelectPercentile, f_classif
|
||||||
from common import load_datasets, unit_test
|
from common import load_datasets, unit_test
|
||||||
from ViolaJones import build_features # , get_best_anova_features
|
from ViolaJones import build_features, get_best_anova_features
|
||||||
from typing import Tuple, List
|
from typing import Tuple
|
||||||
from time import perf_counter_ns
|
from time import perf_counter_ns
|
||||||
from os import makedirs
|
from os import makedirs
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -20,51 +19,44 @@ if __DEBUG:
|
|||||||
from config import IDX_INSPECT, IDX_INSPECT_OFFSET
|
from config import IDX_INSPECT, IDX_INSPECT_OFFSET
|
||||||
|
|
||||||
if GPU_BOOSTED:
|
if GPU_BOOSTED:
|
||||||
from ViolaJonesGPU import apply_features, set_integral_image, argsort_2d
|
from ViolaJonesGPU import apply_features, set_integral_image, argsort
|
||||||
label = 'GPU' if COMPILE_WITH_C else 'PGPU'
|
label = 'GPU' if COMPILE_WITH_C else 'PGPU'
|
||||||
# The parallel prefix sum doesn't use the whole GPU so numba output some annoying warnings, this disables it
|
# The parallel prefix sum doesn't use the whole GPU so numba output some annoying warnings, this disables it
|
||||||
from numba import config
|
from numba import config
|
||||||
config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
|
config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
|
||||||
else:
|
else:
|
||||||
from ViolaJonesCPU import apply_features, set_integral_image, argsort_2d
|
from ViolaJonesCPU import apply_features, set_integral_image, argsort
|
||||||
label = 'CPU' if COMPILE_WITH_C else 'PY'
|
label = 'CPU' if COMPILE_WITH_C else 'PY'
|
||||||
|
|
||||||
def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
||||||
"""Execute the preprocessing phase
|
"""Load the dataset, calculate features and integral images, apply features to images and calculate argsort of the featured images.
|
||||||
|
|
||||||
The preprocessing phase consist of the following steps :
|
|
||||||
- Load the dataset
|
|
||||||
- Calculate features
|
|
||||||
- Calculate integral images
|
|
||||||
- Apply features to images
|
|
||||||
- Calculate argsort of the featured images
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Tuple containing in order : training features, training features sorted indexes, training labels, testing features, testing labels
|
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test
|
||||||
"""
|
"""
|
||||||
# Creating state saver folders if they don't exist already
|
# Creating state saver folders if they don't exist already
|
||||||
if SAVE_STATE:
|
if SAVE_STATE:
|
||||||
for folder_name in ['models', 'out']:
|
for folder_name in ["models", "out"]:
|
||||||
makedirs(folder_name, exist_ok = True)
|
makedirs(folder_name, exist_ok = True)
|
||||||
|
|
||||||
preproc_timestamp = perf_counter_ns()
|
preproc_timestamp = perf_counter_ns()
|
||||||
preproc_gaps = [49, -18, 29]
|
preproc_gaps = [49, -18, 29]
|
||||||
header(preproc_gaps, ['Preprocessing', 'Time spent (ns)', 'Formatted time spent'])
|
header(['Preprocessing', 'Time spent (ns)', 'Formatted time spent'], preproc_gaps)
|
||||||
|
|
||||||
X_train, y_train, X_test, y_test = state_saver('Loading sets', preproc_gaps[0], ['X_train', 'y_train', 'X_test', 'y_test'],
|
X_train, y_train, X_test, y_test = state_saver('Loading sets', preproc_gaps[0], ['X_train', 'y_train', 'X_test', 'y_test'],
|
||||||
load_datasets, FORCE_REDO, SAVE_STATE)
|
load_datasets, FORCE_REDO, SAVE_STATE)
|
||||||
|
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
print('X_train')
|
print("X_train")
|
||||||
print(X_train.shape)
|
print(X_train.shape)
|
||||||
print(X_train[IDX_INSPECT])
|
print(X_train[IDX_INSPECT])
|
||||||
print('X_test')
|
print("X_test")
|
||||||
print(X_test.shape)
|
print(X_test.shape)
|
||||||
print(X_test[IDX_INSPECT])
|
print(X_test[IDX_INSPECT])
|
||||||
print('y_train')
|
print("y_train")
|
||||||
print(y_train.shape)
|
print(y_train.shape)
|
||||||
print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
|
print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
|
||||||
print('y_test')
|
print("y_test")
|
||||||
print(y_test.shape)
|
print(y_test.shape)
|
||||||
print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
|
print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
|
||||||
|
|
||||||
@ -72,7 +64,7 @@ def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.
|
|||||||
FORCE_REDO, SAVE_STATE)
|
FORCE_REDO, SAVE_STATE)
|
||||||
|
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
print('feats')
|
print("feats")
|
||||||
print(feats.shape)
|
print(feats.shape)
|
||||||
print(feats[IDX_INSPECT].ravel())
|
print(feats[IDX_INSPECT].ravel())
|
||||||
|
|
||||||
@ -82,10 +74,10 @@ def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.
|
|||||||
lambda: set_integral_image(X_test), FORCE_REDO, SAVE_STATE)
|
lambda: set_integral_image(X_test), FORCE_REDO, SAVE_STATE)
|
||||||
|
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
print('X_train_ii')
|
print("X_train_ii")
|
||||||
print(X_train_ii.shape)
|
print(X_train_ii.shape)
|
||||||
print(X_train_ii[IDX_INSPECT])
|
print(X_train_ii[IDX_INSPECT])
|
||||||
print('X_test_ii')
|
print("X_test_ii")
|
||||||
print(X_test_ii.shape)
|
print(X_test_ii.shape)
|
||||||
print(X_test_ii[IDX_INSPECT])
|
print(X_test_ii[IDX_INSPECT])
|
||||||
|
|
||||||
@ -96,46 +88,45 @@ def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.
|
|||||||
del X_train_ii, X_test_ii, feats
|
del X_train_ii, X_test_ii, feats
|
||||||
|
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
print('X_train_feat')
|
print("X_train_feat")
|
||||||
print(X_train_feat.shape)
|
print(X_train_feat.shape)
|
||||||
print(X_train_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
print(X_train_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||||
print('X_test_feat')
|
print("X_test_feat")
|
||||||
print(X_test_feat.shape)
|
print(X_test_feat.shape)
|
||||||
print(X_test_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
print(X_test_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||||
|
|
||||||
#indices = state_saver('Selecting best features training set', 'indices', force_redo = FORCE_REDO, save_state = SAVE_STATE,
|
#indices = state_saver("Selecting best features training set", "indices", force_redo = True, save_state = SAVE_STATE,
|
||||||
# fnc = lambda: SelectPercentile(f_classif, percentile = 10).fit(X_train_feat.T, y_train).get_support(indices = True))
|
# fnc = lambda: SelectPercentile(f_classif, percentile = 10).fit(X_train_feat.T, y_train).get_support(indices = True))
|
||||||
#indices = state_saver('Selecting best features training set', 'indices', force_redo = FORCE_REDO, save_state = SAVE_STATE,
|
#indices = state_saver("Selecting best features training set", "indices", force_redo = FORCE_REDO, save_state = SAVE_STATE,
|
||||||
# fnc = lambda: get_best_anova_features(X_train_feat, y_train))
|
# fnc = lambda: get_best_anova_features(X_train_feat, y_train))
|
||||||
#indices = benchmark_function('Selecting best features (manual)', lambda: get_best_anova_features(X_train_feat, y_train))
|
#indices = benchmark_function("Selecting best features (manual)", lambda: get_best_anova_features(X_train_feat, y_train))
|
||||||
|
|
||||||
#if __DEBUG:
|
#if __DEBUG:
|
||||||
# print('indices')
|
# print("indices")
|
||||||
# print(indices.shape)
|
# print(indices.shape)
|
||||||
# print(indices[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
|
# print(indices[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
|
||||||
# assert indices.shape[0] == indices_new.shape[0], f'Indices length not equal : {indices.shape} != {indices_new.shape}'
|
# assert indices.shape[0] == indices_new.shape[0], f"Indices length not equal : {indices.shape} != {indices_new.shape}"
|
||||||
# assert (eq := indices == indices_new).all(), f'Indices not equal : {eq.sum() / indices.shape[0]}'
|
# assert (eq := indices == indices_new).all(), f"Indices not equal : {eq.sum() / indices.shape[0]}"
|
||||||
|
|
||||||
# X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]
|
# X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]
|
||||||
|
|
||||||
X_train_feat_argsort = state_saver(f'Precalculating training set argsort ({label})', preproc_gaps[0], f'X_train_feat_argsort_{label}',
|
X_train_feat_argsort = state_saver(f'Precalculating training set argsort ({label})', preproc_gaps[0], f'X_train_feat_argsort_{label}',
|
||||||
lambda: argsort_2d(X_train_feat), FORCE_REDO, SAVE_STATE)
|
lambda: argsort(X_train_feat), FORCE_REDO, SAVE_STATE)
|
||||||
|
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
print('X_train_feat_argsort')
|
print("X_train_feat_argsort")
|
||||||
print(X_train_feat_argsort.shape)
|
print(X_train_feat_argsort.shape)
|
||||||
print(X_train_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
print(X_train_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||||
benchmark_function('Arg unit test', preproc_gaps[0], lambda: unit_test_argsort_2d(X_train_feat, X_train_feat_argsort))
|
benchmark_function('Arg unit test', preproc_gaps[0], lambda: unit_test_argsort_2d(X_train_feat, X_train_feat_argsort))
|
||||||
|
|
||||||
X_test_feat_argsort = state_saver(f'Precalculating testing set argsort ({label})', preproc_gaps[0], f'X_test_feat_argsort_{label}',
|
X_test_feat_argsort = state_saver(f"Precalculating testing set argsort ({label})", f"X_test_feat_argsort_{label}",
|
||||||
lambda: argsort_2d(X_test_feat), FORCE_REDO, SAVE_STATE)
|
lambda: argsort(X_test_feat), FORCE_REDO, SAVE_STATE)
|
||||||
|
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
print('X_test_feat_argsort')
|
print("X_test_feat_argsort")
|
||||||
print(X_test_feat_argsort.shape)
|
print(X_test_feat_argsort.shape)
|
||||||
print(X_test_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
print(X_test_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||||
benchmark_function('Arg unit test', lambda: unit_test_argsort_2d(X_test_feat, X_test_feat_argsort))
|
benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_test_feat, X_test_feat_argsort))
|
||||||
|
|
||||||
time_spent = perf_counter_ns() - preproc_timestamp
|
time_spent = perf_counter_ns() - preproc_timestamp
|
||||||
formatted_line(preproc_gaps, '├', '┼', '─', '┤')
|
formatted_line(preproc_gaps, '├', '┼', '─', '┤')
|
||||||
formatted_row(preproc_gaps, ['Preprocessing summary', f'{time_spent:,}', format_time_ns(time_spent)])
|
formatted_row(preproc_gaps, ['Preprocessing summary', f'{time_spent:,}', format_time_ns(time_spent)])
|
||||||
@ -147,17 +138,16 @@ def train(X_train_feat: np.ndarray, X_train_feat_argsort: np.ndarray, y_train: n
|
|||||||
"""Train the weak classifiers.
|
"""Train the weak classifiers.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X_train (np.ndarray): Training images
|
X_train (np.ndarray): Training images.
|
||||||
X_train_feat_argsort (np.ndarray): Sorted indexes of the training images features
|
X_train_feat_argsort (np.ndarray): Sorted indexes of the training images features.
|
||||||
y_train (np.ndarray): Training labels
|
y_train (np.ndarray): Training labels.
|
||||||
|
|
||||||
Returns:
|
Returns: List of trained models
|
||||||
List[np.ndarray]: List of trained models
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
training_timestamp = perf_counter_ns()
|
training_timestamp = perf_counter_ns()
|
||||||
training_gaps = [26, -18, 29]
|
training_gaps = [26, -18, 29]
|
||||||
header(training_gaps, ['Training', 'Time spent (ns)', 'Formatted time spent'])
|
header(['Training', 'Time spent (ns)', 'Formatted time spent'], training_gaps)
|
||||||
models = []
|
models = []
|
||||||
|
|
||||||
for T in TS:
|
for T in TS:
|
||||||
@ -167,9 +157,9 @@ def train(X_train_feat: np.ndarray, X_train_feat_argsort: np.ndarray, y_train: n
|
|||||||
models.append([alphas, final_classifiers])
|
models.append([alphas, final_classifiers])
|
||||||
|
|
||||||
if __DEBUG:
|
if __DEBUG:
|
||||||
print('alphas')
|
print("alphas")
|
||||||
print(alphas)
|
print(alphas)
|
||||||
print('final_classifiers')
|
print("final_classifiers")
|
||||||
print(final_classifiers)
|
print(final_classifiers)
|
||||||
|
|
||||||
time_spent = perf_counter_ns() - training_timestamp
|
time_spent = perf_counter_ns() - training_timestamp
|
||||||
@ -183,15 +173,15 @@ def testing_and_evaluating(models: List[np.ndarray], X_train_feat: np.ndarray, y
|
|||||||
"""Benchmark the trained classifiers on the training and testing sets.
|
"""Benchmark the trained classifiers on the training and testing sets.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
models (List[np.ndarray]): List of trained models
|
models (List[np.ndarray]): List of trained models.
|
||||||
X_train_feat (np.ndarray): Training features
|
X_train_feat (np.ndarray): Training features.
|
||||||
y_train (np.ndarray): Training labels
|
y_train (np.ndarray): Training labels.
|
||||||
X_test_feat (np.ndarray): Testing features
|
X_test_feat (np.ndarray): Testing features.
|
||||||
y_test (np.ndarray): Testing labels
|
y_test (np.ndarray): Testing labels.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
testing_gaps = [26, -19, 24, -19, 24]
|
testing_gaps = [26, -19, 24, -19, 24]
|
||||||
header(testing_gaps, ['Testing', 'Time spent (ns) (E)', 'Formatted time spent (E)', 'Time spent (ns) (T)', 'Formatted time spent (T)'])
|
header(['Testing', 'Time spent (ns) (E)', 'Formatted time spent (E)', 'Time spent (ns) (T)', 'Formatted time spent (T)'], testing_gaps)
|
||||||
|
|
||||||
performances = []
|
performances = []
|
||||||
total_train_timestamp = 0
|
total_train_timestamp = 0
|
||||||
@ -223,7 +213,7 @@ def testing_and_evaluating(models: List[np.ndarray], X_train_feat: np.ndarray, y
|
|||||||
footer(testing_gaps)
|
footer(testing_gaps)
|
||||||
|
|
||||||
evaluating_gaps = [19, 7, 6, 6, 6, 7, 6, 6, 6]
|
evaluating_gaps = [19, 7, 6, 6, 6, 7, 6, 6, 6]
|
||||||
header(evaluating_gaps, ['Evaluating', 'ACC (E)', 'F1 (E)', 'FN (E)', 'FP (E)', 'ACC (T)', 'F1 (T)', 'FN (T)', 'FP (T)'])
|
header(['Evaluating', 'ACC (E)', 'F1 (E)', 'FN (E)', 'FP (E)', 'ACC (T)', 'F1 (T)', 'FN (T)', 'FP (T)'], evaluating_gaps)
|
||||||
|
|
||||||
for T, (e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP) in zip(TS, performances):
|
for T, (e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP) in zip(TS, performances):
|
||||||
print(f'│ ViolaJones T = {T:<4} │ {e_acc:>7.2%} │ {e_f1:>6.2f} │ {e_FN:>6,} │ {e_FP:>6,}', end = ' │ ')
|
print(f'│ ViolaJones T = {T:<4} │ {e_acc:>7.2%} │ {e_f1:>6.2f} │ {e_FN:>6,} │ {e_FP:>6,}', end = ' │ ')
|
||||||
@ -234,7 +224,7 @@ def testing_and_evaluating(models: List[np.ndarray], X_train_feat: np.ndarray, y
|
|||||||
def main() -> None:
|
def main() -> None:
|
||||||
unit_timestamp = perf_counter_ns()
|
unit_timestamp = perf_counter_ns()
|
||||||
unit_gaps = [27, -18, 29]
|
unit_gaps = [27, -18, 29]
|
||||||
header(unit_gaps, ['Unit testing', 'Time spent (ns)', 'Formatted time spent'])
|
header(['Unit testing', 'Time spent (ns)', 'Formatted time spent'], unit_gaps)
|
||||||
benchmark_function('testing format_time', unit_gaps[0], format_time_test)
|
benchmark_function('testing format_time', unit_gaps[0], format_time_test)
|
||||||
benchmark_function('testing format_time_ns', unit_gaps[0], format_time_ns_test)
|
benchmark_function('testing format_time_ns', unit_gaps[0], format_time_ns_test)
|
||||||
time_spent = perf_counter_ns() - unit_timestamp
|
time_spent = perf_counter_ns() - unit_timestamp
|
||||||
@ -245,12 +235,12 @@ def main() -> None:
|
|||||||
X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test = preprocessing()
|
X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test = preprocessing()
|
||||||
models = train(X_train_feat, X_train_feat_argsort, y_train)
|
models = train(X_train_feat, X_train_feat_argsort, y_train)
|
||||||
|
|
||||||
# X_train_feat, X_test_feat = pickle_multi_loader([f'X_train_feat_{label}', f'X_test_feat_{label}'], OUT_DIR)
|
# X_train_feat, X_test_feat = picke_multi_loader([f"X_train_feat_{label}", f"X_test_feat_{label}"], OUT_DIR)
|
||||||
# indices = pickle_multi_loader(['indices'], OUT_DIR)[0]
|
# indices = picke_multi_loader(["indices"], OUT_DIR)[0]
|
||||||
# X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]
|
# X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]
|
||||||
|
|
||||||
testing_and_evaluating(models, X_train_feat, y_train, X_test_feat, y_test)
|
testing_and_evaluating(models, X_train_feat, y_train, X_test_feat, y_test)
|
||||||
unit_test(TS)
|
unit_test(TS)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
numba==0.59.1
|
numba
|
||||||
scikit-learn==1.4.1.post1
|
scikit-learn
|
||||||
tqdm==4.66.2
|
tqdm
|
||||||
|
189
python/test.py
Normal file
189
python/test.py
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
import numpy as np
|
||||||
|
from numba import cuda, config, njit
|
||||||
|
config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
|
||||||
|
#import matplotlib.pyplot as plt
|
||||||
|
from tqdm import tqdm
|
||||||
|
from time import perf_counter_ns
|
||||||
|
from toolbox import format_time_ns
|
||||||
|
from pickle import load, dump
|
||||||
|
from sys import argv
|
||||||
|
|
||||||
|
def get(a):
|
||||||
|
with open(f"{a}.pkl", 'rb') as f:
|
||||||
|
return load(f)
|
||||||
|
|
||||||
|
def save(a, name) -> None:
|
||||||
|
with open(name, 'wb') as f:
|
||||||
|
dump(a, f)
|
||||||
|
|
||||||
|
def diff(folder, a, label1, label2):
|
||||||
|
af, bf = get(f"{folder}/{a}_{label1}"), get(f"{folder}/{a}_{label2}")
|
||||||
|
#print(af)
|
||||||
|
#print(bf)
|
||||||
|
print((af - bf).mean())
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
if len(argv) == 5:
|
||||||
|
diff(argv[1], argv[4], argv[2], argv[3])
|
||||||
|
|
||||||
|
def py_mean(a, b):
|
||||||
|
s = 0.0
|
||||||
|
for a_i, b_i in zip(a, b):
|
||||||
|
s += a_i * b_i
|
||||||
|
return s / a.shape[0]
|
||||||
|
|
||||||
|
def np_mean(a, b):
|
||||||
|
return np.mean(a * b)
|
||||||
|
|
||||||
|
@njit('float64(float64[:], float64[:])', fastmath = True, nogil = True)
|
||||||
|
def nb_mean(a, b):
|
||||||
|
return np.mean(a * b)
|
||||||
|
|
||||||
|
@njit('float64(float64[:], float64[:])', fastmath = True, nogil = True)
|
||||||
|
def nb_mean_loop(a, b):
|
||||||
|
s = 0.0
|
||||||
|
for a_i, b_i in zip(a, b):
|
||||||
|
s += a_i * b_i
|
||||||
|
return s / a.shape[0]
|
||||||
|
|
||||||
|
@cuda.jit('void(float64[:], float64[:], float64[:])', fastmath = True)
|
||||||
|
def cuda_mean_kernel(r, a, b):
|
||||||
|
s = 0.0
|
||||||
|
for a_i, b_i in zip(a, b):
|
||||||
|
s += a_i * b_i
|
||||||
|
r[0] = s / a.shape[0]
|
||||||
|
|
||||||
|
def cuda_mean(a, b):
|
||||||
|
r = cuda.to_device(np.empty(1, dtype = np.float64))
|
||||||
|
d_a = cuda.to_device(a)
|
||||||
|
d_b = cuda.to_device(b)
|
||||||
|
cuda_mean_kernel[1, 1](r, d_a, d_b)
|
||||||
|
return r.copy_to_host()[0]
|
||||||
|
|
||||||
|
def test_and_compare(labels, fncs, a, b):
|
||||||
|
m = []
|
||||||
|
for fnc in tqdm(fncs, leave = False, desc = "Calculating..."):
|
||||||
|
s = perf_counter_ns()
|
||||||
|
m.append([fnc(a, b), perf_counter_ns() - s])
|
||||||
|
print("Results:")
|
||||||
|
[print(f"\t{label:<10} {m_i:<20} {format_time_ns(time_i)}") for ((m_i, time_i), label) in zip(m, labels)]
|
||||||
|
print("Comparaison:")
|
||||||
|
for i, (m_i, label_i) in enumerate(zip(m, labels)):
|
||||||
|
for j, (m_j, label_j) in enumerate(zip(m, labels)):
|
||||||
|
if i >= j:
|
||||||
|
continue
|
||||||
|
print(f"\t{label_i:<10} vs {label_j:<10} - {abs(m_i[0] - m_j[0])}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
np.set_printoptions(linewidth = 10000, threshold = 1000)
|
||||||
|
|
||||||
|
N = int(2**20)
|
||||||
|
labels = ["Python", "Numpy", "Numba", "Numba loop", "CUDA"]
|
||||||
|
fncs = [py_mean, np_mean, nb_mean, nb_mean_loop, cuda_mean]
|
||||||
|
|
||||||
|
print(f"RANDOM for N={N}")
|
||||||
|
total_size = (2 * 8 * N)
|
||||||
|
print(f"Size = {total_size} B")
|
||||||
|
print(f"Size = {total_size // 1024} kB")
|
||||||
|
print(f"Size = {total_size // 1024 // 1024} MB")
|
||||||
|
print(f"Size = {total_size // 1024 // 1024 // 1024} GB")
|
||||||
|
a, b = np.random.rand(N).astype(np.float64), np.random.rand(N).astype(np.float64)
|
||||||
|
test_and_compare(labels, fncs, a, b)
|
||||||
|
del a, b
|
||||||
|
|
||||||
|
print(f"\nDETERMINSTIC for N={N}")
|
||||||
|
total_size = (2 * 8 * N) + (8 * N)
|
||||||
|
print(f"Size = {total_size} B")
|
||||||
|
print(f"Size = {total_size // 1024} kB")
|
||||||
|
print(f"Size = {total_size // 1024 // 1024} MB")
|
||||||
|
print(f"Size = {total_size // 1024 // 1024 // 1024} GB")
|
||||||
|
mask = np.arange(N, dtype = np.uint64)
|
||||||
|
a = np.ones(N, dtype = np.float64)
|
||||||
|
a[mask < N//2] = 0.1
|
||||||
|
del mask
|
||||||
|
b = np.ones(N, dtype = np.float64)
|
||||||
|
test_and_compare(labels, fncs, a, b)
|
||||||
|
del a, b
|
||||||
|
|
||||||
|
#from ViolaJonesGPU import argsort as argsort_GPU
|
||||||
|
#from ViolaJonesCPU import argsort as argsort_CPU
|
||||||
|
#from toolbox import unit_test_argsort_2d, benchmark_function
|
||||||
|
|
||||||
|
#labels = ["Numpy", "Numba", "CUDA"]
|
||||||
|
#a = np.random.randint(2**12, size = (2**20, 2**8), dtype = np.int32)
|
||||||
|
#m = [benchmark_function(f"Argsort {label}", lambda: f(np.copy(a))) for (label, f) in zip(labels, [
|
||||||
|
# lambda a: np.argsort(a).astype(np.uint16), argsort_CPU, argsort_GPU
|
||||||
|
#])]
|
||||||
|
#for i, (m_i, label_i) in enumerate(zip(m, labels)):
|
||||||
|
# #for j, (m_j, label_j) in enumerate(zip(m, labels)):
|
||||||
|
# # if i >= j:
|
||||||
|
# # continue
|
||||||
|
# # print(f"\t{label_i:<10} vs {label_j:<10} - {(m_i == m_j).mean()}")
|
||||||
|
# benchmark_function(f"Unit test {label_i}", lambda: unit_test_argsort_2d(a, m_i))
|
||||||
|
|
||||||
|
#for i in tqdm(range(X.shape[0]), leave = False, desc = "Extract image"):
|
||||||
|
# x = X[i]
|
||||||
|
# y = Y[i]
|
||||||
|
# fig = plt.figure()
|
||||||
|
# plt.imshow(x, cmap = 'gray')
|
||||||
|
# plt.savefig(f"imgs/{y}/{i}.png")
|
||||||
|
# plt.close(fig)
|
||||||
|
|
||||||
|
#def extract_FD(Xy):
|
||||||
|
# X_c, Y_c = [], []
|
||||||
|
# for x,y in Xy:
|
||||||
|
# X_c.append(x)
|
||||||
|
# Y_c.append(y)
|
||||||
|
# X_c = np.asarray(X_c)
|
||||||
|
# Y_c = np.asarray(Y_c)
|
||||||
|
# return X_c, Y_c
|
||||||
|
|
||||||
|
#X_train, y_train = get('out/X_train'), get('out/y_train')
|
||||||
|
#X_test, y_test = get('out/X_test'), get('out/y_test')
|
||||||
|
|
||||||
|
#X_train, y_train = extract_FD(get('/home/_aspil0w/git/FaceDetection/training'))
|
||||||
|
#X_test, y_test = extract_FD(get('/home/_aspil0w/git/FaceDetection/test'))
|
||||||
|
#save(X_train, 'out/X_train'), save(y_train, 'out/y_train')
|
||||||
|
#save(X_test, 'out/X_test'), save(y_test, 'out/y_test')
|
||||||
|
|
||||||
|
#print(X_train.shape, X_train_org.shape, X_train.shape == X_train_org.shape)
|
||||||
|
#print((X_train == X_train_org).mean())
|
||||||
|
#print(y_train.shape, y_train_org.shape, y_train.shape == y_train_org.shape)
|
||||||
|
#print((y_train == y_train_org).mean())
|
||||||
|
|
||||||
|
#print(X_test.shape, X_test_org.shape, X_test.shape == X_test_org.shape)
|
||||||
|
#print((X_test == X_test_org).mean())
|
||||||
|
#print(y_test.shape, y_test_org.shape, y_test.shape == y_test_org.shape)
|
||||||
|
#print((y_test == y_test_org).mean())
|
||||||
|
|
||||||
|
#@njit('uint16[:](uint8[:, :, :], uint8[:, :, :])')
|
||||||
|
#def arg_find(X, X_org):
|
||||||
|
# arg = np.empty(X.shape[0], dtype = np.uint16)
|
||||||
|
# for i, x in enumerate(X_org):
|
||||||
|
# found = False
|
||||||
|
# for j, x_org in enumerate(X):
|
||||||
|
# if np.all(x == x_org):
|
||||||
|
# arg[i] = j
|
||||||
|
# found = True
|
||||||
|
# break
|
||||||
|
# assert found, "Image not found"
|
||||||
|
# return arg
|
||||||
|
|
||||||
|
#print("Arg find results train")
|
||||||
|
#arg_train = arg_find(X_train, X_train_org)
|
||||||
|
#print((X_train[arg_train] == X_train_org).mean())
|
||||||
|
#print((y_train[arg_train] == y_train_org).mean())
|
||||||
|
|
||||||
|
#print("Arg find results test")
|
||||||
|
#arg_test = arg_find(X_test, X_test_org)
|
||||||
|
#print((X_test[arg_test] == X_test_org).mean())
|
||||||
|
#print((y_test[arg_test] == y_test_org).mean())
|
||||||
|
|
||||||
|
#for i in tqdm(range(X_c.shape[0]), leave = False, desc = "Extract image"):
|
||||||
|
# x = X_c[i]
|
||||||
|
# y = Y_c[i]
|
||||||
|
# fig = plt.figure()
|
||||||
|
# plt.imshow(x, cmap = 'gray')
|
||||||
|
# plt.savefig(f"imgs2/{y}/{i}.png")
|
||||||
|
# plt.close(fig)
|
||||||
|
|
@ -1,141 +1,90 @@
|
|||||||
from typing import Any, Callable, List, Union, Final
|
from typing import Any, Callable, List, Union, Final
|
||||||
from time import perf_counter_ns
|
from time import perf_counter_ns
|
||||||
|
from numba import njit
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sys import stderr
|
from sys import stderr
|
||||||
import pickle
|
import pickle
|
||||||
import os
|
import os
|
||||||
from config import MODEL_DIR, OUT_DIR, __DEBUG
|
from config import MODEL_DIR, OUT_DIR
|
||||||
from decorators import njit
|
from decorators import njit
|
||||||
|
|
||||||
|
time_formats: Final = ["ns", "µs", "ms", "s", "m", "h", "j", "w", "M", "y", "c"]
|
||||||
def formatted_row(gaps: list[int], titles: list[str], separator: str = '│') -> None:
|
def formatted_row(gaps: list[int], titles: list[str], separator: str = '│') -> None:
|
||||||
"""Print a formatted row of titles with of gaps seperated by a separator.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
gaps: List of size gaps
|
|
||||||
titles: List of titles
|
|
||||||
separator: Separator character between each gap
|
|
||||||
"""
|
|
||||||
for gap, title in zip(gaps, titles):
|
for gap, title in zip(gaps, titles):
|
||||||
print(f"{separator} {title:{'>' if gap < 0 else '<'}{abs(gap)}} ", end = '')
|
print(f"{separator} {title:{'>' if gap < 0 else '<'}{abs(gap)}} ", end = '')
|
||||||
print(separator)
|
print(separator)
|
||||||
|
|
||||||
def formatted_line(gaps: list[int], left: str, middle: str, separator: str, right: str) -> None:
|
def formatted_line(gaps: list[int], right: str, middle: str, separator: str, left: str) -> None:
|
||||||
"""Print a formatted line of repeated characters.
|
print(right, end = '')
|
||||||
|
|
||||||
Args:
|
|
||||||
gaps: List of size gaps
|
|
||||||
left: Character on the left
|
|
||||||
middle: Character between each separator
|
|
||||||
separator: Separator character between each gap
|
|
||||||
right: Character on the right
|
|
||||||
"""
|
|
||||||
print(left, end = '')
|
|
||||||
last_gap = len(gaps) - 1
|
last_gap = len(gaps) - 1
|
||||||
for i, gap in enumerate(gaps):
|
for i, gap in enumerate(gaps):
|
||||||
print(f'{separator * (abs(gap) + 2)}', end = '')
|
print(f'{separator * (abs(gap) + 2)}', end = '')
|
||||||
if i != last_gap:
|
if i != last_gap:
|
||||||
print(middle, end = '')
|
print(middle, end = '')
|
||||||
print(right)
|
print(left)
|
||||||
|
|
||||||
def header(gaps: list[int], titles: list[str]) -> None:
|
def header(titles: list[str], gaps: list[int]) -> None:
|
||||||
"""Print a formatted header with the given titles and sizes.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
gaps: List of size gaps
|
|
||||||
titles: List of titles
|
|
||||||
"""
|
|
||||||
formatted_line(gaps, '┌', '┬', '─', '┐')
|
formatted_line(gaps, '┌', '┬', '─', '┐')
|
||||||
formatted_row(gaps, titles)
|
formatted_row(gaps, titles)
|
||||||
formatted_line(gaps, '├', '┼', '─', '┤')
|
formatted_line(gaps, '├', '┼', '─', '┤')
|
||||||
|
|
||||||
def footer(gaps: list[int]) -> None:
|
def footer(gaps: list[int]) -> None:
|
||||||
"""Print a formatted footer with the given sizes.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
gaps: List of size gaps
|
|
||||||
"""
|
|
||||||
formatted_line(gaps, '└', '┴', '─', '┘')
|
formatted_line(gaps, '└', '┴', '─', '┘')
|
||||||
|
|
||||||
time_formats: Final = ['ns', 'µs', 'ms', 's', 'm', 'h', 'j', 'w', 'M', 'y', 'c']
|
|
||||||
time_numbers: Final = np.array([1, 1e3, 1e6, 1e9, 6e10, 36e11, 864e11, 6048e11, 26784e11, 31536e12, 31536e14], dtype = np.uint64)
|
time_numbers: Final = np.array([1, 1e3, 1e6, 1e9, 6e10, 36e11, 864e11, 6048e11, 26784e11, 31536e12, 31536e14], dtype = np.uint64)
|
||||||
@njit('str(uint64)')
|
@njit('str(uint64)')
|
||||||
def format_time_ns(time: int) -> str:
|
def format_time_ns(time: int) -> str:
|
||||||
"""Format the time in nanoseconds in human readable format.
|
"""Format the time in nanoseconds in human readable format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
time (int): Time in nanoseconds
|
time (int): Time in nanoseconds.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The formatted human readable string
|
str: The formatted human readable string.
|
||||||
"""
|
"""
|
||||||
assert time >= 0, 'Incorrect time stamp'
|
assert time >= 0, "Incorrect time stamp"
|
||||||
if time == 0:
|
if time == 0:
|
||||||
return '0ns'
|
return "0ns"
|
||||||
|
|
||||||
s = ''
|
s = ""
|
||||||
for i in range(time_numbers.shape[0])[::-1]:
|
for i in range(time_numbers.shape[0])[::-1]:
|
||||||
if time >= time_numbers[i]:
|
if time >= time_numbers[i]:
|
||||||
res = int(time // time_numbers[i])
|
res = int(time // time_numbers[i])
|
||||||
time = time % time_numbers[i]
|
time = time % time_numbers[i]
|
||||||
s += f'{res}{time_formats[i]} '
|
s += f"{res}{time_formats[i]} "
|
||||||
|
|
||||||
assert time == 0, 'Leftover in formatting time !'
|
assert time == 0, "Leftover in formatting time !"
|
||||||
return s.rstrip()
|
return s.rstrip()
|
||||||
|
|
||||||
@njit('str(uint64)')
|
def picke_multi_loader(filenames: List[str], save_dir: str = MODEL_DIR) -> List[Any]:
|
||||||
def format_time(time: int) -> str:
|
|
||||||
"""Format the time in seconds in human readable format.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
time (int): Time in seconds
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: The formatted human readable string
|
|
||||||
"""
|
|
||||||
assert time >= 0, 'Incorrect time stamp'
|
|
||||||
if time == 0:
|
|
||||||
return '0s'
|
|
||||||
|
|
||||||
s = ''
|
|
||||||
for i in range(3, time_numbers.shape[0])[::-1]:
|
|
||||||
time_number = time_numbers[i] / int(1e9)
|
|
||||||
if time >= time_number:
|
|
||||||
res = int(time // time_number)
|
|
||||||
time = time % time_number
|
|
||||||
s += f'{res}{time_formats[i]} '
|
|
||||||
|
|
||||||
assert time == 0, 'Leftover in formatting time !'
|
|
||||||
return s.rstrip()
|
|
||||||
|
|
||||||
def pickle_multi_loader(filenames: List[str], save_dir: str = MODEL_DIR) -> List[Any]:
|
|
||||||
"""Load multiple pickle data files.
|
"""Load multiple pickle data files.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
filenames (List[str]): List of all the filename to load
|
filenames (List[str]): List of all the filename to load.
|
||||||
save_dir (str, optional): Path of the files to load. Defaults to MODELS_DIR (see config.py)
|
save_dir (str, optional): Path of the files to load. Defaults to MODELS_DIR (see config.py).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[Any]. List of loaded pickle data files
|
List[Any]. List of loaded pickle data files.
|
||||||
"""
|
"""
|
||||||
b = []
|
b = []
|
||||||
for f in filenames:
|
for f in filenames:
|
||||||
filepath = f'{save_dir}/{f}.pkl'
|
filepath = f"{save_dir}/{f}.pkl"
|
||||||
if os.path.exists(filepath):
|
if os.path.exists(filepath):
|
||||||
with open(filepath, 'rb') as file_bytes:
|
with open(filepath, "rb") as filebyte:
|
||||||
b.append(pickle.load(file_bytes))
|
b.append(pickle.load(filebyte))
|
||||||
else:
|
else:
|
||||||
b.append(None)
|
b.append(None)
|
||||||
return b
|
return b
|
||||||
|
|
||||||
def benchmark_function(step_name: str, column_width: int, fnc: Callable) -> Any:
|
def benchmark_function(step_name: str, column_width: int, fnc: Callable) -> Any:
|
||||||
"""Benchmark a function and display the result in stdout.
|
"""Benchmark a function and display the result of stdout.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
step_name (str): Name of the function to call
|
step_name (str): Name of the function to call.
|
||||||
fnc (Callable): Function to call
|
fnc (Callable): Function to call.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Any: Result of the function
|
Any: Result of the function.
|
||||||
"""
|
"""
|
||||||
print(f'{step_name}...', file = stderr, end = '\r')
|
print(f'{step_name}...', file = stderr, end = '\r')
|
||||||
s = perf_counter_ns()
|
s = perf_counter_ns()
|
||||||
@ -149,34 +98,34 @@ def state_saver(step_name: str, column_width: int, filename: Union[str, List[str
|
|||||||
"""Either execute a function then saves the result or load the already existing result.
|
"""Either execute a function then saves the result or load the already existing result.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
step_name (str): Name of the function to call
|
step_name (str): Name of the function to call.
|
||||||
filename (Union[str, List[str]]): Name or list of names of the filenames where the result(s) are saved
|
filename (Union[str, List[str]]): Name or list of names of the filenames where the result(s) are saved.
|
||||||
fnc ([type]): Function to call
|
fnc ([type]): Function to call.
|
||||||
force_redo (bool, optional): Recall the function even if the result(s) is already saved. Defaults to False
|
force_redo (bool, optional): Recall the function even if the result(s) is already saved. Defaults to False.
|
||||||
save_dir (str, optional): Path of the directory to save the result(s). Defaults to OUT_DIR (see config.py)
|
save_dir (str, optional): Path of the directory to save the result(s). Defaults to OUT_DIR (see config.py).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Any: The result(s) of the called function
|
Any: The result(s) of the called function
|
||||||
"""
|
"""
|
||||||
if isinstance(filename, str):
|
if isinstance(filename, str):
|
||||||
if not os.path.exists(f'{save_dir}/{filename}.pkl') or force_redo:
|
if not os.path.exists(f"{save_dir}/{filename}.pkl") or force_redo:
|
||||||
b = benchmark_function(step_name, column_width, fnc)
|
b = benchmark_function(step_name, column_width, fnc)
|
||||||
if save_state:
|
if save_state:
|
||||||
|
with open(f"{save_dir}/{filename}.pkl", 'wb') as f:
|
||||||
print(f'Saving results of {step_name}', file = stderr, end = '\r')
|
print(f'Saving results of {step_name}', file = stderr, end = '\r')
|
||||||
with open(f'{save_dir}/{filename}.pkl', 'wb') as f:
|
|
||||||
pickle.dump(b, f)
|
pickle.dump(b, f)
|
||||||
print(' ' * 100, file = stderr, end = '\r')
|
print(' ' * 100, file = stderr, end = '\r')
|
||||||
return b
|
return b
|
||||||
else:
|
else:
|
||||||
|
with open(f"{save_dir}/{filename}.pkl", "rb") as f:
|
||||||
print(f'Loading results of {step_name}', file = stderr, end = '\r')
|
print(f'Loading results of {step_name}', file = stderr, end = '\r')
|
||||||
with open(f'{save_dir}/{filename}.pkl', 'rb') as f:
|
|
||||||
res = pickle.load(f)
|
res = pickle.load(f)
|
||||||
print(f"│ {step_name:<{column_width}} │ {'None':>18} │ {'loaded saved state':<29} │")
|
print(f"│ {step_name:<{column_width}} │ {'None':>18} │ {'loaded saved state':<29} │")
|
||||||
return res
|
return res
|
||||||
elif isinstance(filename, list):
|
elif isinstance(filename, list):
|
||||||
abs = False
|
abs = False
|
||||||
for fn in filename:
|
for fn in filename:
|
||||||
if not os.path.exists(f'{save_dir}/{fn}.pkl'):
|
if not os.path.exists(f"{save_dir}/{fn}.pkl"):
|
||||||
abs = True
|
abs = True
|
||||||
break
|
break
|
||||||
if abs or force_redo:
|
if abs or force_redo:
|
||||||
@ -184,7 +133,7 @@ def state_saver(step_name: str, column_width: int, filename: Union[str, List[str
|
|||||||
if save_state:
|
if save_state:
|
||||||
print(f'Saving results of {step_name}', file = stderr, end = '\r')
|
print(f'Saving results of {step_name}', file = stderr, end = '\r')
|
||||||
for bi, fnI in zip(b, filename):
|
for bi, fnI in zip(b, filename):
|
||||||
with open(f'{save_dir}/{fnI}.pkl', 'wb') as f:
|
with open(f"{save_dir}/{fnI}.pkl", 'wb') as f:
|
||||||
pickle.dump(bi, f)
|
pickle.dump(bi, f)
|
||||||
print(' ' * 100, file = stderr, end = '\r')
|
print(' ' * 100, file = stderr, end = '\r')
|
||||||
return b
|
return b
|
||||||
@ -193,31 +142,21 @@ def state_saver(step_name: str, column_width: int, filename: Union[str, List[str
|
|||||||
b = []
|
b = []
|
||||||
print(f'Loading results of {step_name}', file = stderr, end = '\r')
|
print(f'Loading results of {step_name}', file = stderr, end = '\r')
|
||||||
for fn in filename:
|
for fn in filename:
|
||||||
with open(f'{save_dir}/{fn}.pkl', 'rb') as f:
|
with open(f"{save_dir}/{fn}.pkl", "rb") as f:
|
||||||
b.append(pickle.load(f))
|
b.append(pickle.load(f))
|
||||||
print(' ' * 100, file = stderr, end = '\r')
|
print(' ' * 100, file = stderr, end = '\r')
|
||||||
return b
|
return b
|
||||||
else:
|
else:
|
||||||
assert False, f'Incompatible filename type = {type(filename)}'
|
assert False, f"Incompatible filename type = {type(filename)}"
|
||||||
|
|
||||||
@njit('boolean(int32[:, :], uint16[:, :])')
|
@njit('boolean(int32[:, :], uint16[:, :])')
|
||||||
def unit_test_argsort_2d(arr: np.ndarray, indices: np.ndarray) -> bool:
|
def unit_test_argsort_2d(arr: np.ndarray, indices: np.ndarray) -> bool:
|
||||||
"""Test if a given 2D array of indices sort a given 2D array.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
arr (np.ndarray): 2D Array of data
|
|
||||||
indices (np.ndarray): 2D Indices that sort the array
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
bool: Whether the test was successful
|
|
||||||
"""
|
|
||||||
n = indices.shape[0]
|
n = indices.shape[0]
|
||||||
total = indices.shape[0] * indices.shape[1]
|
total = indices.shape[0] * indices.shape[1]
|
||||||
for i, sub_indices in enumerate(indices):
|
for i, sub_indices in enumerate(indices):
|
||||||
for j in range(sub_indices.shape[0] - 1):
|
for j in range(sub_indices.shape[0] - 1):
|
||||||
if arr[i, sub_indices[j]] <= arr[i, sub_indices[j + 1]]:
|
if arr[i, sub_indices[j]] <= arr[i, sub_indices[j + 1]]:
|
||||||
n += 1
|
n += 1
|
||||||
if __DEBUG:
|
|
||||||
if n != total:
|
if n != total:
|
||||||
print(n, total, n / (total))
|
print(n, total, n / (total))
|
||||||
return n == total
|
return n == total
|
||||||
|
@ -1,132 +1,67 @@
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
from toolbox import format_time, format_time_ns
|
from toolbox import format_time_ns
|
||||||
|
|
||||||
def Assert(name: str, expected: Any, result: Any):
|
def Assert(name: str, expected: Any, result: Any):
|
||||||
"""Test if a given result is equal of the expected one and log result
|
|
||||||
|
|
||||||
Args:
|
|
||||||
name (str): name of the unit test
|
|
||||||
expected (Any): expected result of the function call
|
|
||||||
result (Any): result of the function
|
|
||||||
"""
|
|
||||||
if expected != result:
|
if expected != result:
|
||||||
print(f"For test name {name} Expected '{expected}' but got '{result}' instead")
|
print(f"For test name {name} Expected '{expected}' but got '{result}' instead")
|
||||||
assert False
|
assert False
|
||||||
|
|
||||||
def format_time_test() -> None:
|
|
||||||
"""Test suite for the format_time output
|
|
||||||
|
|
||||||
See https://en.wikipedia.org/wiki/Unit_of_time for details
|
|
||||||
"""
|
|
||||||
Assert("format_time null", "0s", format_time(0))
|
|
||||||
Assert("format_time second", "1s", format_time(1))
|
|
||||||
Assert("format_time decasecond", "10s", format_time(10))
|
|
||||||
Assert("format_time minute", "1m", format_time(60))
|
|
||||||
Assert("format_time milliday", "1m 26s", format_time(86)) # missing 0.4s due to precision
|
|
||||||
Assert("format_time hectosecond", "1m 40s", format_time(100))
|
|
||||||
Assert("format_time kilosecond", "16m 40s", format_time(int(1e3)))
|
|
||||||
Assert("format_time hour", "1h", format_time(3600))
|
|
||||||
Assert("format_time day", "1j", format_time(86400))
|
|
||||||
Assert("format_time week/sennight", "1w", format_time(604800))
|
|
||||||
Assert("format_time megasecond", "1w 4j 13h 46m 40s", format_time(int(1e6)))
|
|
||||||
Assert("format_time fortnight", "2w", format_time(1209600))
|
|
||||||
Assert("format_time lunar month (draconitic)", "3w 6j 5h 5m 35s", format_time(2351135)) # missing 0.8 due to precision
|
|
||||||
Assert("format_time lunar month (tropical)", "3w 6j 7h 43m 4s", format_time(2360584)) # missing 0.7 due to precision
|
|
||||||
Assert("format_time lunar month (sidereal)", "3w 6j 7h 43m 11s", format_time(2360591)) # missing 0.6 to precision
|
|
||||||
Assert("format_time lunar month (anomalistic)", "3w 6j 13h 18m 33s", format_time(2380713)) # missing 0.2 due to precision
|
|
||||||
Assert("format_time lunar month (synodic)", "4w 1j 12h 44m 2s", format_time(2551442)) # missing 0.9 due to precision
|
|
||||||
Assert("format_time month", "1M", format_time(2678400))
|
|
||||||
Assert("format_time quarantine", "1M 1w 2j", format_time(int(3456e3)))
|
|
||||||
Assert("format_time semester", "4M 2j", format_time(10886400))
|
|
||||||
Assert("format_time lunar year", "11M 1w 6j 8h 52m 48s", format_time(30617568))
|
|
||||||
Assert("format_time year", "1y", format_time(int(31536e3)))
|
|
||||||
Assert("format_time tropical year", "1y 5h 48m 45s", format_time(31556925)) # missing 0.216 due to precision
|
|
||||||
Assert("format_time gregorian year", "1y 5h 49m 12s", format_time(31556952))
|
|
||||||
Assert("format_time sidereal year", "1y 6h 9m 9s", format_time(31558149)) # missing 0.7635456 due to precision
|
|
||||||
Assert("format_time leap year", "1y 1j", format_time(31622400))
|
|
||||||
Assert("format_time olympiad", "4y", format_time(int(126144e3)))
|
|
||||||
Assert("format_time lusturm", "5y", format_time(int(15768e4)))
|
|
||||||
Assert("format_time decade", "10y", format_time(int(31536e4)))
|
|
||||||
Assert("format_time indiction", "15y", format_time(int(47304e4)))
|
|
||||||
Assert("format_time score", "20y", format_time(int(63072e4)))
|
|
||||||
Assert("format_time gigasecond", "31y 8M 1w 4j 1h 46m 40s", format_time(int(1e9)))
|
|
||||||
Assert("format_time jubilee", "50y", format_time(int(15768e5)))
|
|
||||||
Assert("format_time century", "1c", format_time(int(31536e5)))
|
|
||||||
Assert("format_time millennium", "10c", format_time(int(31536e6)))
|
|
||||||
Assert("format_time age", "257c 72y", format_time(int(812745792e3)))
|
|
||||||
Assert("format_time terasecond", "3170c 97y 10M 3w 4j 17h 46m 40s", format_time(int(1e13)))
|
|
||||||
Assert("format_time megaannum", "10000c", format_time(int(31536e9)))
|
|
||||||
Assert("format_time petasecond", "317097c 91y 11M 2w 4j 1h 46m 40s", format_time(int(1e15)))
|
|
||||||
Assert("format_time galactic year", "2300000c", format_time(int(725328e10)))
|
|
||||||
Assert("format_time eon", "10000000c", format_time(int(31536e12)))
|
|
||||||
Assert("format_time kalpa", "43200000c", format_time(int(13623552e10)))
|
|
||||||
Assert("format_time exasecond", "317097919c 83y 9M 1h 46m 40s", format_time(int(1e18)))
|
|
||||||
# Cannot use number bigger than currently supported ISO Python
|
|
||||||
#Assert("format_time zettasecond", "", format_time(1e21))
|
|
||||||
#Assert("format_time yottasecond", "", format_time(1e24))
|
|
||||||
#Assert("format_time ronnasecond", "", format_time(1e27))
|
|
||||||
#Assert("format_time quettasecond", "", format_time(1e30))
|
|
||||||
# uint64_t_MAX == 2**64 == 18446744073709551615 == -1
|
|
||||||
Assert("format_time max", "5849424173c 55y 3w 5j 7h 16s", format_time(int(2**64 - 1)))
|
|
||||||
|
|
||||||
def format_time_ns_test() -> None:
|
def format_time_ns_test() -> None:
|
||||||
"""Test suite for the format_time_ns output
|
# https://en.wikipedia.org/wiki/Unit_of_time
|
||||||
|
Assert("format_time_ns null", "0ns", format_time_ns(0));
|
||||||
See https://en.wikipedia.org/wiki/Unit_of_time for details
|
Assert("format_time_ns nanosecond", "1ns", format_time_ns(1));
|
||||||
"""
|
Assert("format_time_ns shake", "10ns", format_time_ns(10));
|
||||||
Assert("format_time_ns null", "0ns", format_time_ns(0))
|
Assert("format_time_ns microsecond", "1µs", format_time_ns(int(1e3)));
|
||||||
Assert("format_time_ns nanosecond", "1ns", format_time_ns(1))
|
Assert("format_time_ns millisecond", "1ms", format_time_ns(int(1e6)));
|
||||||
Assert("format_time_ns shake", "10ns", format_time_ns(10))
|
Assert("format_time_ns centisecond", "10ms", format_time_ns(int(1e7)));
|
||||||
Assert("format_time_ns microsecond", "1µs", format_time_ns(int(1e3)))
|
Assert("format_time_ns decisecond", "100ms", format_time_ns(int(1e8)));
|
||||||
Assert("format_time_ns millisecond", "1ms", format_time_ns(int(1e6)))
|
Assert("format_time_ns second", "1s", format_time_ns(int(1e9)));
|
||||||
Assert("format_time_ns centisecond", "10ms", format_time_ns(int(1e7)))
|
Assert("format_time_ns decasecond", "10s", format_time_ns(int(1e10)));
|
||||||
Assert("format_time_ns decisecond", "100ms", format_time_ns(int(1e8)))
|
Assert("format_time_ns minute", "1m", format_time_ns(int(6e10)));
|
||||||
Assert("format_time_ns second", "1s", format_time_ns(int(1e9)))
|
Assert("format_time_ns milliday", "1m 26s 400ms", format_time_ns(int(864e8)));
|
||||||
Assert("format_time_ns decasecond", "10s", format_time_ns(int(1e10)))
|
Assert("format_time_ns hectosecond", "1m 40s", format_time_ns(int(1e11)));
|
||||||
Assert("format_time_ns minute", "1m", format_time_ns(int(6e10)))
|
Assert("format_time_ns kilosecond", "16m 40s", format_time_ns(int(1e12)));
|
||||||
Assert("format_time_ns milliday", "1m 26s 400ms", format_time_ns(int(864e8)))
|
Assert("format_time_ns hour", "1h", format_time_ns(int(36e11)));
|
||||||
Assert("format_time_ns hectosecond", "1m 40s", format_time_ns(int(1e11)))
|
Assert("format_time_ns day", "1j", format_time_ns(int(864e11)));
|
||||||
Assert("format_time_ns kilosecond", "16m 40s", format_time_ns(int(1e12)))
|
Assert("format_time_ns week/sennight", "1w", format_time_ns(int(6048e11)));
|
||||||
Assert("format_time_ns hour", "1h", format_time_ns(int(36e11)))
|
Assert("format_time_ns megasecond", "1w 4j 13h 46m 40s", format_time_ns(int(1e15)));
|
||||||
Assert("format_time_ns day", "1j", format_time_ns(int(864e11)))
|
Assert("format_time_ns fortnight", "2w", format_time_ns(int(12096e11)));
|
||||||
Assert("format_time_ns week/sennight", "1w", format_time_ns(int(6048e11)))
|
Assert("format_time_ns lunar month (draconitic)", "3w 6j 5h 5m 35s 800ms", format_time_ns(int(23511358e8)));
|
||||||
Assert("format_time_ns megasecond", "1w 4j 13h 46m 40s", format_time_ns(int(1e15)))
|
Assert("format_time_ns lunar month (tropical)", "3w 6j 7h 43m 4s 700ms", format_time_ns(int(23605847e8)));
|
||||||
Assert("format_time_ns fortnight", "2w", format_time_ns(int(12096e11)))
|
Assert("format_time_ns lunar month (sidereal)", "3w 6j 7h 43m 11s 600ms", format_time_ns(int(23605916e8)));
|
||||||
Assert("format_time_ns lunar month (draconitic)", "3w 6j 5h 5m 35s 800ms", format_time_ns(int(23511358e8)))
|
Assert("format_time_ns lunar month (anomalistic)", "3w 6j 13h 18m 33s 200ms", format_time_ns(int(23807132e8)));
|
||||||
Assert("format_time_ns lunar month (tropical)", "3w 6j 7h 43m 4s 700ms", format_time_ns(int(23605847e8)))
|
Assert("format_time_ns lunar month (synodic)", "4w 1j 12h 44m 2s 900ms", format_time_ns(int(25514429e8)));
|
||||||
Assert("format_time_ns lunar month (sidereal)", "3w 6j 7h 43m 11s 600ms", format_time_ns(int(23605916e8)))
|
Assert("format_time_ns month", "1M", format_time_ns(int(26784e11)));
|
||||||
Assert("format_time_ns lunar month (anomalistic)", "3w 6j 13h 18m 33s 200ms", format_time_ns(int(23807132e8)))
|
Assert("format_time_ns quarantine", "1M 1w 2j", format_time_ns(int(3456e12)));
|
||||||
Assert("format_time_ns lunar month (synodic)", "4w 1j 12h 44m 2s 900ms", format_time_ns(int(25514429e8)))
|
Assert("format_time_ns semester", "4M 2j", format_time_ns(int(108864e11)));
|
||||||
Assert("format_time_ns month", "1M", format_time_ns(int(26784e11)))
|
Assert("format_time_ns lunar year", "11M 1w 6j 8h 52m 48s", format_time_ns(int(30617568e9)));
|
||||||
Assert("format_time_ns quarantine", "1M 1w 2j", format_time_ns(int(3456e12)))
|
Assert("format_time_ns year", "1y", format_time_ns(int(31536e12)));
|
||||||
Assert("format_time_ns semester", "4M 2j", format_time_ns(int(108864e11)))
|
Assert("format_time_ns tropical year", "1y 5h 48m 45s 216ms", format_time_ns(int(31556925216e6)));
|
||||||
Assert("format_time_ns lunar year", "11M 1w 6j 8h 52m 48s", format_time_ns(int(30617568e9)))
|
Assert("format_time_ns gregorian year", "1y 5h 49m 12s", format_time_ns(int(31556952e9)));
|
||||||
Assert("format_time_ns year", "1y", format_time_ns(int(31536e12)))
|
Assert("format_time_ns sidereal year", "1y 6h 9m 9s 763ms 545µs 600ns", format_time_ns(int(315581497635456e2)));
|
||||||
Assert("format_time_ns tropical year", "1y 5h 48m 45s 216ms", format_time_ns(int(31556925216e6)))
|
Assert("format_time_ns leap year", "1y 1j", format_time_ns(int(316224e11)));
|
||||||
Assert("format_time_ns gregorian year", "1y 5h 49m 12s", format_time_ns(int(31556952e9)))
|
Assert("format_time_ns olympiad", "4y", format_time_ns(int(126144e12)));
|
||||||
Assert("format_time_ns sidereal year", "1y 6h 9m 9s 763ms 545µs 600ns", format_time_ns(int(315581497635456e2)))
|
Assert("format_time_ns lusturm", "5y", format_time_ns(int(15768e13)));
|
||||||
Assert("format_time_ns leap year", "1y 1j", format_time_ns(int(316224e11)))
|
Assert("format_time_ns decade", "10y", format_time_ns(int(31536e13)));
|
||||||
Assert("format_time_ns olympiad", "4y", format_time_ns(int(126144e12)))
|
Assert("format_time_ns indiction", "15y", format_time_ns(int(47304e13)));
|
||||||
Assert("format_time_ns lusturm", "5y", format_time_ns(int(15768e13)))
|
Assert("format_time_ns score", "20y", format_time_ns(int(63072e13)));
|
||||||
Assert("format_time_ns decade", "10y", format_time_ns(int(31536e13)))
|
Assert("format_time_ns gigasecond", "31y 8M 1w 4j 1h 46m 40s", format_time_ns(int(1e18)));
|
||||||
Assert("format_time_ns indiction", "15y", format_time_ns(int(47304e13)))
|
Assert("format_time_ns jubilee", "50y", format_time_ns(int(15768e14)));
|
||||||
Assert("format_time_ns score", "20y", format_time_ns(int(63072e13)))
|
Assert("format_time_ns century", "1c", format_time_ns(int(31536e14)));
|
||||||
Assert("format_time_ns gigasecond", "31y 8M 1w 4j 1h 46m 40s", format_time_ns(int(1e18)))
|
Assert("format_time_ns millennium", "10c", format_time_ns(int(31536e15)));
|
||||||
Assert("format_time_ns jubilee", "50y", format_time_ns(int(15768e14)))
|
Assert("format_time_ns age", "257c 72y", format_time_ns(int(812745792e12)));
|
||||||
Assert("format_time_ns century", "1c", format_time_ns(int(31536e14)))
|
Assert("format_time_ns terasecond", "3170c 97y 10M 3w 4j 17h 46m 40s", format_time_ns(int(1e22)));
|
||||||
Assert("format_time_ns millennium", "10c", format_time_ns(int(31536e15)))
|
Assert("format_time_ns megaannum", "10000c", format_time_ns(int(31536e18)));
|
||||||
Assert("format_time_ns age", "257c 72y", format_time_ns(int(812745792e12)))
|
|
||||||
Assert("format_time_ns terasecond", "3170c 97y 10M 3w 4j 17h 46m 40s", format_time_ns(int(1e22)))
|
|
||||||
Assert("format_time_ns megaannum", "10000c", format_time_ns(int(31536e18)))
|
|
||||||
# Cannot use number bigger than currently supported ISO Python
|
# Cannot use number bigger than currently supported ISO Python
|
||||||
# Assert("format_time_ns petasecond", "317097c 91y 11M 2w 4j 1h 46m 40s", format_time_ns(int(1e24)))
|
#Assert("format_time_ns petasecond", "317097c 91y 11M 2w 4j 1h 46m 40s", format_time_ns(int(1e24)));
|
||||||
# Assert("format_time_ns galactic year", "2300000c", format_time_ns(int(725328e19)))
|
#Assert("format_time_ns galactic year", "2300000c", format_time_ns(int(725328e19)));
|
||||||
# Assert("format_time_ns eon", "10000000c", format_time_ns(int(31536e21)))
|
#Assert("format_time_ns eon", "10000000c", format_time_ns(int(31536e21)));
|
||||||
# Assert("format_time_ns kalpa", "43200000c", format_time_ns(int(13623552e19)))
|
#Assert("format_time_ns kalpa", "43200000c", format_time_ns(int(13623552e19)));
|
||||||
# Assert("format_time_ns exasecond", "317097919c 83y 9M 1h 46m 40s", format_time_ns(int(1e27)))
|
#Assert("format_time_ns exasecond", "317097919c 83y 9M 1h 46m 40s", format_time_ns(int(1e27)));
|
||||||
# Assert("format_time_ns zettasecond", "", format_time_ns(int(1e30)))
|
#Assert("format_time_ns zettasecond", "", format_time_ns(int(1e30)));
|
||||||
# Assert("format_time_ns yottasecond", "", format_time_ns(int(1e33)))
|
#Assert("format_time_ns yottasecond", "", format_time_ns(int(1e33)));
|
||||||
# Assert("format_time_ns ronnasecond", "", format_time_ns(int(1e36)))
|
#Assert("format_time_ns ronnasecond", "", format_time_ns(int(1e36)));
|
||||||
# Assert("format_time_ns quettasecond", "", format_time_ns(int(1e39)))
|
#Assert("format_time_ns quettasecond", "", format_time_ns(int(1e39)));
|
||||||
# uint64_t_MAX == 2**64 == 18446744073709551615 == -1
|
# uint64_t_MAX == 2**64 == 18446744073709551615I64u == -1
|
||||||
Assert("format_time_ns max", "5c 84y 11M 2j 23h 34m 33s 709ms 551µs 615ns", format_time_ns(2**64 - 1))
|
Assert("format_time_ns max", "5c 84y 11M 2j 23h 34m 33s 709ms 551µs 615ns", format_time_ns(2**64 - 1))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user