Moved DEBUG option to config files

2023-07-14 23:57:58 +02:00
parent e6194ac485
commit 399024da7a
12 changed files with 280 additions and 268 deletions
--- a/cpp/Makefile
+++ b/cpp/Makefile
@@ -5,7 +5,6 @@ MODELS_DIR := models
 OUT_DIR := out
 SRC_DIR := .
 #CFLAGS := -O0 -Werror=all-warnings -g -G
-#CFLAGS := $(CFLAGS) -D__DEBUG
 #CFLAGS := $(CFLAGS) -pg
 #CFLAGS := $(CFLAGS) -Xptxas=-w
 #CFLAGS := $(CFLAGS) -Xcompiler -Wall,-O0,-g,-Werror,-Werror=implicit-fallthrough=0,-Wextra,-rdynamic
--- a/cpp/ViolaJones.hpp
+++ b/cpp/ViolaJones.hpp
@@ -6,28 +6,35 @@ namespace fs = std::filesystem;
 //#include "config.hpp"

 template <typename T>
-void unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noexcept {
+bool unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noexcept {
 	if (cpu.shape != gpu.shape) {
+#if __DEBUG
 		fprintf(stderr, "Inequal shape !\n");
-		return;
+#endif
+		return false;
 	}
 	size_t eq = 0;
 	const size_t length = np::prod(cpu.shape);
 	for (size_t i = 0; i < length; ++i)
 		if (cpu[i] == gpu[i])
 			++eq;
-		//else
-		//	std::cout << i << ": " << cpu[i] << " != " << gpu[i] << std::endl;
+
+#if __DEBUG
 	if (eq != length)
 		printf("Incorrect results, Number of equalities : %s/%s <=> %.2f%% !\n", thousand_sep(eq).c_str(), thousand_sep(length).c_str(),
 				static_cast<float64_t>(eq) / static_cast<float64_t>(length) * 100.0);
+#endif
+
+	return eq == length;
 }

 template <typename T>
-void unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indices) noexcept {
+bool unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indices) noexcept {
 	if (a.shape != indices.shape) {
+#if __DEBUG
 		fprintf(stderr, "Inequal shape !\n");
-		return;
+#endif
+		return false;
 	}
 	size_t correct = a.shape[0]; // First elements are always correctly sorted
 	const size_t total = np::prod(a.shape);
@@ -37,34 +44,37 @@ void unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indi
 			if(a[i + indices[k]] <= a[i + indices[k + 1]])
 				++correct;
 		}
+#if __DEBUG
 	if (correct != total)
 		printf("Incorrect results, Number of equalities : %s/%s <=> %.2f%% !\n", thousand_sep(correct).c_str(), thousand_sep(total).c_str(),
 				static_cast<float64_t>(correct) / static_cast<float64_t>(total) * 100.0);
+#endif
+	return correct == total;
 }

 template <typename T, typename F, typename... Args>
 T benchmark_function(const char* step_name, const F& fnc, Args &&...args) noexcept {
-#ifndef __DEBUG
+#if __DEBUG == false
 	printf("%s...\r", step_name);
 	fflush(stdout); // manual flush is mandatory, otherwise it will not be shown immediately because the output is buffered
 #endif
 	const auto start = time();
 	const T res = fnc(std::forward<Args>(args)...);
 	const long long timespent = duration_ns(time() - start);
-	printf("| %-49s | %17s | %-29s |\n", step_name, thousand_sep(timespent).c_str(), format_time_ns(timespent).c_str());
+	printf("| %-49s | %18s | %-29s |\n", step_name, thousand_sep(timespent).c_str(), format_time_ns(timespent).c_str());
 	return res;
 }

 template <typename F, typename... Args>
 void benchmark_function_void(const char* step_name, const F& fnc, Args &&...args) noexcept {
-#ifndef __DEBUG
+#if __DEBUG == false
 	printf("%s...\r", step_name);
 	fflush(stdout); // manual flush is mandatory, otherwise it will not be shown immediately because the output is buffered
 #endif
 	const auto start = time();
 	fnc(std::forward<Args>(args)...);
 	const long long timespent = duration_ns(time() - start);
-	printf("| %-49s | %17s | %-29s |\n", step_name, thousand_sep(timespent).c_str(), format_time_ns(timespent).c_str());
+	printf("| %-49s | %18s | %-29s |\n", step_name, thousand_sep(timespent).c_str(), format_time_ns(timespent).c_str());
 }

 template <typename T, typename F, typename... Args>
@@ -76,23 +86,23 @@ np::Array<T> state_saver(const char* step_name, const char* filename, const bool
 	if (!fs::exists(filepath) || force_redo) {
 		bin = std::move(benchmark_function<np::Array<T>>(step_name, fnc, std::forward<Args>(args)...));
 		if(save_state){
-#ifndef __DEBUG
+#if __DEBUG == false
 			printf("Saving results of %s\r", step_name);
 			fflush(stdout);
 #endif
 			save<T>(bin, filepath);
-#ifndef __DEBUG
+#if __DEBUG == false
 			printf("%*c\r", 100, ' ');
 			fflush(stdout);
 #endif
 		}
 	} else {
-#ifndef __DEBUG
+#if __DEBUG == false
 		printf("Loading results of %s\r", step_name);
 		fflush(stdout);
 #endif
 		bin = std::move(load<T>(filepath));
-		printf("| %-49s | %17s | %-29s |\n", step_name, "None", "loaded saved state");
+		printf("| %-49s | %18s | %-29s |\n", step_name, "None", "loaded saved state");
 	}
 	return bin;
 }
@@ -113,7 +123,7 @@ std::array<np::Array<T>, N> state_saver(const char* step_name, const std::vector
 	if (abs || force_redo) {
 		bin = std::move(benchmark_function<std::array<np::Array<T>, N>>(step_name, fnc, std::forward<Args>(args)...));
 		if (save_state){
-#ifndef __DEBUG
+#if __DEBUG == false
 			printf("Saving results of %s\r", step_name);
 			fflush(stdout);
 #endif
@@ -122,13 +132,13 @@ std::array<np::Array<T>, N> state_saver(const char* step_name, const std::vector
 				sprintf(filepath, "%s/%s.bin", out_dir, filename);
 				save<T>(bin[i++], filepath);
 			}
-#ifndef __DEBUG
+#if __DEBUG == false
 			printf("%*c\r", 100, ' ');
 			fflush(stdout);
 #endif
 		}
 	} else {
-#ifndef __DEBUG
+#if __DEBUG == false
 		printf("Loading results of %s\r", step_name);
 		fflush(stdout);
 #endif
@@ -137,7 +147,7 @@ std::array<np::Array<T>, N> state_saver(const char* step_name, const std::vector
 			sprintf(filepath, "%s/%s.bin", out_dir, filename);
 			bin[i++] = std::move(load<T>(filepath));
 		}
-		printf("| %-49s | %17s | %-29s |\n", step_name, "None", "loaded saved state");
+		printf("| %-49s | %18s | %-29s |\n", step_name, "None", "loaded saved state");
 	}
 	return bin;
 }
--- a/cpp/ViolaJonesGPU.cu
+++ b/cpp/ViolaJonesGPU.cu
@@ -2,16 +2,7 @@
 #include "data.hpp"
 #include "toolbox.hpp"
 #include "ViolaJones.hpp"
-
-#define NB_THREADS 1024
-
-#define NB_THREADS_2D_X 32
-#define NB_THREADS_2D_Y 32
-__device__ constexpr const size_t M = 5; //log2(NB_THREADS_2D_Y));
-
-#define NB_THREADS_3D_X 16
-#define NB_THREADS_3D_Y 16
-#define NB_THREADS_3D_Z 4
+#include "config.hpp"

 static __global__ void __test_working_kernel__(const np::Array<size_t> d_x, np::Array<size_t> d_y, const size_t length) {
 	const size_t i = blockIdx.x * blockDim.x + threadIdx.x;
@@ -22,7 +13,7 @@ static __global__ void __test_working_kernel__(const np::Array<size_t> d_x, np::
 void test_working(const size_t& length) noexcept {
 	const size_t size = length * sizeof(size_t);

-#ifdef __DEBUG
+#if __DEBUG
 	print("Estimating memory footprint at : " + format_byte_size(2 * size));
 #endif

@@ -64,7 +55,7 @@ void test_working_2d(const size_t& N1, const size_t& N2) noexcept {
 	const size_t length = N1 * N2;
 	const size_t size = length * sizeof(size_t);

-#ifdef __DEBUG
+#if __DEBUG
 	print("Estimating memory footprint at : " + format_byte_size(2 * size));
 #endif

@@ -107,7 +98,7 @@ void test_working_3d(const size_t& N1, const size_t& N2, const size_t& N3) noexc
 	const size_t length = N1 * N2 * N3;
 	const size_t size = length * sizeof(size_t);

-#ifdef __DEBUG
+#if __DEBUG
 	print("Estimating memory footprint at : " + format_byte_size(2 * size));
 #endif

--- a/cpp/config.hpp
+++ b/cpp/config.hpp
@@ -1,5 +1,21 @@
 #pragma once

+#define DATA_DIR "../data"
+#define OUT_DIR "./out"
+#define MODEL_DIR "./models"
+
+#ifdef __CUDACC__
+#define NB_THREADS 1024
+
+#define NB_THREADS_2D_X 32
+#define NB_THREADS_2D_Y 32
+__device__ constexpr const size_t M = 5; //log2(NB_THREADS_2D_Y));
+
+#define NB_THREADS_3D_X 16
+#define NB_THREADS_3D_Y 16
+#define NB_THREADS_3D_Z 4
+#endif
+
 // Save state to avoid recalculation on restart
 #define SAVE_STATE true
 // Redo the state even if it's already saved
@@ -12,3 +28,11 @@
 // const size_t TS[] =  { 1, 5, 10, 25, 50 };
 // const size_t TS[] =  { 1, 5, 10, 25, 50, 100, 200, 300 };
 const size_t TS[] =  { 1, 5, 10, 25, 50, 100, 200, 300, 400, 500, 1000 };
+
+// Enable verbose output (for debugging purposes)
+#define __DEBUG false
+// Debugging options
+#if __DEBUG
+#define IDX_INSPECT 4548
+#define IDX_INSPECT_OFFSET 100
+#endif
--- a/cpp/data.hpp
+++ b/cpp/data.hpp
@@ -5,10 +5,8 @@
 #include <cassert>
 #include <functional>
 #include <memory>
+#include "config.hpp"

-#define DATA_DIR "../data"
-#define OUT_DIR "./out"
-#define MODEL_DIR "./models"
 #define BUFFER_SIZE 256
 #define STRING_INT_SIZE 8 // Length of a number in log10 (including '-')
 #define S(N) std::string(N, '-').c_str()
@@ -42,20 +40,20 @@ namespace np {
 		size_t length = 0;
 		size_t* data = nullptr;
 		size_t* refcount = nullptr;
-#ifdef __DEBUG
+#if __DEBUG
 		size_t total = 1;
 #endif

 		__host__ __device__
 		Shape() noexcept {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Shape created (default)");
 // #endif
 		}

 		__host__ __device__
 		Shape(const size_t& length, size_t* data) noexcept : length(length), data(data), refcount(new size_t(1)) {
-#ifdef __DEBUG
+#if __DEBUG
 			//print("Shape created (raw)");
 			for(size_t i = 0; i < length; ++i)
 				total *= data[i];
@@ -64,13 +62,13 @@ namespace np {

 		__host__ __device__
 		Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Shape created (initializer)");
 // #endif
 			const auto* begin = dims.begin();
 			for(size_t i = 0; i < length; ++i){
 				data[i] = begin[i];
-#ifdef __DEBUG
+#if __DEBUG
 				total *= data[i];
 #endif
 			}
@@ -78,17 +76,17 @@ namespace np {

 		__host__ __device__
 		Shape(const Shape& shape) noexcept {
-#ifdef __DEBUG
+#if __DEBUG
 			print("Shape created (copy)");
 #endif
 			if (data != nullptr && data != shape.data){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former shape deleted (copy)");
 #endif
 				delete[] data;
 			}
 			if (refcount != nullptr && refcount != shape.refcount){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former shape refcount freed (copy)");
 #endif
 				delete refcount;
@@ -104,7 +102,7 @@ namespace np {
 			refcount = shape.refcount;
 			if (refcount != nullptr)
 				(*refcount)++;
-#ifdef __DEBUG
+#if __DEBUG
 			else
 				print("Moved shape has null refcount");
 			total = shape.total;
@@ -113,17 +111,17 @@ namespace np {

 		__host__ __device__
 		Shape(Shape&& shape) noexcept {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Shape created (move));
 // #endif
 			if (data != nullptr && data != shape.data){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former shape deleted (move)");
 #endif
 				delete[] data;
 			}
 			if (refcount != nullptr && refcount != shape.refcount){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former shape refcount freed (move)");
 #endif
 				delete refcount;
@@ -135,7 +133,7 @@ namespace np {
 			shape.length = 0;
 			shape.data = nullptr;
 			shape.refcount = nullptr;
-#ifdef __DEBUG
+#if __DEBUG
 			total = shape.total;
 			shape.total = 1;
 #endif
@@ -144,30 +142,30 @@ namespace np {
 		__host__ __device__
 		~Shape() noexcept {
 			if(refcount == nullptr){
-// #ifdef __DEBUG
+// #if __DEBUG
 // 					print("Shape refcount freed more than once");
 // #endif
 					return;
 			}
 			--(*refcount);
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			printf("Shape destructed : %lu\n", *refcount);
 // #endif
 			if(*refcount == 0){
 				if (data != nullptr){
 					delete[] data;
 					data = nullptr;
-// #ifdef __DEBUG
+// #if __DEBUG
 // 					print("Shape freeing ...");
 // #endif
 				}
-//#ifdef __DEBUG
+//#if __DEBUG
 				else
 					printf("Shape freed more than once : %lu\n", *refcount);
 //#endif
 				delete refcount;
 				refcount = nullptr;
-#ifdef __DEBUG
+#if __DEBUG
 				total = 1;
 #endif
 			}
@@ -175,17 +173,17 @@ namespace np {

 		__host__ __device__
 		Shape& operator=(const Shape& shape) noexcept {
-#ifdef __DEBUG
+#if __DEBUG
 			print("Shape created (assign copy)");
 #endif
 			if (data != nullptr && data != shape.data){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former shape deleted (assign copy)");
 #endif
 				delete[] data;
 			}
 			if (refcount != nullptr && refcount != shape.refcount){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former shape refcount freed (assign copy)");
 #endif
 				delete refcount;
@@ -201,7 +199,7 @@ namespace np {
 			refcount = shape.refcount;
 			if (refcount != nullptr)
 				(*refcount)++;
-#ifdef __DEBUG
+#if __DEBUG
 			else
 				printf("Assigned copy shape has null refcount");
 			total = shape.total;
@@ -211,17 +209,17 @@ namespace np {

 		__host__ __device__
 		Shape& operator=(Shape&& shape) noexcept {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Shape created (assign move)");
 // #endif
 			if (data != nullptr && data != shape.data){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former shape deleted (assign move)");
 #endif
 				delete[] data;
 			}
 			if (refcount != nullptr && refcount != shape.refcount){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former shape refcount freed (assign move)");
 #endif
 				delete refcount;
@@ -229,7 +227,7 @@ namespace np {
 			length = shape.length;
 			data = shape.data;
 			refcount = shape.refcount;
-#ifdef __DEBUG
+#if __DEBUG
 			total = shape.total;
 			if (refcount == nullptr)
 				print("Assigned copy shape has null refcount");
@@ -244,7 +242,7 @@ namespace np {

 		__host__ __device__
 		constexpr size_t& operator[](const size_t& i) const {
-#ifdef __DEBUG
+#if __DEBUG
 		if (i > length){
 			printf("Index %lu out of shape length %lu\n", i, length);
 	#ifndef __CUDACC__
@@ -258,7 +256,7 @@ namespace np {
 		constexpr bool operator==(const Shape& other) const noexcept {
 			if (length != other.length)
 				return false;
-#ifdef __DEBUG
+#if __DEBUG
 			if (total != other.total)
 				return false;
 #endif
@@ -284,42 +282,42 @@ namespace np {

 		__host__ __device__
 		Array() noexcept {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Array created (default)");
 // #endif
 		}

 		__host__ __device__
 		Array(const Shape& shape, T* data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Array created (raw, copy shape)");
 // #endif
 		}

 		__host__ __device__
 		Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Array created (raw empty, copy shape)");
 // #endif
 		}

 		__host__ __device__
 		Array(Shape&& shape, T* data) noexcept : shape(std::move(shape)), data(data), refcount(new size_t(1)) {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Array created (raw, move shape)");
 // #endif
 		}

 		__host__ __device__
 		Array(Shape&& shape) noexcept : shape(std::move(shape)), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Array created (raw empty, move shape)");
 // #endif
 		}

 		__host__ __device__
 		Array(const Array& array) noexcept : shape(array.shape) {
-#ifdef __DEBUG
+#if __DEBUG
 			print("Array created (copy)");
 #endif
 			if (data != nullptr && data != array.data){
@@ -329,7 +327,7 @@ namespace np {
 				delete[] data;
 			}
 			if (refcount != nullptr && refcount != array.refcount){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former array refcount freed (move)");
 #endif
 				delete refcount;
@@ -344,7 +342,7 @@ namespace np {
 			refcount = array.refcount;
 			if (refcount != nullptr)
 				(*refcount)++;
-#ifdef __DEBUG
+#if __DEBUG
 			else
 				print("Moved array has null refcount");
 #endif
@@ -352,17 +350,17 @@ namespace np {

 		__host__ __device__
 		Array(Array&& array) noexcept {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Array created (move)");
 // #endif
 			if (data != nullptr && data != array.data){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former array deleted (move)");
 #endif
 				delete[] data;
 			}
 			if (refcount != nullptr && refcount != array.refcount){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former array refcount freed (move)");
 #endif
 				delete refcount;
@@ -378,24 +376,24 @@ namespace np {
 		__host__ __device__
 		~Array() noexcept {
 			if(refcount == nullptr){
-// #ifdef __DEBUG
+// #if __DEBUG
 // 				print("Array refcount freed more than once");
 // #endif
 				return;
 			}
 			--(*refcount);
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			printf("Array destructed : %lu\n", *refcount);
 // #endif
 			if(*refcount == 0){
 				if (data != nullptr){
 					delete[] data;
 					data = nullptr;
-// #ifdef __DEBUG
+// #if __DEBUG
 // 					print("Array freeing ...");
 // #endif
 				}
-#ifdef __DEBUG
+#if __DEBUG
 				else
 					printf("Array freed more than once : %lu\n", *refcount);
 #endif
@@ -406,17 +404,17 @@ namespace np {

 		__host__ __device__
 		Array& operator=(const Array& array) noexcept {
-#ifdef __DEBUG
+#if __DEBUG
 			print("Array created (assign copy)");
 #endif
 			if (data != nullptr && data != array.data){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former array deleted (assign copy)");
 #endif
 				delete[] data;
 			}
 			if (refcount != nullptr && refcount != array.refcount){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former array refcount freed (assign copy)");
 #endif
 				delete refcount;
@@ -433,7 +431,7 @@ namespace np {
 			refcount = array.refcount;
 			if (refcount != nullptr)
 				(*refcount)++;
-#ifdef __DEBUG
+#if __DEBUG
 			else
 				print("Assigned array has null refcount");
 #endif
@@ -442,17 +440,17 @@ namespace np {

 		__host__ __device__
 		Array& operator=(Array&& array) noexcept {
-// #ifdef __DEBUG
+// #if __DEBUG
 // 			print("Array created (assign move)");
 // #endif
 			if (data != nullptr && data != array.data){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former array deleted (assign move)");
 #endif
 				delete[] data;
 			}
 			if (refcount != nullptr && refcount != array.refcount){
-#ifdef __DEBUG
+#if __DEBUG
 				print("Former array refcount freed (assign move)");
 #endif
 				delete refcount;
@@ -522,7 +520,7 @@ namespace np {
 	template<typename T>
 	__host__ __device__
 	constexpr T& Array<T>::operator[](const size_t& i) const {
-#ifdef __DEBUG
+#if __DEBUG
 		if (i > shape.total){
 			printf("Index %lu out of array size %lu\n", i, shape.total);
 	#ifndef __CUDACC__
@@ -563,7 +561,7 @@ namespace np {
 	template<typename T>
 	template<typename F>
 	Array<T> Array<T>::operator*(const Array<F>& other) const {
-#ifdef __DEBUG
+#if __DEBUG
 		if (shape != other.shape){
 			printf("Incompatible shapes\n");
 			throw;
@@ -598,7 +596,7 @@ namespace np {
 	template<typename T>
 	template<typename F>
 	Array<T>& Array<T>::operator*=(const Array<F>& other) {
-#ifdef __DEBUG
+#if __DEBUG
 		if (shape != other.shape){
 			printf("Incompatible shapes\n");
 			throw;
@@ -613,7 +611,7 @@ namespace np {
 	template<typename T>
 	template<typename F>
 	Array<T>& Array<T>::operator+=(const Array<F>& other) {
-#ifdef __DEBUG
+#if __DEBUG
 		if (shape != other.shape){
 			printf("Incompatible shapes\n");
 			throw;
@@ -638,7 +636,7 @@ namespace np {
 	template<typename T>
 	template<typename F>
 	Array<T> Array<T>::operator-(const np::Array<F>& other) const {
-#ifdef __DEBUG
+#if __DEBUG
 		if (shape != other.shape){
 			printf("Incompatible shapes\n");
 			throw;
@@ -921,7 +919,7 @@ np::Array<T> copyToDevice(const char* name, const np::Array<T>& array) noexcept
 	_print_cuda_error_(name, cudaMemcpy(d_array.data, array.data, array_size, cudaMemcpyHostToDevice));
 	//_print_cuda_error_(name, cudaMemcpy(d_array.shape.refcount, array.shape.refcount, sizeof(size_t), cudaMemcpyHostToDevice));
 	_print_cuda_error_(name, cudaMemcpy(d_array.shape.data, array.shape.data, shape_size, cudaMemcpyHostToDevice));
-#ifdef __DEBUG
+#if __DEBUG
 	d_array.shape.total = np::prod(array.shape);
 #endif
 	return d_array;
--- a/cpp/projet.cpp
+++ b/cpp/projet.cpp
@@ -9,13 +9,6 @@ namespace fs = std::filesystem;

 void test_float() noexcept;

-#ifdef __DEBUG
-// #define IDX_INSPECT 0
-// #define IDX_INSPECT 2
-#define IDX_INSPECT 4548
-#define IDX_INSPECT_OFFSET 100
-#endif
-
 #if GPU_BOOSTED
 #define LABEL "GPU"
 #define apply_features apply_features_gpu
@@ -34,34 +27,34 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
 		for (const char* const folder_name : { "models", "out" })
 			fs::create_directory(folder_name);

-	printf("| %-49s | %-17s | %-29s	|\n", "Preprocessing", "Time spent (ns)", "Formatted time spent");
-	printf("|%s|%s|%s|\n", S(51), S(19), S(31));
+	printf("| %-49s | %-18s | %-29s |\n", "Preprocessing", "Time spent (ns)", "Formatted time spent");
+	printf("|%s|%s|%s|\n", S(51), S(20), S(31));

 	const auto [ X_train, y_train, X_test, y_test ] = state_saver<uint8_t, 4>("Loading sets", {"X_train", "y_train", "X_test", "y_test"},
 			FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets);

-#ifdef __DEBUG
-	// print("X_train");
-	// print(X_train.shape);
-	// print(X_train, { IDX_INSPECT });
-	// print("X_test");
-	// print(X_test.shape);
-	// print(X_test, { IDX_INSPECT });
-	// print("y_train");
-	// print(y_train.shape);
-	// print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
-	// print("y_test");
-	// print(y_test.shape);
-	// print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
+#if __DEBUG
+	print("X_train");
+	print(X_train.shape);
+	print(X_train, { IDX_INSPECT });
+	print("X_test");
+	print(X_test.shape);
+	print(X_test, { IDX_INSPECT });
+	print("y_train");
+	print(y_train.shape);
+	print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
+	print("y_test");
+	print(y_test.shape);
+	print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
 #endif

 	const np::Array<uint8_t> feats = state_saver<uint8_t>("Building features", "feats",
 			FORCE_REDO, SAVE_STATE, OUT_DIR, build_features, X_train.shape[1], X_train.shape[2]);

-#ifdef __DEBUG
-	// print("feats");
-	// print(feats.shape);
-	// print_feat(feats, { IDX_INSPECT });
+#if __DEBUG
+	print("feats");
+	print(feats.shape);
+	print_feat(feats, { IDX_INSPECT });
 #endif

 	const np::Array<uint32_t> X_train_ii = state_saver<uint32_t>("Converting training set to integral images (" LABEL ")", "X_train_ii_" LABEL,
@@ -69,14 +62,13 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
 	const np::Array<uint32_t> X_test_ii = state_saver<uint32_t>("Converting testing set to integral images (" LABEL ")", "X_test_ii_" LABEL,
 			FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_test);

-#ifdef __DEBUG
-	// print("X_train_ii");
-	// print(X_train_ii.shape);
-	// print(X_train_ii, { IDX_INSPECT });
-	// print("X_test_ii");
-	// print(X_test_ii.shape);
-	// print(X_test_ii, { IDX_INSPECT });
-	// return {};
+#if __DEBUG
+	print("X_train_ii");
+	print(X_train_ii.shape);
+	print(X_train_ii, { IDX_INSPECT });
+	print("X_test_ii");
+	print(X_test_ii.shape);
+	print(X_test_ii, { IDX_INSPECT });
 #endif

 	const np::Array<int32_t> X_train_feat = state_saver<int32_t>("Applying features to training set (" LABEL ")", "X_train_feat_" LABEL,
@@ -84,46 +76,46 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
 	const np::Array<int32_t> X_test_feat = state_saver<int32_t>("Applying features to testing set (" LABEL ")", "X_test_feat_" LABEL,
 			FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_test_ii);

-#ifdef __DEBUG
-	// print("X_train_feat");
-	// print(X_train_feat.shape);
-	// print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
-	// print("X_test_feat");
-	// print(X_test_feat.shape);
-	// print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
+#if __DEBUG
+	print("X_train_feat");
+	print(X_train_feat.shape);
+	print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
+	print("X_test_feat");
+	print(X_test_feat.shape);
+	print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
 #endif

 	// const Array<int> indices = measure_time_save<Array<int>>("Selecting best features", "indices", select_percentile, X_train_feat, d.y_train);
 	// const Array<int> indices = measure_time<Array<int>>("Selecting best features", select_percentile, X_train_feat, d.y_train);

-#ifdef __DEBUG
+#if __DEBUG
 	// print_feature(indices);
 #endif

 	const np::Array<uint16_t> X_train_feat_argsort = state_saver<uint16_t>("Precalculating training set argsort (" LABEL ")", "X_train_feat_argsort_" LABEL,
 			FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_train_feat);

-#ifdef __DEBUG
+#if __DEBUG
 	print("X_train_feat_argsort");
 	print(X_train_feat_argsort.shape);
 	print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
 #endif

-	// const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", "X_test_feat_argsort_" LABEL,
-	// 		FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);
+	const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", "X_test_feat_argsort_" LABEL,
+			FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);

-#ifdef __DEBUG
-	// print("X_test_feat_argsort");
-	// print(X_test_feat_argsort.shape);
-	// print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
+#if __DEBUG
+	print("X_test_feat_argsort");
+	print(X_test_feat_argsort.shape);
+	print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
 #endif

 	return { X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test };
 }

 void train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_train_feat_argsort, const np::Array<uint8_t>& y_train) {
-	printf("\n| %-49s | %-17s | %-29s |\n", "Training", "Time spent (ns)", "Formatted time spent");
-	printf("|%s|%s|%s|\n", S(51), S(19), S(31));
+	printf("\n| %-49s | %-18s | %-29s |\n", "Training", "Time spent (ns)", "Formatted time spent");
+	printf("|%s|%s|%s|\n", S(51), S(20), S(31));

 	for (const size_t T : TS) {
 		char title[BUFFER_SIZE] = { 0 };
@@ -133,13 +125,13 @@ void train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_
 		sprintf(alphas_title, "alphas_%lu_%s", T, LABEL);
 		sprintf(final_classifiers_title, "final_classifiers_%lu_%s", T, LABEL);

-#ifdef __DEBUG
+#if __DEBUG
 		const auto [ alphas, final_classifiers ] = state_saver<float64_t, 2>(title, { alphas_title, final_classifiers_title },
 #else
 		state_saver<float64_t, 2>(title, { alphas_title, final_classifiers_title },
 #endif
 				FORCE_REDO, SAVE_STATE, MODEL_DIR, train_viola_jones, T, X_train_feat, X_train_feat_argsort, y_train);
-#ifdef __DEBUG
+#if __DEBUG
 		print("alphas");
 		print(alphas);
 		print("final_classifiers");
@@ -298,9 +290,9 @@ void final_unit_test() {
 }

 int main(){
-#ifdef __DEBUG
-	printf("| %-49s | %-17s | %-29s	|\n", "Unit testing", "Time spent (ns)", "Formatted time spent");
-	printf("|%s|%s|%s|\n", S(51), S(19), S(31));
+#if __DEBUG
+	printf("| %-49s | %-18s | %-29s |\n", "Unit testing", "Time spent (ns)", "Formatted time spent");
+	printf("|%s|%s|%s|\n", S(51), S(20), S(31));
 	benchmark_function_void("Testing GPU capabilities 1D", test_working, 3 + (1<<29));
 	benchmark_function_void("Testing GPU capabilities 2D", test_working_2d, 3 + (1<<15), 2 + (1<<14));
 	benchmark_function_void("Testing GPU capabilities 3D", test_working_3d, 9 + (1<<10), 5 + (1<<10), 7 + (1<<9));
@@ -313,7 +305,7 @@ int main(){
 	train(X_train_feat, X_train_feat_argsort, y_train);
 	testing_and_evaluating(X_train_feat, y_train, X_test_feat, y_test);
 	final_unit_test();
-#ifdef __DEBUG
+#if __DEBUG
 	printf("\nAFTER CLEANUP\n");
 #endif
 	return EXIT_SUCCESS;
--- a/cpp/test.cpp
+++ b/cpp/test.cpp
@@ -21,7 +21,7 @@ void clearProgress() noexcept {

 template<typename T>
 void test(const uint64_t& N) noexcept {
-#ifdef __DEBUG
+#if __DEBUG
 	printf("DETERMINISTIC for N=%s of %s sized %s\n", thousand_sep(N).c_str(), typeid(T).name(), format_byte_size(sizeof(T)).c_str());
 	print("Estimating memory footprint at : " + format_byte_size(3 * N * sizeof(T)));
 #endif
--- a/python/ViolaJonesGPU.py
+++ b/python/ViolaJonesGPU.py
@@ -1,12 +1,7 @@
-from numba import float64, uint32, cuda, int32, uint16
-from config import COMPILE_WITH_C
+from numba import float64, uint32, cuda, int32
+from config import COMPILE_WITH_C, NB_THREADS, NB_THREADS_2D, NB_THREADS_3D, M
 import numpy as np

-NB_THREADS = 1024
-NB_THREADS_2D = (32, 32)
-NB_THREADS_3D = (16, 16, 4)
-M = int(np.log2(NB_THREADS_2D[1]))
-
 if COMPILE_WITH_C:
 	from numba import njit
 else:
--- a/python/common.py
+++ b/python/common.py
@@ -2,6 +2,7 @@ from toolbox import picke_multi_loader, format_time_ns, unit_test_argsort_2d
 from typing import List, Tuple
 from time import perf_counter_ns
 import numpy as np
+from config import OUT_DIR, DATA_DIR

 def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e-8) -> None:
 	"""Test if the each result is equals to other devices.
@@ -20,32 +21,32 @@ def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e
 	fnc_s = perf_counter_ns()
 	n_total= 0
 	n_success = 0
-	print(f"\n| {'Unit testing':<37} | {'Test state':<10} | {'Time spent (ns)':<17} | {'Formatted time spent':<29} |")
-	print(f"|{'-'*39}|{'-'*12}|{'-'*19}|{'-'*31}|")
+	print(f"\n| {'Unit testing':<37} | {'Test state':<10} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |")
+	print(f"|{'-'*39}|{'-'*12}|{'-'*20}|{'-'*31}|")

 	for filename in ["X_train_feat", "X_test_feat", "X_train_ii", "X_test_ii"]:
 		print(f"{filename}...", end = "\r")
-		bs = picke_multi_loader([f"{filename}_{label}" for label in labels], "./out")
+		bs = picke_multi_loader([f"{filename}_{label}" for label in labels], OUT_DIR)

 		for i, (b1, l1) in enumerate(zip(bs, labels)):
 			if b1 is None:
-				#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
+				#print(f"| {filename:<22} - {l1:<4}         | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
 				continue
 			for j, (b2, l2) in enumerate(zip(bs, labels)):
 				if i >= j:
 					continue
 				if b2 is None:
-					#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
+					#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
 					continue
 				n_total += 1
 				s = perf_counter_ns()
 				state = np.abs(b1 - b2).mean() < tol
 				e = perf_counter_ns() - s
 				if state:
-					print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
+					print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
 					n_success += 1
 				else:
-					print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
+					print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")

 	for filename, featname in zip(["X_train_feat_argsort", "X_test_feat_argsort"], ["X_train_feat", "X_test_feat"]):
 		print(f"Loading {filename}...", end = "\r")
@@ -53,14 +54,14 @@ def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e
 		bs = []
 		for label in labels:
 			if feat is None:
-				feat_tmp = picke_multi_loader([f"{featname}_{label}"], "./out")[0]
+				feat_tmp = picke_multi_loader([f"{featname}_{label}"], OUT_DIR)[0]
 				if feat_tmp is not None:
 					feat = feat_tmp
-			bs.append(picke_multi_loader([f"{filename}_{label}"], "./out")[0])
+			bs.append(picke_multi_loader([f"{filename}_{label}"], OUT_DIR)[0])

 		for i, (b1, l1) in enumerate(zip(bs, labels)):
 			if b1 is None:
-				#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
+				#print(f"| {filename:<22} - {l1:<4}         | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
 				continue
 			if feat is not None:
 				n_total += 1
@@ -68,26 +69,26 @@ def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e
 				state = unit_test_argsort_2d(feat, b1)
 				e = perf_counter_ns() - s
 				if state:
-					print(f"| {filename:<22} - {l1:<4} argsort | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
+					print(f"| {filename:<22} - {l1:<4} argsort | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
 					n_success += 1
 				else:
-					print(f"| {filename:<22} - {l1:<4} argsort | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
+					print(f"| {filename:<22} - {l1:<4} argsort | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")

 			for j, (b2, l2) in enumerate(zip(bs, labels)):
 				if i >= j:
 					continue
 				if b2 is None:
-					#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
+					#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
 					continue
 				n_total += 1
 				s = perf_counter_ns()
 				state = np.abs(b1 - b2).mean() < tol
 				e = perf_counter_ns() - s
 				if state:
-					print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
+					print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
 					n_success += 1
 				else:
-					print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
+					print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")

 	for T in TS:
 		for filename in ["alphas", "final_classifiers"]:
@@ -96,32 +97,33 @@ def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e

 			for i, (b1, l1) in enumerate(zip(bs, labels)):
 				if b1 is None:
-					#print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
+					#print(f"| {filename + '_' + str(T):<22} - {l1:<4}         | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
 					continue
 				for j, (b2, l2) in enumerate(zip(bs, labels)):
 					if i >= j:
 						continue
 					if b2 is None:
-						#print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
+						#print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
 						continue
 					n_total += 1
 					s = perf_counter_ns()
 					state = np.abs(b1 - b2).mean() < tol
 					e = perf_counter_ns() - s
 					if state:
-						print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
+						print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
 						n_success += 1
 					else:
-						print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
-	print(f"|{'-'*39}|{'-'*12}|{'-'*19}|{'-'*31}|")
-	e = perf_counter_ns() - fnc_s
-	print(f"| {'Unit testing summary':<37} | {str(n_success) + '/' + str(n_total):>10} | {e:>17,} | {format_time_ns(e):<29} |")
+						print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")

-def load_datasets(data_dir: str = "../data") -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+	print(f"|{'-'*39}|{'-'*12}|{'-'*20}|{'-'*31}|")
+	e = perf_counter_ns() - fnc_s
+	print(f"| {'Unit testing summary':<37} | {str(n_success) + '/' + str(n_total):>10} | {e:>18,} | {format_time_ns(e):<29} |")
+
+def load_datasets(data_dir: str = DATA_DIR) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
 	"""Load the datasets.

 	Args:
-		data_dir (str, optional): [description]. Defaults to "../data".
+		data_dir (str, optional): [description]. Defaults to DATA_DIR (see config.py).

 	Returns:
 		Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: [description]
--- a/python/config.py
+++ b/python/config.py
@@ -1,3 +1,14 @@
+import numpy as np
+
+DATA_DIR = "../data"
+OUT_DIR = "./out"
+MODEL_DIR = "./models"
+
+NB_THREADS = 1024
+NB_THREADS_2D = (32, 32)
+NB_THREADS_3D = (16, 16, 4)
+M = int(np.log2(NB_THREADS_2D[1]))
+
 # Save state to avoid recalulation on restart
 SAVE_STATE = True
 # Redo the state even if it's already saved
@@ -5,7 +16,7 @@ FORCE_REDO = False
 # Use NJIT to greatly accelerate runtime
 COMPILE_WITH_C = False
 # Use GPU to greatly accelerate runtime (as priority over NJIT)
-GPU_BOOSTED = False
+GPU_BOOSTED = True
 # Number of weak classifiers
 # TS = [1]
 # TS = [1, 5, 10]
@@ -13,3 +24,12 @@ GPU_BOOSTED = False
 # TS = [1, 5, 10, 25, 50, 100, 200]
 # TS = [1, 5, 10, 25, 50, 100, 200, 300]
 TS = [1, 5, 10, 25, 50, 100, 200, 300, 400, 500, 1000]
+# Enable verbose output (for debugging purposes)
+__DEBUG = False
+# Debugging options
+if __DEBUG:
+	IDX_INSPECT = 4548
+	IDX_INSPECT_OFFSET = 100
+	np.seterr(all = 'raise')
+	# Debug option (image width * log_10(length) + extra characters)
+	np.set_printoptions(linewidth = 19 * 6 + 3)
--- a/python/projet.py
+++ b/python/projet.py
@@ -12,9 +12,9 @@ from time import perf_counter_ns
 from os import makedirs
 import numpy as np

-#np.seterr(all = 'raise')
-
-from config import FORCE_REDO, COMPILE_WITH_C, GPU_BOOSTED, TS, SAVE_STATE
+from config import FORCE_REDO, COMPILE_WITH_C, GPU_BOOSTED, TS, SAVE_STATE, MODEL_DIR, __DEBUG
+if __DEBUG:
+	from config import IDX_INSPECT, IDX_INSPECT_OFFSET

 if GPU_BOOSTED:
 	from ViolaJonesGPU import apply_features, set_integral_image, argsort
@@ -26,12 +26,6 @@ else:
 	from ViolaJonesCPU import apply_features, set_integral_image, argsort
 	label = 'CPU' if COMPILE_WITH_C else 'PY'

-# FIXME Debug code
-# IDX_INSPECT = 0
-# IDX_INSPECT = 2
-IDX_INSPECT = 4548
-IDX_INSPECT_OFFSET = 100
-
 def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
 	"""Train the weak classifiers.

@@ -45,25 +39,23 @@ def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) ->
 	"""
 	feats = state_saver("Building features", "feats", lambda: build_features(X_train.shape[1], X_train.shape[2]), FORCE_REDO, SAVE_STATE)

-	# FIXME Debug code
-	# print("feats")
-	# print(feats.shape)
-	# print(feats[IDX_INSPECT].ravel())
-	# return 0, 0
+	if __DEBUG:
+		print("feats")
+		print(feats.shape)
+		print(feats[IDX_INSPECT].ravel())

 	X_train_ii = state_saver(f"Converting training set to integral images ({label})", f"X_train_ii_{label}",
 							lambda: set_integral_image(X_train), FORCE_REDO, SAVE_STATE)
 	X_test_ii = state_saver(f"Converting testing set to integral images ({label})", f"X_test_ii_{label}",
 							lambda: set_integral_image(X_test), FORCE_REDO, SAVE_STATE)

-	# FIXME Debug code
-	# print("X_train_ii")
-	# print(X_train_ii.shape)
-	# print(X_train_ii[IDX_INSPECT])
-	# print("X_test_ii")
-	# print(X_test_ii.shape)
-	# print(X_test_ii[IDX_INSPECT])
-	# return 0, 0
+	if __DEBUG:
+		print("X_train_ii")
+		print(X_train_ii.shape)
+		print(X_train_ii[IDX_INSPECT])
+		print("X_test_ii")
+		print(X_test_ii.shape)
+		print(X_test_ii[IDX_INSPECT])

 	X_train_feat = state_saver(f"Applying features to training set ({label})", f"X_train_feat_{label}",
 							lambda: apply_features(feats, X_train_ii), FORCE_REDO, SAVE_STATE)
@@ -71,14 +63,13 @@ def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) ->
 							lambda: apply_features(feats, X_test_ii), FORCE_REDO, SAVE_STATE)
 	del X_train_ii, X_test_ii, feats

-	# FIXME Debug code
-	# print("X_train_feat")
-	# print(X_train_feat.shape)
-	# print(X_train_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
-	# print("X_test_feat")
-	# print(X_test_feat.shape)
-	# print(X_test_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
-	# return 0, 0
+	if __DEBUG:
+		print("X_train_feat")
+		print(X_train_feat.shape)
+		print(X_train_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
+		print("X_test_feat")
+		print(X_test_feat.shape)
+		print(X_test_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])

 	#indices = state_saver("Selecting best features training set", "indices", force_redo = True, save_state = SAVE_STATE,
 	#						fnc = lambda: SelectPercentile(f_classif, percentile = 10).fit(X_train_feat.T, y_train).get_support(indices = True))
@@ -96,40 +87,35 @@ def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) ->

 	# X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]

-	#return 0, 0
-
 	X_train_feat_argsort = state_saver(f"Precalculating training set argsort ({label})", f"X_train_feat_argsort_{label}",
 									lambda: argsort(X_train_feat), FORCE_REDO, SAVE_STATE)

-	# FIXME Debug code
-	# print("X_train_feat_argsort")
-	# print(X_train_feat_argsort.shape)
-	# print(X_train_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
-	# benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_train_feat, X_train_feat_argsort))
-	# return 0, 0
+	if __DEBUG:
+		print("X_train_feat_argsort")
+		print(X_train_feat_argsort.shape)
+		print(X_train_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
+		benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_train_feat, X_train_feat_argsort))

-	# X_test_feat_argsort = state_saver(f"Precalculating testing set argsort ({label})", f"X_test_feat_argsort_{label}",
-	# 								lambda: argsort(X_test_feat), True, False)
+	X_test_feat_argsort = state_saver(f"Precalculating testing set argsort ({label})", f"X_test_feat_argsort_{label}",
+									lambda: argsort(X_test_feat), FORCE_REDO, SAVE_STATE)

-	# FIXME Debug code
-	# print("X_test_feat_argsort")
-	# print(X_test_feat_argsort.shape)
-	# print(X_test_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
-	# benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_test_feat, X_test_feat_argsort))
-	# return 0, 0
-	# del X_test_feat_argsort
+	if __DEBUG:
+		print("X_test_feat_argsort")
+		print(X_test_feat_argsort.shape)
+		print(X_test_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
+		benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_test_feat, X_test_feat_argsort))
+		del X_test_feat_argsort

-	print(f"\n| {'Training':<49} | {'Time spent (ns)':<17} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*19}|{'-'*31}|")
+	print(f"\n| {'Training':<49} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*20}|{'-'*31}|")

 	for T in TS:
-		# alphas, final_classifiers = state_saver(f"ViolaJones T = {T:<3} ({label})", [f"alphas_{T}_{label}", f"final_classifiers_{T}_{label}"],
-		state_saver(f"ViolaJones T = {T:<4} ({label})", [f"alphas_{T}_{label}", f"final_classifiers_{T}_{label}"],
-					lambda: train_viola_jones(T, X_train_feat, X_train_feat_argsort, y_train), FORCE_REDO, SAVE_STATE, "./models")
-		# FIXME Debug code
-		# print("alphas")
-		# print(alphas)
-		# print("final_classifiers")
-		# print(final_classifiers)
+		alphas, final_classifiers = state_saver(f"ViolaJones T = {T:<3} ({label})", [f"alphas_{T}_{label}", f"final_classifiers_{T}_{label}"],
+					lambda: train_viola_jones(T, X_train_feat, X_train_feat_argsort, y_train), FORCE_REDO, SAVE_STATE, MODEL_DIR)
+		if __DEBUG:
+			print("alphas")
+			print(alphas)
+			print("final_classifiers")
+			print(final_classifiers)

 	return X_train_feat, X_test_feat

@@ -183,43 +169,37 @@ def _main_() -> None:
 		for folder_name in ["models", "out"]:
 			makedirs(folder_name, exist_ok = True)

-	print(f"| {'Preprocessing':<49} | {'Time spent (ns)':<17} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*19}|{'-'*31}|")
+	print(f"| {'Preprocessing':<49} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*20}|{'-'*31}|")

 	X_train, y_train, X_test, y_test = state_saver("Loading sets", ["X_train", "y_train", "X_test", "y_test"],
 	 											   load_datasets, FORCE_REDO, SAVE_STATE)

-	# FIXME Debug option (image width * log_10(length) + extra characters)
-	# np.set_printoptions(linewidth = 19 * 6 + 3)
-
-	# FIXME Debug code
-	# print("X_train")
-	# print(X_train.shape)
-	# print(X_train[IDX_INSPECT])
-	# print("X_test")
-	# print(X_test.shape)
-	# print(X_test[IDX_INSPECT])
-	# print("y_train")
-	# print(y_train.shape)
-	# print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
-	# print("y_test")
-	# print(y_test.shape)
-	# print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
-	# return
+	if __DEBUG:
+		print("X_train")
+		print(X_train.shape)
+		print(X_train[IDX_INSPECT])
+		print("X_test")
+		print(X_test.shape)
+		print(X_test[IDX_INSPECT])
+		print("y_train")
+		print(y_train.shape)
+		print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
+		print("y_test")
+		print(y_test.shape)
+		print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])

 	X_train_feat, X_test_feat = bench_train(X_train, X_test, y_train)

-	# FIXME Debug code
-	# return
-
-	# X_train_feat, X_test_feat = picke_multi_loader([f"X_train_feat_{label}", f"X_test_feat_{label}"], "./out")
-	# indices = picke_multi_loader(["indices"], "./out")[0]
+	# X_train_feat, X_test_feat = picke_multi_loader([f"X_train_feat_{label}", f"X_test_feat_{label}"], OUT_DIR)
+	# indices = picke_multi_loader(["indices"], OUT_DIR)[0]
 	# X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]

 	bench_accuracy(label, X_train_feat, X_test_feat, y_train, y_test)

 if __name__ == "__main__":
-	#toolbox_unit_test()
 	_main_()
+	if __DEBUG:
+		toolbox_unit_test()

 	# Only execute unit test after having trained the specified labels
 	unit_test(TS, ["GPU", "CPU", "PY", "PGPU"])
--- a/python/toolbox.py
+++ b/python/toolbox.py
@@ -4,6 +4,7 @@ from numba import njit
 import numpy as np
 import pickle
 import os
+from config import MODEL_DIR, OUT_DIR

 formats = ["ns", "µs", "ms", "s", "m", "h", "j", "w", "M", "y"]
 nb = np.array([1, 1000, 1000, 1000, 60, 60, 24, 7, 4, 12], dtype = np.uint16)
@@ -48,12 +49,12 @@ def toolbox_unit_test() -> None:
 	# UINT64_MAX == 2^64 = 18446744073709551615 == -1
 	assert "635y 5M 3j 23h 34m 33s 709ms 551µs 616ns" == format_time_ns(2**64)

-def picke_multi_loader(filenames: List[str], save_dir: str = "./models") -> List[Any]:
+def picke_multi_loader(filenames: List[str], save_dir: str = MODEL_DIR) -> List[Any]:
 	"""Load multiple pickle data files.

 	Args:
 		filenames (List[str]): List of all the filename to load.
-		save_dir (str, optional): Path of the files to load. Defaults to "./models".
+		save_dir (str, optional): Path of the files to load. Defaults to MODEL_DIR (see config.py).

 	Returns:
 		List[Any]. List of loaded pickle data files.
@@ -82,10 +83,10 @@ def benchmark_function(step_name: str, fnc: Callable) -> Any:
 	s = perf_counter_ns()
 	b = fnc()
 	e = perf_counter_ns() - s
-	print(f"| {step_name:<49} | {e:>17,} | {format_time_ns(e):<29} |")
+	print(f"| {step_name:<49} | {e:>18,} | {format_time_ns(e):<29} |")
 	return b

-def state_saver(step_name: str, filename: Union[str, List[str]], fnc, force_redo: bool = False, save_state: bool = True, save_dir: str = "./out") -> Any:
+def state_saver(step_name: str, filename: Union[str, List[str]], fnc, force_redo: bool = False, save_state: bool = True, save_dir: str = OUT_DIR) -> Any:
 	"""Either execute a function then saves the result or load the already existing result.

 	Args:
@@ -93,7 +94,7 @@ def state_saver(step_name: str, filename: Union[str, List[str]], fnc, force_redo
 		filename (Union[str, List[str]]): Name or list of names of the filenames where the result(s) are saved.
 		fnc ([type]): Function to call.
 		force_redo (bool, optional): Recall the function even if the result(s) is already saved. Defaults to False.
-		save_dir (str, optional): Path of the directory to save the result(s). Defaults to "./out".
+		save_dir (str, optional): Path of the directory to save the result(s). Defaults to OUT_DIR (see config.py).

 	Returns:
 		Any: The result(s) of the called function
@@ -111,7 +112,7 @@ def state_saver(step_name: str, filename: Union[str, List[str]], fnc, force_redo
 			print(f"Loading results of {step_name}", end = '\r')
 			with open(f"{save_dir}/{filename}.pkl", "rb") as f:
 				res = pickle.load(f)
-			print(f"| {step_name:<49} | {'None':>17} | {'loaded saved state':<29} |")
+			print(f"| {step_name:<49} | {'None':>18} | {'loaded saved state':<29} |")
 			return res
 	elif isinstance(filename, list):
 		abs = False
@@ -129,7 +130,7 @@ def state_saver(step_name: str, filename: Union[str, List[str]], fnc, force_redo
 				print(' ' * 100, end = '\r')
 			return b

-		print(f"| {step_name:<49} | {'None':>17} | {'loaded saved state':<29} |")
+		print(f"| {step_name:<49} | {'None':>18} | {'loaded saved state':<29} |")
 		b = []
 		print(f"Loading results of {step_name}", end = '\r')
 		for fn in filename: