Moved DEBUG option to config files
This commit is contained in:
parent
e6194ac485
commit
399024da7a
@ -5,7 +5,6 @@ MODELS_DIR := models
|
||||
OUT_DIR := out
|
||||
SRC_DIR := .
|
||||
#CFLAGS := -O0 -Werror=all-warnings -g -G
|
||||
#CFLAGS := $(CFLAGS) -D__DEBUG
|
||||
#CFLAGS := $(CFLAGS) -pg
|
||||
#CFLAGS := $(CFLAGS) -Xptxas=-w
|
||||
#CFLAGS := $(CFLAGS) -Xcompiler -Wall,-O0,-g,-Werror,-Werror=implicit-fallthrough=0,-Wextra,-rdynamic
|
||||
|
@ -6,28 +6,35 @@ namespace fs = std::filesystem;
|
||||
//#include "config.hpp"
|
||||
|
||||
template <typename T>
|
||||
void unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noexcept {
|
||||
bool unit_test_cpu_vs_gpu(const np::Array<T>& cpu, const np::Array<T>& gpu) noexcept {
|
||||
if (cpu.shape != gpu.shape) {
|
||||
#if __DEBUG
|
||||
fprintf(stderr, "Inequal shape !\n");
|
||||
return;
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
size_t eq = 0;
|
||||
const size_t length = np::prod(cpu.shape);
|
||||
for (size_t i = 0; i < length; ++i)
|
||||
if (cpu[i] == gpu[i])
|
||||
++eq;
|
||||
//else
|
||||
// std::cout << i << ": " << cpu[i] << " != " << gpu[i] << std::endl;
|
||||
|
||||
#if __DEBUG
|
||||
if (eq != length)
|
||||
printf("Incorrect results, Number of equalities : %s/%s <=> %.2f%% !\n", thousand_sep(eq).c_str(), thousand_sep(length).c_str(),
|
||||
static_cast<float64_t>(eq) / static_cast<float64_t>(length) * 100.0);
|
||||
#endif
|
||||
|
||||
return eq == length;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indices) noexcept {
|
||||
bool unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indices) noexcept {
|
||||
if (a.shape != indices.shape) {
|
||||
#if __DEBUG
|
||||
fprintf(stderr, "Inequal shape !\n");
|
||||
return;
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
size_t correct = a.shape[0]; // First elements are always correctly sorted
|
||||
const size_t total = np::prod(a.shape);
|
||||
@ -37,34 +44,37 @@ void unit_test_argsort_2d(const np::Array<T>& a, const np::Array<uint16_t>& indi
|
||||
if(a[i + indices[k]] <= a[i + indices[k + 1]])
|
||||
++correct;
|
||||
}
|
||||
#if __DEBUG
|
||||
if (correct != total)
|
||||
printf("Incorrect results, Number of equalities : %s/%s <=> %.2f%% !\n", thousand_sep(correct).c_str(), thousand_sep(total).c_str(),
|
||||
static_cast<float64_t>(correct) / static_cast<float64_t>(total) * 100.0);
|
||||
#endif
|
||||
return correct == total;
|
||||
}
|
||||
|
||||
template <typename T, typename F, typename... Args>
|
||||
T benchmark_function(const char* step_name, const F& fnc, Args &&...args) noexcept {
|
||||
#ifndef __DEBUG
|
||||
#if __DEBUG == false
|
||||
printf("%s...\r", step_name);
|
||||
fflush(stdout); // manual flush is mandatory, otherwise it will not be shown immediately because the output is buffered
|
||||
#endif
|
||||
const auto start = time();
|
||||
const T res = fnc(std::forward<Args>(args)...);
|
||||
const long long timespent = duration_ns(time() - start);
|
||||
printf("| %-49s | %17s | %-29s |\n", step_name, thousand_sep(timespent).c_str(), format_time_ns(timespent).c_str());
|
||||
printf("| %-49s | %18s | %-29s |\n", step_name, thousand_sep(timespent).c_str(), format_time_ns(timespent).c_str());
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename F, typename... Args>
|
||||
void benchmark_function_void(const char* step_name, const F& fnc, Args &&...args) noexcept {
|
||||
#ifndef __DEBUG
|
||||
#if __DEBUG == false
|
||||
printf("%s...\r", step_name);
|
||||
fflush(stdout); // manual flush is mandatory, otherwise it will not be shown immediately because the output is buffered
|
||||
#endif
|
||||
const auto start = time();
|
||||
fnc(std::forward<Args>(args)...);
|
||||
const long long timespent = duration_ns(time() - start);
|
||||
printf("| %-49s | %17s | %-29s |\n", step_name, thousand_sep(timespent).c_str(), format_time_ns(timespent).c_str());
|
||||
printf("| %-49s | %18s | %-29s |\n", step_name, thousand_sep(timespent).c_str(), format_time_ns(timespent).c_str());
|
||||
}
|
||||
|
||||
template <typename T, typename F, typename... Args>
|
||||
@ -76,23 +86,23 @@ np::Array<T> state_saver(const char* step_name, const char* filename, const bool
|
||||
if (!fs::exists(filepath) || force_redo) {
|
||||
bin = std::move(benchmark_function<np::Array<T>>(step_name, fnc, std::forward<Args>(args)...));
|
||||
if(save_state){
|
||||
#ifndef __DEBUG
|
||||
#if __DEBUG == false
|
||||
printf("Saving results of %s\r", step_name);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
save<T>(bin, filepath);
|
||||
#ifndef __DEBUG
|
||||
#if __DEBUG == false
|
||||
printf("%*c\r", 100, ' ');
|
||||
fflush(stdout);
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
#ifndef __DEBUG
|
||||
#if __DEBUG == false
|
||||
printf("Loading results of %s\r", step_name);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
bin = std::move(load<T>(filepath));
|
||||
printf("| %-49s | %17s | %-29s |\n", step_name, "None", "loaded saved state");
|
||||
printf("| %-49s | %18s | %-29s |\n", step_name, "None", "loaded saved state");
|
||||
}
|
||||
return bin;
|
||||
}
|
||||
@ -113,7 +123,7 @@ std::array<np::Array<T>, N> state_saver(const char* step_name, const std::vector
|
||||
if (abs || force_redo) {
|
||||
bin = std::move(benchmark_function<std::array<np::Array<T>, N>>(step_name, fnc, std::forward<Args>(args)...));
|
||||
if (save_state){
|
||||
#ifndef __DEBUG
|
||||
#if __DEBUG == false
|
||||
printf("Saving results of %s\r", step_name);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
@ -122,13 +132,13 @@ std::array<np::Array<T>, N> state_saver(const char* step_name, const std::vector
|
||||
sprintf(filepath, "%s/%s.bin", out_dir, filename);
|
||||
save<T>(bin[i++], filepath);
|
||||
}
|
||||
#ifndef __DEBUG
|
||||
#if __DEBUG == false
|
||||
printf("%*c\r", 100, ' ');
|
||||
fflush(stdout);
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
#ifndef __DEBUG
|
||||
#if __DEBUG == false
|
||||
printf("Loading results of %s\r", step_name);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
@ -137,7 +147,7 @@ std::array<np::Array<T>, N> state_saver(const char* step_name, const std::vector
|
||||
sprintf(filepath, "%s/%s.bin", out_dir, filename);
|
||||
bin[i++] = std::move(load<T>(filepath));
|
||||
}
|
||||
printf("| %-49s | %17s | %-29s |\n", step_name, "None", "loaded saved state");
|
||||
printf("| %-49s | %18s | %-29s |\n", step_name, "None", "loaded saved state");
|
||||
}
|
||||
return bin;
|
||||
}
|
||||
|
@ -2,16 +2,7 @@
|
||||
#include "data.hpp"
|
||||
#include "toolbox.hpp"
|
||||
#include "ViolaJones.hpp"
|
||||
|
||||
#define NB_THREADS 1024
|
||||
|
||||
#define NB_THREADS_2D_X 32
|
||||
#define NB_THREADS_2D_Y 32
|
||||
__device__ constexpr const size_t M = 5; //log2(NB_THREADS_2D_Y));
|
||||
|
||||
#define NB_THREADS_3D_X 16
|
||||
#define NB_THREADS_3D_Y 16
|
||||
#define NB_THREADS_3D_Z 4
|
||||
#include "config.hpp"
|
||||
|
||||
static __global__ void __test_working_kernel__(const np::Array<size_t> d_x, np::Array<size_t> d_y, const size_t length) {
|
||||
const size_t i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
@ -22,7 +13,7 @@ static __global__ void __test_working_kernel__(const np::Array<size_t> d_x, np::
|
||||
void test_working(const size_t& length) noexcept {
|
||||
const size_t size = length * sizeof(size_t);
|
||||
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Estimating memory footprint at : " + format_byte_size(2 * size));
|
||||
#endif
|
||||
|
||||
@ -64,7 +55,7 @@ void test_working_2d(const size_t& N1, const size_t& N2) noexcept {
|
||||
const size_t length = N1 * N2;
|
||||
const size_t size = length * sizeof(size_t);
|
||||
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Estimating memory footprint at : " + format_byte_size(2 * size));
|
||||
#endif
|
||||
|
||||
@ -107,7 +98,7 @@ void test_working_3d(const size_t& N1, const size_t& N2, const size_t& N3) noexc
|
||||
const size_t length = N1 * N2 * N3;
|
||||
const size_t size = length * sizeof(size_t);
|
||||
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Estimating memory footprint at : " + format_byte_size(2 * size));
|
||||
#endif
|
||||
|
||||
|
@ -1,5 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#define DATA_DIR "../data"
|
||||
#define OUT_DIR "./out"
|
||||
#define MODEL_DIR "./models"
|
||||
|
||||
#ifdef __CUDACC__
|
||||
#define NB_THREADS 1024
|
||||
|
||||
#define NB_THREADS_2D_X 32
|
||||
#define NB_THREADS_2D_Y 32
|
||||
__device__ constexpr const size_t M = 5; //log2(NB_THREADS_2D_Y));
|
||||
|
||||
#define NB_THREADS_3D_X 16
|
||||
#define NB_THREADS_3D_Y 16
|
||||
#define NB_THREADS_3D_Z 4
|
||||
#endif
|
||||
|
||||
// Save state to avoid recalculation on restart
|
||||
#define SAVE_STATE true
|
||||
// Redo the state even if it's already saved
|
||||
@ -12,3 +28,11 @@
|
||||
// const size_t TS[] = { 1, 5, 10, 25, 50 };
|
||||
// const size_t TS[] = { 1, 5, 10, 25, 50, 100, 200, 300 };
|
||||
const size_t TS[] = { 1, 5, 10, 25, 50, 100, 200, 300, 400, 500, 1000 };
|
||||
|
||||
// Enable verbose output (for debugging purposes)
|
||||
#define __DEBUG false
|
||||
// Debugging options
|
||||
#if __DEBUG
|
||||
#define IDX_INSPECT 4548
|
||||
#define IDX_INSPECT_OFFSET 100
|
||||
#endif
|
||||
|
116
cpp/data.hpp
116
cpp/data.hpp
@ -5,10 +5,8 @@
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include "config.hpp"
|
||||
|
||||
#define DATA_DIR "../data"
|
||||
#define OUT_DIR "./out"
|
||||
#define MODEL_DIR "./models"
|
||||
#define BUFFER_SIZE 256
|
||||
#define STRING_INT_SIZE 8 // Length of a number in log10 (including '-')
|
||||
#define S(N) std::string(N, '-').c_str()
|
||||
@ -42,20 +40,20 @@ namespace np {
|
||||
size_t length = 0;
|
||||
size_t* data = nullptr;
|
||||
size_t* refcount = nullptr;
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
size_t total = 1;
|
||||
#endif
|
||||
|
||||
__host__ __device__
|
||||
Shape() noexcept {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Shape created (default)");
|
||||
// #endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Shape(const size_t& length, size_t* data) noexcept : length(length), data(data), refcount(new size_t(1)) {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
//print("Shape created (raw)");
|
||||
for(size_t i = 0; i < length; ++i)
|
||||
total *= data[i];
|
||||
@ -64,13 +62,13 @@ namespace np {
|
||||
|
||||
__host__ __device__
|
||||
Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Shape created (initializer)");
|
||||
// #endif
|
||||
const auto* begin = dims.begin();
|
||||
for(size_t i = 0; i < length; ++i){
|
||||
data[i] = begin[i];
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
total *= data[i];
|
||||
#endif
|
||||
}
|
||||
@ -78,17 +76,17 @@ namespace np {
|
||||
|
||||
__host__ __device__
|
||||
Shape(const Shape& shape) noexcept {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Shape created (copy)");
|
||||
#endif
|
||||
if (data != nullptr && data != shape.data){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former shape deleted (copy)");
|
||||
#endif
|
||||
delete[] data;
|
||||
}
|
||||
if (refcount != nullptr && refcount != shape.refcount){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former shape refcount freed (copy)");
|
||||
#endif
|
||||
delete refcount;
|
||||
@ -104,7 +102,7 @@ namespace np {
|
||||
refcount = shape.refcount;
|
||||
if (refcount != nullptr)
|
||||
(*refcount)++;
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
else
|
||||
print("Moved shape has null refcount");
|
||||
total = shape.total;
|
||||
@ -113,17 +111,17 @@ namespace np {
|
||||
|
||||
__host__ __device__
|
||||
Shape(Shape&& shape) noexcept {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Shape created (move)");
|
||||
// #endif
|
||||
if (data != nullptr && data != shape.data){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former shape deleted (move)");
|
||||
#endif
|
||||
delete[] data;
|
||||
}
|
||||
if (refcount != nullptr && refcount != shape.refcount){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former shape refcount freed (move)");
|
||||
#endif
|
||||
delete refcount;
|
||||
@ -135,7 +133,7 @@ namespace np {
|
||||
shape.length = 0;
|
||||
shape.data = nullptr;
|
||||
shape.refcount = nullptr;
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
total = shape.total;
|
||||
shape.total = 1;
|
||||
#endif
|
||||
@ -144,30 +142,30 @@ namespace np {
|
||||
__host__ __device__
|
||||
~Shape() noexcept {
|
||||
if(refcount == nullptr){
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Shape refcount freed more than once");
|
||||
// #endif
|
||||
return;
|
||||
}
|
||||
--(*refcount);
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// printf("Shape destructed : %lu\n", *refcount);
|
||||
// #endif
|
||||
if(*refcount == 0){
|
||||
if (data != nullptr){
|
||||
delete[] data;
|
||||
data = nullptr;
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Shape freeing ...");
|
||||
// #endif
|
||||
}
|
||||
//#ifdef __DEBUG
|
||||
//#if __DEBUG
|
||||
else
|
||||
printf("Shape freed more than once : %lu\n", *refcount);
|
||||
//#endif
|
||||
delete refcount;
|
||||
refcount = nullptr;
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
total = 1;
|
||||
#endif
|
||||
}
|
||||
@ -175,17 +173,17 @@ namespace np {
|
||||
|
||||
__host__ __device__
|
||||
Shape& operator=(const Shape& shape) noexcept {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Shape created (assign copy)");
|
||||
#endif
|
||||
if (data != nullptr && data != shape.data){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former shape deleted (assign copy)");
|
||||
#endif
|
||||
delete[] data;
|
||||
}
|
||||
if (refcount != nullptr && refcount != shape.refcount){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former shape refcount freed (assign copy)");
|
||||
#endif
|
||||
delete refcount;
|
||||
@ -201,7 +199,7 @@ namespace np {
|
||||
refcount = shape.refcount;
|
||||
if (refcount != nullptr)
|
||||
(*refcount)++;
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
else
|
||||
printf("Assigned copy shape has null refcount");
|
||||
total = shape.total;
|
||||
@ -211,17 +209,17 @@ namespace np {
|
||||
|
||||
__host__ __device__
|
||||
Shape& operator=(Shape&& shape) noexcept {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Shape created (assign move)");
|
||||
// #endif
|
||||
if (data != nullptr && data != shape.data){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former shape deleted (assign move)");
|
||||
#endif
|
||||
delete[] data;
|
||||
}
|
||||
if (refcount != nullptr && refcount != shape.refcount){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former shape refcount freed (assign move)");
|
||||
#endif
|
||||
delete refcount;
|
||||
@ -229,7 +227,7 @@ namespace np {
|
||||
length = shape.length;
|
||||
data = shape.data;
|
||||
refcount = shape.refcount;
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
total = shape.total;
|
||||
if (refcount == nullptr)
|
||||
print("Assigned copy shape has null refcount");
|
||||
@ -244,7 +242,7 @@ namespace np {
|
||||
|
||||
__host__ __device__
|
||||
constexpr size_t& operator[](const size_t& i) const {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
if (i > length){
|
||||
printf("Index %lu out of shape length %lu\n", i, length);
|
||||
#ifndef __CUDACC__
|
||||
@ -258,7 +256,7 @@ namespace np {
|
||||
constexpr bool operator==(const Shape& other) const noexcept {
|
||||
if (length != other.length)
|
||||
return false;
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
if (total != other.total)
|
||||
return false;
|
||||
#endif
|
||||
@ -284,42 +282,42 @@ namespace np {
|
||||
|
||||
__host__ __device__
|
||||
Array() noexcept {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Array created (default)");
|
||||
// #endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(const Shape& shape, T* data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Array created (raw, copy shape)");
|
||||
// #endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Array created (raw empty, copy shape)");
|
||||
// #endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(Shape&& shape, T* data) noexcept : shape(std::move(shape)), data(data), refcount(new size_t(1)) {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Array created (raw, move shape)");
|
||||
// #endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(Shape&& shape) noexcept : shape(std::move(shape)), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Array created (raw empty, move shape)");
|
||||
// #endif
|
||||
}
|
||||
|
||||
__host__ __device__
|
||||
Array(const Array& array) noexcept : shape(array.shape) {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Array created (copy)");
|
||||
#endif
|
||||
if (data != nullptr && data != array.data){
|
||||
@ -329,7 +327,7 @@ namespace np {
|
||||
delete[] data;
|
||||
}
|
||||
if (refcount != nullptr && refcount != array.refcount){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former array refcount freed (move)");
|
||||
#endif
|
||||
delete refcount;
|
||||
@ -344,7 +342,7 @@ namespace np {
|
||||
refcount = array.refcount;
|
||||
if (refcount != nullptr)
|
||||
(*refcount)++;
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
else
|
||||
print("Moved array has null refcount");
|
||||
#endif
|
||||
@ -352,17 +350,17 @@ namespace np {
|
||||
|
||||
__host__ __device__
|
||||
Array(Array&& array) noexcept {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Array created (move)");
|
||||
// #endif
|
||||
if (data != nullptr && data != array.data){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former array deleted (move)");
|
||||
#endif
|
||||
delete[] data;
|
||||
}
|
||||
if (refcount != nullptr && refcount != array.refcount){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former array refcount freed (move)");
|
||||
#endif
|
||||
delete refcount;
|
||||
@ -378,24 +376,24 @@ namespace np {
|
||||
__host__ __device__
|
||||
~Array() noexcept {
|
||||
if(refcount == nullptr){
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Array refcount freed more than once");
|
||||
// #endif
|
||||
return;
|
||||
}
|
||||
--(*refcount);
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// printf("Array destructed : %lu\n", *refcount);
|
||||
// #endif
|
||||
if(*refcount == 0){
|
||||
if (data != nullptr){
|
||||
delete[] data;
|
||||
data = nullptr;
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Array freeing ...");
|
||||
// #endif
|
||||
}
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
else
|
||||
printf("Array freed more than once : %lu\n", *refcount);
|
||||
#endif
|
||||
@ -406,17 +404,17 @@ namespace np {
|
||||
|
||||
__host__ __device__
|
||||
Array& operator=(const Array& array) noexcept {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Array created (assign copy)");
|
||||
#endif
|
||||
if (data != nullptr && data != array.data){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former array deleted (assign copy)");
|
||||
#endif
|
||||
delete[] data;
|
||||
}
|
||||
if (refcount != nullptr && refcount != array.refcount){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former array refcount freed (assign copy)");
|
||||
#endif
|
||||
delete refcount;
|
||||
@ -433,7 +431,7 @@ namespace np {
|
||||
refcount = array.refcount;
|
||||
if (refcount != nullptr)
|
||||
(*refcount)++;
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
else
|
||||
print("Assigned array has null refcount");
|
||||
#endif
|
||||
@ -442,17 +440,17 @@ namespace np {
|
||||
|
||||
__host__ __device__
|
||||
Array& operator=(Array&& array) noexcept {
|
||||
// #ifdef __DEBUG
|
||||
// #if __DEBUG
|
||||
// print("Array created (assign move)");
|
||||
// #endif
|
||||
if (data != nullptr && data != array.data){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former array deleted (assign move)");
|
||||
#endif
|
||||
delete[] data;
|
||||
}
|
||||
if (refcount != nullptr && refcount != array.refcount){
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("Former array refcount freed (assign move)");
|
||||
#endif
|
||||
delete refcount;
|
||||
@ -522,7 +520,7 @@ namespace np {
|
||||
template<typename T>
|
||||
__host__ __device__
|
||||
constexpr T& Array<T>::operator[](const size_t& i) const {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
if (i > shape.total){
|
||||
printf("Index %lu out of array size %lu\n", i, shape.total);
|
||||
#ifndef __CUDACC__
|
||||
@ -563,7 +561,7 @@ namespace np {
|
||||
template<typename T>
|
||||
template<typename F>
|
||||
Array<T> Array<T>::operator*(const Array<F>& other) const {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
if (shape != other.shape){
|
||||
printf("Incompatible shapes\n");
|
||||
throw;
|
||||
@ -598,7 +596,7 @@ namespace np {
|
||||
template<typename T>
|
||||
template<typename F>
|
||||
Array<T>& Array<T>::operator*=(const Array<F>& other) {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
if (shape != other.shape){
|
||||
printf("Incompatible shapes\n");
|
||||
throw;
|
||||
@ -613,7 +611,7 @@ namespace np {
|
||||
template<typename T>
|
||||
template<typename F>
|
||||
Array<T>& Array<T>::operator+=(const Array<F>& other) {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
if (shape != other.shape){
|
||||
printf("Incompatible shapes\n");
|
||||
throw;
|
||||
@ -638,7 +636,7 @@ namespace np {
|
||||
template<typename T>
|
||||
template<typename F>
|
||||
Array<T> Array<T>::operator-(const np::Array<F>& other) const {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
if (shape != other.shape){
|
||||
printf("Incompatible shapes\n");
|
||||
throw;
|
||||
@ -921,7 +919,7 @@ np::Array<T> copyToDevice(const char* name, const np::Array<T>& array) noexcept
|
||||
_print_cuda_error_(name, cudaMemcpy(d_array.data, array.data, array_size, cudaMemcpyHostToDevice));
|
||||
//_print_cuda_error_(name, cudaMemcpy(d_array.shape.refcount, array.shape.refcount, sizeof(size_t), cudaMemcpyHostToDevice));
|
||||
_print_cuda_error_(name, cudaMemcpy(d_array.shape.data, array.shape.data, shape_size, cudaMemcpyHostToDevice));
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
d_array.shape.total = np::prod(array.shape);
|
||||
#endif
|
||||
return d_array;
|
||||
|
106
cpp/projet.cpp
106
cpp/projet.cpp
@ -9,13 +9,6 @@ namespace fs = std::filesystem;
|
||||
|
||||
void test_float() noexcept;
|
||||
|
||||
#ifdef __DEBUG
|
||||
// #define IDX_INSPECT 0
|
||||
// #define IDX_INSPECT 2
|
||||
#define IDX_INSPECT 4548
|
||||
#define IDX_INSPECT_OFFSET 100
|
||||
#endif
|
||||
|
||||
#if GPU_BOOSTED
|
||||
#define LABEL "GPU"
|
||||
#define apply_features apply_features_gpu
|
||||
@ -34,34 +27,34 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
||||
for (const char* const folder_name : { "models", "out" })
|
||||
fs::create_directory(folder_name);
|
||||
|
||||
printf("| %-49s | %-17s | %-29s |\n", "Preprocessing", "Time spent (ns)", "Formatted time spent");
|
||||
printf("|%s|%s|%s|\n", S(51), S(19), S(31));
|
||||
printf("| %-49s | %-18s | %-29s |\n", "Preprocessing", "Time spent (ns)", "Formatted time spent");
|
||||
printf("|%s|%s|%s|\n", S(51), S(20), S(31));
|
||||
|
||||
const auto [ X_train, y_train, X_test, y_test ] = state_saver<uint8_t, 4>("Loading sets", {"X_train", "y_train", "X_test", "y_test"},
|
||||
FORCE_REDO, SAVE_STATE, OUT_DIR, load_datasets);
|
||||
|
||||
#ifdef __DEBUG
|
||||
// print("X_train");
|
||||
// print(X_train.shape);
|
||||
// print(X_train, { IDX_INSPECT });
|
||||
// print("X_test");
|
||||
// print(X_test.shape);
|
||||
// print(X_test, { IDX_INSPECT });
|
||||
// print("y_train");
|
||||
// print(y_train.shape);
|
||||
// print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
// print("y_test");
|
||||
// print(y_test.shape);
|
||||
// print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
#if __DEBUG
|
||||
print("X_train");
|
||||
print(X_train.shape);
|
||||
print(X_train, { IDX_INSPECT });
|
||||
print("X_test");
|
||||
print(X_test.shape);
|
||||
print(X_test, { IDX_INSPECT });
|
||||
print("y_train");
|
||||
print(y_train.shape);
|
||||
print(y_train, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
print("y_test");
|
||||
print(y_test.shape);
|
||||
print(y_test, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
#endif
|
||||
|
||||
const np::Array<uint8_t> feats = state_saver<uint8_t>("Building features", "feats",
|
||||
FORCE_REDO, SAVE_STATE, OUT_DIR, build_features, X_train.shape[1], X_train.shape[2]);
|
||||
|
||||
#ifdef __DEBUG
|
||||
// print("feats");
|
||||
// print(feats.shape);
|
||||
// print_feat(feats, { IDX_INSPECT });
|
||||
#if __DEBUG
|
||||
print("feats");
|
||||
print(feats.shape);
|
||||
print_feat(feats, { IDX_INSPECT });
|
||||
#endif
|
||||
|
||||
const np::Array<uint32_t> X_train_ii = state_saver<uint32_t>("Converting training set to integral images (" LABEL ")", "X_train_ii_" LABEL,
|
||||
@ -69,14 +62,13 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
||||
const np::Array<uint32_t> X_test_ii = state_saver<uint32_t>("Converting testing set to integral images (" LABEL ")", "X_test_ii_" LABEL,
|
||||
FORCE_REDO, SAVE_STATE, OUT_DIR, set_integral_image, X_test);
|
||||
|
||||
#ifdef __DEBUG
|
||||
// print("X_train_ii");
|
||||
// print(X_train_ii.shape);
|
||||
// print(X_train_ii, { IDX_INSPECT });
|
||||
// print("X_test_ii");
|
||||
// print(X_test_ii.shape);
|
||||
// print(X_test_ii, { IDX_INSPECT });
|
||||
// return {};
|
||||
#if __DEBUG
|
||||
print("X_train_ii");
|
||||
print(X_train_ii.shape);
|
||||
print(X_train_ii, { IDX_INSPECT });
|
||||
print("X_test_ii");
|
||||
print(X_test_ii.shape);
|
||||
print(X_test_ii, { IDX_INSPECT });
|
||||
#endif
|
||||
|
||||
const np::Array<int32_t> X_train_feat = state_saver<int32_t>("Applying features to training set (" LABEL ")", "X_train_feat_" LABEL,
|
||||
@ -84,46 +76,46 @@ std::tuple<np::Array<int32_t>, np::Array<uint16_t>, np::Array<uint8_t>, np::Arra
|
||||
const np::Array<int32_t> X_test_feat = state_saver<int32_t>("Applying features to testing set (" LABEL ")", "X_test_feat_" LABEL,
|
||||
FORCE_REDO, SAVE_STATE, OUT_DIR, apply_features, feats, X_test_ii);
|
||||
|
||||
#ifdef __DEBUG
|
||||
// print("X_train_feat");
|
||||
// print(X_train_feat.shape);
|
||||
// print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
// print("X_test_feat");
|
||||
// print(X_test_feat.shape);
|
||||
// print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
#if __DEBUG
|
||||
print("X_train_feat");
|
||||
print(X_train_feat.shape);
|
||||
print(X_train_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
print("X_test_feat");
|
||||
print(X_test_feat.shape);
|
||||
print(X_test_feat, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
#endif
|
||||
|
||||
// const Array<int> indices = measure_time_save<Array<int>>("Selecting best features", "indices", select_percentile, X_train_feat, d.y_train);
|
||||
// const Array<int> indices = measure_time<Array<int>>("Selecting best features", select_percentile, X_train_feat, d.y_train);
|
||||
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
// print_feature(indices);
|
||||
#endif
|
||||
|
||||
const np::Array<uint16_t> X_train_feat_argsort = state_saver<uint16_t>("Precalculating training set argsort (" LABEL ")", "X_train_feat_argsort_" LABEL,
|
||||
FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_train_feat);
|
||||
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("X_train_feat_argsort");
|
||||
print(X_train_feat_argsort.shape);
|
||||
print(X_train_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
#endif
|
||||
|
||||
// const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", "X_test_feat_argsort_" LABEL,
|
||||
// FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);
|
||||
const np::Array<uint16_t> X_test_feat_argsort = state_saver<uint16_t>("Precalculating testing set argsort (" LABEL ")", "X_test_feat_argsort_" LABEL,
|
||||
FORCE_REDO, SAVE_STATE, OUT_DIR, argsort_2d, X_test_feat);
|
||||
|
||||
#ifdef __DEBUG
|
||||
// print("X_test_feat_argsort");
|
||||
// print(X_test_feat_argsort.shape);
|
||||
// print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
#if __DEBUG
|
||||
print("X_test_feat_argsort");
|
||||
print(X_test_feat_argsort.shape);
|
||||
print(X_test_feat_argsort, { IDX_INSPECT, IDX_INSPECT + IDX_INSPECT_OFFSET });
|
||||
#endif
|
||||
|
||||
return { X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test };
|
||||
}
|
||||
|
||||
void train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_train_feat_argsort, const np::Array<uint8_t>& y_train) {
|
||||
printf("\n| %-49s | %-17s | %-29s |\n", "Training", "Time spent (ns)", "Formatted time spent");
|
||||
printf("|%s|%s|%s|\n", S(51), S(19), S(31));
|
||||
printf("\n| %-49s | %-18s | %-29s |\n", "Training", "Time spent (ns)", "Formatted time spent");
|
||||
printf("|%s|%s|%s|\n", S(51), S(20), S(31));
|
||||
|
||||
for (const size_t T : TS) {
|
||||
char title[BUFFER_SIZE] = { 0 };
|
||||
@ -133,13 +125,13 @@ void train(const np::Array<int32_t>& X_train_feat, const np::Array<uint16_t>& X_
|
||||
sprintf(alphas_title, "alphas_%lu_%s", T, LABEL);
|
||||
sprintf(final_classifiers_title, "final_classifiers_%lu_%s", T, LABEL);
|
||||
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
const auto [ alphas, final_classifiers ] = state_saver<float64_t, 2>(title, { alphas_title, final_classifiers_title },
|
||||
#else
|
||||
state_saver<float64_t, 2>(title, { alphas_title, final_classifiers_title },
|
||||
#endif
|
||||
FORCE_REDO, SAVE_STATE, MODEL_DIR, train_viola_jones, T, X_train_feat, X_train_feat_argsort, y_train);
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
print("alphas");
|
||||
print(alphas);
|
||||
print("final_classifiers");
|
||||
@ -298,9 +290,9 @@ void final_unit_test() {
|
||||
}
|
||||
|
||||
int main(){
|
||||
#ifdef __DEBUG
|
||||
printf("| %-49s | %-17s | %-29s |\n", "Unit testing", "Time spent (ns)", "Formatted time spent");
|
||||
printf("|%s|%s|%s|\n", S(51), S(19), S(31));
|
||||
#if __DEBUG
|
||||
printf("| %-49s | %-18s | %-29s |\n", "Unit testing", "Time spent (ns)", "Formatted time spent");
|
||||
printf("|%s|%s|%s|\n", S(51), S(20), S(31));
|
||||
benchmark_function_void("Testing GPU capabilities 1D", test_working, 3 + (1<<29));
|
||||
benchmark_function_void("Testing GPU capabilities 2D", test_working_2d, 3 + (1<<15), 2 + (1<<14));
|
||||
benchmark_function_void("Testing GPU capabilities 3D", test_working_3d, 9 + (1<<10), 5 + (1<<10), 7 + (1<<9));
|
||||
@ -313,7 +305,7 @@ int main(){
|
||||
train(X_train_feat, X_train_feat_argsort, y_train);
|
||||
testing_and_evaluating(X_train_feat, y_train, X_test_feat, y_test);
|
||||
final_unit_test();
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
printf("\nAFTER CLEANUP\n");
|
||||
#endif
|
||||
return EXIT_SUCCESS;
|
||||
|
@ -21,7 +21,7 @@ void clearProgress() noexcept {
|
||||
|
||||
template<typename T>
|
||||
void test(const uint64_t& N) noexcept {
|
||||
#ifdef __DEBUG
|
||||
#if __DEBUG
|
||||
printf("DETERMINISTIC for N=%s of %s sized %s\n", thousand_sep(N).c_str(), typeid(T).name(), format_byte_size(sizeof(T)).c_str());
|
||||
print("Estimating memory footprint at : " + format_byte_size(3 * N * sizeof(T)));
|
||||
#endif
|
||||
|
@ -1,12 +1,7 @@
|
||||
from numba import float64, uint32, cuda, int32, uint16
|
||||
from config import COMPILE_WITH_C
|
||||
from numba import float64, uint32, cuda, int32
|
||||
from config import COMPILE_WITH_C, NB_THREADS, NB_THREADS_2D, NB_THREADS_3D, M
|
||||
import numpy as np
|
||||
|
||||
NB_THREADS = 1024
|
||||
NB_THREADS_2D = (32, 32)
|
||||
NB_THREADS_3D = (16, 16, 4)
|
||||
M = int(np.log2(NB_THREADS_2D[1]))
|
||||
|
||||
if COMPILE_WITH_C:
|
||||
from numba import njit
|
||||
else:
|
||||
|
@ -2,6 +2,7 @@ from toolbox import picke_multi_loader, format_time_ns, unit_test_argsort_2d
|
||||
from typing import List, Tuple
|
||||
from time import perf_counter_ns
|
||||
import numpy as np
|
||||
from config import OUT_DIR, DATA_DIR
|
||||
|
||||
def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e-8) -> None:
|
||||
"""Test if each result is equal to the results of the other devices.
|
||||
@ -20,32 +21,32 @@ def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e
|
||||
fnc_s = perf_counter_ns()
|
||||
n_total= 0
|
||||
n_success = 0
|
||||
print(f"\n| {'Unit testing':<37} | {'Test state':<10} | {'Time spent (ns)':<17} | {'Formatted time spent':<29} |")
|
||||
print(f"|{'-'*39}|{'-'*12}|{'-'*19}|{'-'*31}|")
|
||||
print(f"\n| {'Unit testing':<37} | {'Test state':<10} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |")
|
||||
print(f"|{'-'*39}|{'-'*12}|{'-'*20}|{'-'*31}|")
|
||||
|
||||
for filename in ["X_train_feat", "X_test_feat", "X_train_ii", "X_test_ii"]:
|
||||
print(f"{filename}...", end = "\r")
|
||||
bs = picke_multi_loader([f"{filename}_{label}" for label in labels], "./out")
|
||||
bs = picke_multi_loader([f"{filename}_{label}" for label in labels], OUT_DIR)
|
||||
|
||||
for i, (b1, l1) in enumerate(zip(bs, labels)):
|
||||
if b1 is None:
|
||||
#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
|
||||
#print(f"| {filename:<22} - {l1:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||
continue
|
||||
for j, (b2, l2) in enumerate(zip(bs, labels)):
|
||||
if i >= j:
|
||||
continue
|
||||
if b2 is None:
|
||||
#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
|
||||
#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||
continue
|
||||
n_total += 1
|
||||
s = perf_counter_ns()
|
||||
state = np.abs(b1 - b2).mean() < tol
|
||||
e = perf_counter_ns() - s
|
||||
if state:
|
||||
print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
|
||||
print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||
n_success += 1
|
||||
else:
|
||||
print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
|
||||
print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||
|
||||
for filename, featname in zip(["X_train_feat_argsort", "X_test_feat_argsort"], ["X_train_feat", "X_test_feat"]):
|
||||
print(f"Loading {filename}...", end = "\r")
|
||||
@ -53,14 +54,14 @@ def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e
|
||||
bs = []
|
||||
for label in labels:
|
||||
if feat is None:
|
||||
feat_tmp = picke_multi_loader([f"{featname}_{label}"], "./out")[0]
|
||||
feat_tmp = picke_multi_loader([f"{featname}_{label}"], OUT_DIR)[0]
|
||||
if feat_tmp is not None:
|
||||
feat = feat_tmp
|
||||
bs.append(picke_multi_loader([f"{filename}_{label}"], "./out")[0])
|
||||
bs.append(picke_multi_loader([f"{filename}_{label}"], OUT_DIR)[0])
|
||||
|
||||
for i, (b1, l1) in enumerate(zip(bs, labels)):
|
||||
if b1 is None:
|
||||
#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
|
||||
#print(f"| {filename:<22} - {l1:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||
continue
|
||||
if feat is not None:
|
||||
n_total += 1
|
||||
@ -68,26 +69,26 @@ def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e
|
||||
state = unit_test_argsort_2d(feat, b1)
|
||||
e = perf_counter_ns() - s
|
||||
if state:
|
||||
print(f"| {filename:<22} - {l1:<4} argsort | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
|
||||
print(f"| {filename:<22} - {l1:<4} argsort | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||
n_success += 1
|
||||
else:
|
||||
print(f"| {filename:<22} - {l1:<4} argsort | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
|
||||
print(f"| {filename:<22} - {l1:<4} argsort | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||
|
||||
for j, (b2, l2) in enumerate(zip(bs, labels)):
|
||||
if i >= j:
|
||||
continue
|
||||
if b2 is None:
|
||||
#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
|
||||
#print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||
continue
|
||||
n_total += 1
|
||||
s = perf_counter_ns()
|
||||
state = np.abs(b1 - b2).mean() < tol
|
||||
e = perf_counter_ns() - s
|
||||
if state:
|
||||
print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
|
||||
print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||
n_success += 1
|
||||
else:
|
||||
print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
|
||||
print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||
|
||||
for T in TS:
|
||||
for filename in ["alphas", "final_classifiers"]:
|
||||
@ -96,32 +97,33 @@ def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e
|
||||
|
||||
for i, (b1, l1) in enumerate(zip(bs, labels)):
|
||||
if b1 is None:
|
||||
#print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
|
||||
#print(f"| {filename + '_' + str(T):<22} - {l1:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||
continue
|
||||
for j, (b2, l2) in enumerate(zip(bs, labels)):
|
||||
if i >= j:
|
||||
continue
|
||||
if b2 is None:
|
||||
#print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
|
||||
#print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |")
|
||||
continue
|
||||
n_total += 1
|
||||
s = perf_counter_ns()
|
||||
state = np.abs(b1 - b2).mean() < tol
|
||||
e = perf_counter_ns() - s
|
||||
if state:
|
||||
print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
|
||||
print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||
n_success += 1
|
||||
else:
|
||||
print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
|
||||
print(f"|{'-'*39}|{'-'*12}|{'-'*19}|{'-'*31}|")
|
||||
e = perf_counter_ns() - fnc_s
|
||||
print(f"| {'Unit testing summary':<37} | {str(n_success) + '/' + str(n_total):>10} | {e:>17,} | {format_time_ns(e):<29} |")
|
||||
print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||
|
||||
def load_datasets(data_dir: str = "../data") -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
||||
print(f"|{'-'*39}|{'-'*12}|{'-'*20}|{'-'*31}|")
|
||||
e = perf_counter_ns() - fnc_s
|
||||
print(f"| {'Unit testing summary':<37} | {str(n_success) + '/' + str(n_total):>10} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||
|
||||
def load_datasets(data_dir: str = DATA_DIR) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
||||
"""Load the datasets.
|
||||
|
||||
Args:
|
||||
data_dir (str, optional): [description]. Defaults to "../data".
|
||||
data_dir (str, optional): [description]. Defaults to DATA_DIR (see config.py).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: [description]
|
||||
|
@ -1,3 +1,14 @@
|
||||
import numpy as np
|
||||
|
||||
DATA_DIR = "../data"
|
||||
OUT_DIR = "./out"
|
||||
MODEL_DIR = "./models"
|
||||
|
||||
NB_THREADS = 1024
|
||||
NB_THREADS_2D = (32, 32)
|
||||
NB_THREADS_3D = (16, 16, 4)
|
||||
M = int(np.log2(NB_THREADS_2D[1]))
|
||||
|
||||
# Save state to avoid recalculation on restart
|
||||
SAVE_STATE = True
|
||||
# Redo the state even if it's already saved
|
||||
@ -5,7 +16,7 @@ FORCE_REDO = False
|
||||
# Use NJIT to greatly accelerate runtime
|
||||
COMPILE_WITH_C = False
|
||||
# Use GPU to greatly accelerate runtime (takes priority over NJIT)
|
||||
GPU_BOOSTED = False
|
||||
GPU_BOOSTED = True
|
||||
# Number of weak classifiers
|
||||
# TS = [1]
|
||||
# TS = [1, 5, 10]
|
||||
@ -13,3 +24,12 @@ GPU_BOOSTED = False
|
||||
# TS = [1, 5, 10, 25, 50, 100, 200]
|
||||
# TS = [1, 5, 10, 25, 50, 100, 200, 300]
|
||||
TS = [1, 5, 10, 25, 50, 100, 200, 300, 400, 500, 1000]
|
||||
# Enable verbose output (for debugging purposes)
|
||||
__DEBUG = False
|
||||
# Debugging options
|
||||
if __DEBUG:
|
||||
IDX_INSPECT = 4548
|
||||
IDX_INSPECT_OFFSET = 100
|
||||
np.seterr(all = 'raise')
|
||||
# Debug option (image width * log_10(length) + extra characters)
|
||||
np.set_printoptions(linewidth = 19 * 6 + 3)
|
||||
|
140
python/projet.py
140
python/projet.py
@ -12,9 +12,9 @@ from time import perf_counter_ns
|
||||
from os import makedirs
|
||||
import numpy as np
|
||||
|
||||
#np.seterr(all = 'raise')
|
||||
|
||||
from config import FORCE_REDO, COMPILE_WITH_C, GPU_BOOSTED, TS, SAVE_STATE
|
||||
from config import FORCE_REDO, COMPILE_WITH_C, GPU_BOOSTED, TS, SAVE_STATE, MODEL_DIR, __DEBUG
|
||||
if __DEBUG:
|
||||
from config import IDX_INSPECT, IDX_INSPECT_OFFSET
|
||||
|
||||
if GPU_BOOSTED:
|
||||
from ViolaJonesGPU import apply_features, set_integral_image, argsort
|
||||
@ -26,12 +26,6 @@ else:
|
||||
from ViolaJonesCPU import apply_features, set_integral_image, argsort
|
||||
label = 'CPU' if COMPILE_WITH_C else 'PY'
|
||||
|
||||
# FIXME Debug code
|
||||
# IDX_INSPECT = 0
|
||||
# IDX_INSPECT = 2
|
||||
IDX_INSPECT = 4548
|
||||
IDX_INSPECT_OFFSET = 100
|
||||
|
||||
def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""Train the weak classifiers.
|
||||
|
||||
@ -45,25 +39,23 @@ def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) ->
|
||||
"""
|
||||
feats = state_saver("Building features", "feats", lambda: build_features(X_train.shape[1], X_train.shape[2]), FORCE_REDO, SAVE_STATE)
|
||||
|
||||
# FIXME Debug code
|
||||
# print("feats")
|
||||
# print(feats.shape)
|
||||
# print(feats[IDX_INSPECT].ravel())
|
||||
# return 0, 0
|
||||
if __DEBUG:
|
||||
print("feats")
|
||||
print(feats.shape)
|
||||
print(feats[IDX_INSPECT].ravel())
|
||||
|
||||
X_train_ii = state_saver(f"Converting training set to integral images ({label})", f"X_train_ii_{label}",
|
||||
lambda: set_integral_image(X_train), FORCE_REDO, SAVE_STATE)
|
||||
X_test_ii = state_saver(f"Converting testing set to integral images ({label})", f"X_test_ii_{label}",
|
||||
lambda: set_integral_image(X_test), FORCE_REDO, SAVE_STATE)
|
||||
|
||||
# FIXME Debug code
|
||||
# print("X_train_ii")
|
||||
# print(X_train_ii.shape)
|
||||
# print(X_train_ii[IDX_INSPECT])
|
||||
# print("X_test_ii")
|
||||
# print(X_test_ii.shape)
|
||||
# print(X_test_ii[IDX_INSPECT])
|
||||
# return 0, 0
|
||||
if __DEBUG:
|
||||
print("X_train_ii")
|
||||
print(X_train_ii.shape)
|
||||
print(X_train_ii[IDX_INSPECT])
|
||||
print("X_test_ii")
|
||||
print(X_test_ii.shape)
|
||||
print(X_test_ii[IDX_INSPECT])
|
||||
|
||||
X_train_feat = state_saver(f"Applying features to training set ({label})", f"X_train_feat_{label}",
|
||||
lambda: apply_features(feats, X_train_ii), FORCE_REDO, SAVE_STATE)
|
||||
@ -71,14 +63,13 @@ def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) ->
|
||||
lambda: apply_features(feats, X_test_ii), FORCE_REDO, SAVE_STATE)
|
||||
del X_train_ii, X_test_ii, feats
|
||||
|
||||
# FIXME Debug code
|
||||
# print("X_train_feat")
|
||||
# print(X_train_feat.shape)
|
||||
# print(X_train_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||
# print("X_test_feat")
|
||||
# print(X_test_feat.shape)
|
||||
# print(X_test_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||
# return 0, 0
|
||||
if __DEBUG:
|
||||
print("X_train_feat")
|
||||
print(X_train_feat.shape)
|
||||
print(X_train_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||
print("X_test_feat")
|
||||
print(X_test_feat.shape)
|
||||
print(X_test_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||
|
||||
#indices = state_saver("Selecting best features training set", "indices", force_redo = True, save_state = SAVE_STATE,
|
||||
# fnc = lambda: SelectPercentile(f_classif, percentile = 10).fit(X_train_feat.T, y_train).get_support(indices = True))
|
||||
@ -96,40 +87,35 @@ def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) ->
|
||||
|
||||
# X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]
|
||||
|
||||
#return 0, 0
|
||||
|
||||
X_train_feat_argsort = state_saver(f"Precalculating training set argsort ({label})", f"X_train_feat_argsort_{label}",
|
||||
lambda: argsort(X_train_feat), FORCE_REDO, SAVE_STATE)
|
||||
|
||||
# FIXME Debug code
|
||||
# print("X_train_feat_argsort")
|
||||
# print(X_train_feat_argsort.shape)
|
||||
# print(X_train_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||
# benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_train_feat, X_train_feat_argsort))
|
||||
# return 0, 0
|
||||
if __DEBUG:
|
||||
print("X_train_feat_argsort")
|
||||
print(X_train_feat_argsort.shape)
|
||||
print(X_train_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||
benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_train_feat, X_train_feat_argsort))
|
||||
|
||||
# X_test_feat_argsort = state_saver(f"Precalculating testing set argsort ({label})", f"X_test_feat_argsort_{label}",
|
||||
# lambda: argsort(X_test_feat), True, False)
|
||||
X_test_feat_argsort = state_saver(f"Precalculating testing set argsort ({label})", f"X_test_feat_argsort_{label}",
|
||||
lambda: argsort(X_test_feat), FORCE_REDO, SAVE_STATE)
|
||||
|
||||
# FIXME Debug code
|
||||
# print("X_test_feat_argsort")
|
||||
# print(X_test_feat_argsort.shape)
|
||||
# print(X_test_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||
# benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_test_feat, X_test_feat_argsort))
|
||||
# return 0, 0
|
||||
# del X_test_feat_argsort
|
||||
if __DEBUG:
|
||||
print("X_test_feat_argsort")
|
||||
print(X_test_feat_argsort.shape)
|
||||
print(X_test_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
|
||||
benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_test_feat, X_test_feat_argsort))
|
||||
del X_test_feat_argsort
|
||||
|
||||
print(f"\n| {'Training':<49} | {'Time spent (ns)':<17} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*19}|{'-'*31}|")
|
||||
print(f"\n| {'Training':<49} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*20}|{'-'*31}|")
|
||||
|
||||
for T in TS:
|
||||
# alphas, final_classifiers = state_saver(f"ViolaJones T = {T:<3} ({label})", [f"alphas_{T}_{label}", f"final_classifiers_{T}_{label}"],
|
||||
state_saver(f"ViolaJones T = {T:<4} ({label})", [f"alphas_{T}_{label}", f"final_classifiers_{T}_{label}"],
|
||||
lambda: train_viola_jones(T, X_train_feat, X_train_feat_argsort, y_train), FORCE_REDO, SAVE_STATE, "./models")
|
||||
# FIXME Debug code
|
||||
# print("alphas")
|
||||
# print(alphas)
|
||||
# print("final_classifiers")
|
||||
# print(final_classifiers)
|
||||
alphas, final_classifiers = state_saver(f"ViolaJones T = {T:<3} ({label})", [f"alphas_{T}_{label}", f"final_classifiers_{T}_{label}"],
|
||||
lambda: train_viola_jones(T, X_train_feat, X_train_feat_argsort, y_train), FORCE_REDO, SAVE_STATE, MODEL_DIR)
|
||||
if __DEBUG:
|
||||
print("alphas")
|
||||
print(alphas)
|
||||
print("final_classifiers")
|
||||
print(final_classifiers)
|
||||
|
||||
return X_train_feat, X_test_feat
|
||||
|
||||
@ -183,43 +169,37 @@ def _main_() -> None:
|
||||
for folder_name in ["models", "out"]:
|
||||
makedirs(folder_name, exist_ok = True)
|
||||
|
||||
print(f"| {'Preprocessing':<49} | {'Time spent (ns)':<17} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*19}|{'-'*31}|")
|
||||
print(f"| {'Preprocessing':<49} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*20}|{'-'*31}|")
|
||||
|
||||
X_train, y_train, X_test, y_test = state_saver("Loading sets", ["X_train", "y_train", "X_test", "y_test"],
|
||||
load_datasets, FORCE_REDO, SAVE_STATE)
|
||||
|
||||
# FIXME Debug option (image width * log_10(length) + extra characters)
|
||||
# np.set_printoptions(linewidth = 19 * 6 + 3)
|
||||
|
||||
# FIXME Debug code
|
||||
# print("X_train")
|
||||
# print(X_train.shape)
|
||||
# print(X_train[IDX_INSPECT])
|
||||
# print("X_test")
|
||||
# print(X_test.shape)
|
||||
# print(X_test[IDX_INSPECT])
|
||||
# print("y_train")
|
||||
# print(y_train.shape)
|
||||
# print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
|
||||
# print("y_test")
|
||||
# print(y_test.shape)
|
||||
# print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
|
||||
# return
|
||||
if __DEBUG:
|
||||
print("X_train")
|
||||
print(X_train.shape)
|
||||
print(X_train[IDX_INSPECT])
|
||||
print("X_test")
|
||||
print(X_test.shape)
|
||||
print(X_test[IDX_INSPECT])
|
||||
print("y_train")
|
||||
print(y_train.shape)
|
||||
print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
|
||||
print("y_test")
|
||||
print(y_test.shape)
|
||||
print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
|
||||
|
||||
X_train_feat, X_test_feat = bench_train(X_train, X_test, y_train)
|
||||
|
||||
# FIXME Debug code
|
||||
# return
|
||||
|
||||
# X_train_feat, X_test_feat = picke_multi_loader([f"X_train_feat_{label}", f"X_test_feat_{label}"], "./out")
|
||||
# indices = picke_multi_loader(["indices"], "./out")[0]
|
||||
# X_train_feat, X_test_feat = picke_multi_loader([f"X_train_feat_{label}", f"X_test_feat_{label}"], OUT_DIR)
|
||||
# indices = picke_multi_loader(["indices"], OUT_DIR)[0]
|
||||
# X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]
|
||||
|
||||
bench_accuracy(label, X_train_feat, X_test_feat, y_train, y_test)
|
||||
|
||||
if __name__ == "__main__":
|
||||
#toolbox_unit_test()
|
||||
_main_()
|
||||
if __DEBUG:
|
||||
toolbox_unit_test()
|
||||
|
||||
# Only execute unit test after having trained the specified labels
|
||||
unit_test(TS, ["GPU", "CPU", "PY", "PGPU"])
|
||||
|
@ -4,6 +4,7 @@ from numba import njit
|
||||
import numpy as np
|
||||
import pickle
|
||||
import os
|
||||
from config import MODEL_DIR, OUT_DIR
|
||||
|
||||
formats = ["ns", "µs", "ms", "s", "m", "h", "j", "w", "M", "y"]
|
||||
nb = np.array([1, 1000, 1000, 1000, 60, 60, 24, 7, 4, 12], dtype = np.uint16)
|
||||
@ -48,12 +49,12 @@ def toolbox_unit_test() -> None:
|
||||
# UINT64_MAX == 2^64 = 18446744073709551615 == -1
|
||||
assert "635y 5M 3j 23h 34m 33s 709ms 551µs 616ns" == format_time_ns(2**64)
|
||||
|
||||
def picke_multi_loader(filenames: List[str], save_dir: str = "./models") -> List[Any]:
|
||||
def picke_multi_loader(filenames: List[str], save_dir: str = MODEL_DIR) -> List[Any]:
|
||||
"""Load multiple pickle data files.
|
||||
|
||||
Args:
|
||||
filenames (List[str]): List of all the filename to load.
|
||||
save_dir (str, optional): Path of the files to load. Defaults to "./models".
|
||||
save_dir (str, optional): Path of the files to load. Defaults to MODEL_DIR (see config.py).
|
||||
|
||||
Returns:
|
||||
List[Any]. List of loaded pickle data files.
|
||||
@ -82,10 +83,10 @@ def benchmark_function(step_name: str, fnc: Callable) -> Any:
|
||||
s = perf_counter_ns()
|
||||
b = fnc()
|
||||
e = perf_counter_ns() - s
|
||||
print(f"| {step_name:<49} | {e:>17,} | {format_time_ns(e):<29} |")
|
||||
print(f"| {step_name:<49} | {e:>18,} | {format_time_ns(e):<29} |")
|
||||
return b
|
||||
|
||||
def state_saver(step_name: str, filename: Union[str, List[str]], fnc, force_redo: bool = False, save_state: bool = True, save_dir: str = "./out") -> Any:
|
||||
def state_saver(step_name: str, filename: Union[str, List[str]], fnc, force_redo: bool = False, save_state: bool = True, save_dir: str = OUT_DIR) -> Any:
|
||||
"""Either execute a function then saves the result or load the already existing result.
|
||||
|
||||
Args:
|
||||
@ -93,7 +94,7 @@ def state_saver(step_name: str, filename: Union[str, List[str]], fnc, force_redo
|
||||
filename (Union[str, List[str]]): Name or list of names of the filenames where the result(s) are saved.
|
||||
fnc ([type]): Function to call.
|
||||
force_redo (bool, optional): Recall the function even if the result(s) is already saved. Defaults to False.
|
||||
save_dir (str, optional): Path of the directory to save the result(s). Defaults to "./out".
|
||||
save_dir (str, optional): Path of the directory to save the result(s). Defaults to OUT_DIR (see config.py).
|
||||
|
||||
Returns:
|
||||
Any: The result(s) of the called function
|
||||
@ -111,7 +112,7 @@ def state_saver(step_name: str, filename: Union[str, List[str]], fnc, force_redo
|
||||
print(f"Loading results of {step_name}", end = '\r')
|
||||
with open(f"{save_dir}/{filename}.pkl", "rb") as f:
|
||||
res = pickle.load(f)
|
||||
print(f"| {step_name:<49} | {'None':>17} | {'loaded saved state':<29} |")
|
||||
print(f"| {step_name:<49} | {'None':>18} | {'loaded saved state':<29} |")
|
||||
return res
|
||||
elif isinstance(filename, list):
|
||||
abs = False
|
||||
@ -129,7 +130,7 @@ def state_saver(step_name: str, filename: Union[str, List[str]], fnc, force_redo
|
||||
print(' ' * 100, end = '\r')
|
||||
return b
|
||||
|
||||
print(f"| {step_name:<49} | {'None':>17} | {'loaded saved state':<29} |")
|
||||
print(f"| {step_name:<49} | {'None':>18} | {'loaded saved state':<29} |")
|
||||
b = []
|
||||
print(f"Loading results of {step_name}", end = '\r')
|
||||
for fn in filename:
|
||||
|
Loading…
x
Reference in New Issue
Block a user