#pragma once #include #include #include #include #include #include #define DATA_DIR "../data" #define OUT_DIR "./out" #define MODEL_DIR "./models" #define BUFFER_SIZE 256 #define STRING_INT_SIZE 8 // Length of a number in log10 (including '-') #define S(N) std::string(N, '-').c_str() #ifndef __CUDACC__ #define __host__ #define __device__ #endif typedef float float32_t; typedef double float64_t; typedef long double float128_t; __host__ __device__ constexpr inline int print(const char* str) noexcept { return printf("%s\n", str); } inline int print(const std::string& s) noexcept { return printf("%s\n", s.c_str()); } namespace np { constexpr const float64_t inf = std::numeric_limits::infinity(); typedef struct Slice { size_t x = 0, y = 0, z = 0; } Slice; typedef struct Shape { size_t length = 0; size_t* data = nullptr; size_t* refcount = nullptr; #ifdef __DEBUG size_t total = 1; #endif __host__ __device__ Shape() noexcept { // #ifdef __DEBUG // print("Shape created (default)"); // #endif } __host__ __device__ Shape(const size_t& length, size_t* data) noexcept : length(length), data(data), refcount(new size_t(1)) { #ifdef __DEBUG //print("Shape created (raw)"); for(size_t i = 0; i < length; ++i) total *= data[i]; #endif } __host__ __device__ Shape(const std::initializer_list& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) { // #ifdef __DEBUG // print("Shape created (initializer)"); // #endif const auto* begin = dims.begin(); for(size_t i = 0; i < length; ++i){ data[i] = begin[i]; #ifdef __DEBUG total *= data[i]; #endif } } __host__ __device__ Shape(const Shape& shape) noexcept { #ifdef __DEBUG print("Shape created (copy)"); #endif if (data != nullptr && data != shape.data){ #ifdef __DEBUG print("Former shape deleted (copy)"); #endif delete[] data; } if (refcount != nullptr && refcount != shape.refcount){ #ifdef __DEBUG print("Former shape refcount freed (copy)"); #endif delete refcount; } length = shape.length; //data = new size_t[length]; //memcpy(data, shape.data, length * sizeof(size_t)); //refcount = new size_t; //memcpy(refcount, shape.refcount, sizeof(size_t)); data = shape.data; refcount = shape.refcount; if (refcount != nullptr) (*refcount)++; #ifdef __DEBUG else print("Moved shape has null refcount"); total = shape.total; #endif } __host__ __device__ Shape(Shape&& shape) noexcept { // #ifdef __DEBUG // print("Shape created (move)); // #endif if (data != nullptr && data != shape.data){ #ifdef __DEBUG print("Former shape deleted (move)"); #endif delete[] data; } if (refcount != nullptr && refcount != shape.refcount){ #ifdef __DEBUG print("Former shape refcount freed (move)"); #endif delete refcount; } length = shape.length; data = shape.data; refcount = shape.refcount; shape.length = 0; shape.data = nullptr; shape.refcount = nullptr; #ifdef __DEBUG total = shape.total; shape.total = 1; #endif } __host__ __device__ ~Shape() noexcept { if(refcount == nullptr){ // #ifdef __DEBUG // print("Shape refcount freed more than once"); // #endif return; } --(*refcount); // #ifdef __DEBUG // printf("Shape destructed : %lu\n", *refcount); // #endif if(*refcount == 0){ if (data != nullptr){ delete[] data; data = nullptr; // #ifdef __DEBUG // print("Shape freeing ..."); // #endif } //#ifdef __DEBUG else printf("Shape freed more than once : %lu\n", *refcount); //#endif delete refcount; refcount = nullptr; #ifdef __DEBUG total = 1; #endif } } __host__ __device__ Shape& operator=(const Shape& shape) noexcept { #ifdef __DEBUG print("Shape created (assign copy)"); #endif if (data != nullptr && data != shape.data){ #ifdef __DEBUG print("Former shape deleted (assign copy)"); #endif delete[] data; } if (refcount != nullptr && refcount != shape.refcount){ #ifdef __DEBUG print("Former shape refcount freed (assign copy)"); #endif delete refcount; } length = shape.length; // data = new size_t[length]; // memcpy(data, shape.data, length * sizeof(size_t)); // refcount = new size_t; // memcpy(refcount, shape.refcount, sizeof(size_t)); data = shape.data; refcount = shape.refcount; if (refcount != nullptr) (*refcount)++; #ifdef __DEBUG else printf("Assigned copy shape has null refcount"); total = shape.total; #endif return *this; } __host__ __device__ Shape& operator=(Shape&& shape) noexcept { // #ifdef __DEBUG // print("Shape created (assign move)"); // #endif if (data != nullptr && data != shape.data){ #ifdef __DEBUG print("Former shape deleted (assign move)"); #endif delete[] data; } if (refcount != nullptr && refcount != shape.refcount){ #ifdef __DEBUG print("Former shape refcount freed (assign move)"); #endif delete refcount; } length = shape.length; data = shape.data; refcount = shape.refcount; #ifdef __DEBUG total = shape.total; if (refcount == nullptr) print("Assigned copy shape has null refcount"); shape.total = 1; #endif shape.length = 0; shape.data = nullptr; shape.refcount = nullptr; return *this; } __host__ __device__ constexpr size_t& operator[](const size_t& i) const { #ifdef __DEBUG if (i > length){ printf("Index %lu out of shape length %lu\n", i, length); #ifndef __CUDACC__ throw std::out_of_range("Index out of shape size"); #endif } #endif return data[i]; } constexpr bool operator==(const Shape& other) const noexcept { if (length != other.length) return false; #ifdef __DEBUG if (total != other.total) return false; #endif for(size_t i = 0; i < length; ++i) if (data[i] != other[i]) return false; return true; } constexpr bool operator!=(const Shape& other) const noexcept { return !(*this == other); } } Shape; __host__ __device__ size_t prod(const Shape&, const size_t& = 0) noexcept; template struct Array { Shape shape; T* data = nullptr; size_t* refcount = nullptr; __host__ __device__ Array() noexcept { // #ifdef __DEBUG // print("Array created (default)"); // #endif } __host__ __device__ Array(const Shape& shape, T* data) noexcept : shape(shape), data(data), refcount(new size_t(1)) { // #ifdef __DEBUG // print("Array created (raw, copy shape)"); // #endif } __host__ __device__ Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) { // #ifdef __DEBUG // print("Array created (raw empty, copy shape)"); // #endif } __host__ __device__ Array(Shape&& shape, T* data) noexcept : shape(std::move(shape)), data(data), refcount(new size_t(1)) { // #ifdef __DEBUG // print("Array created (raw, move shape)"); // #endif } __host__ __device__ Array(Shape&& shape) noexcept : shape(std::move(shape)), data(new T[np::prod(shape)]), refcount(new size_t(1)) { // #ifdef __DEBUG // print("Array created (raw empty, move shape)"); // #endif } __host__ __device__ Array(const Array& array) noexcept : shape(array.shape) { #ifdef __DEBUG print("Array created (copy)"); #endif if (data != nullptr && data != array.data){ #ifdef __debug print("Former array deleted (move)"); #endif delete[] data; } if (refcount != nullptr && refcount != array.refcount){ #ifdef __DEBUG print("Former array refcount freed (move)"); #endif delete refcount; } // const size_t size = np::prod(shape); // data = new T[size]; // memcpy(data, array.data, size); // refcount = new size_t; // memcpy(refcount, array.refcount, sizeof(size_t)); data = array.data; refcount = array.refcount; if (refcount != nullptr) (*refcount)++; #ifdef __DEBUG else print("Moved array has null refcount"); #endif } __host__ __device__ Array(Array&& array) noexcept { // #ifdef __DEBUG // print("Array created (move)"); // #endif if (data != nullptr && data != array.data){ #ifdef __DEBUG print("Former array deleted (move)"); #endif delete[] data; } if (refcount != nullptr && refcount != array.refcount){ #ifdef __DEBUG print("Former array refcount freed (move)"); #endif delete refcount; } shape = std::move(array.shape); data = array.data; refcount = array.refcount; array.data = nullptr; array.refcount = nullptr; } __host__ __device__ ~Array() noexcept { if(refcount == nullptr){ // #ifdef __DEBUG // print("Array refcount freed more than once"); // #endif return; } --(*refcount); // #ifdef __DEBUG // printf("Array destructed : %lu\n", *refcount); // #endif if(*refcount == 0){ if (data != nullptr){ delete[] data; data = nullptr; // #ifdef __DEBUG // print("Array freeing ..."); // #endif } #ifdef __DEBUG else printf("Array freed more than once : %lu\n", *refcount); #endif delete refcount; refcount = nullptr; } } __host__ __device__ Array& operator=(const Array& array) noexcept { #ifdef __DEBUG print("Array created (assign copy)"); #endif if (data != nullptr && data != array.data){ #ifdef __DEBUG print("Former array deleted (assign copy)"); #endif delete[] data; } if (refcount != nullptr && refcount != array.refcount){ #ifdef __DEBUG print("Former array refcount freed (assign copy)"); #endif delete refcount; } shape = array.shape; // const size_t size = np::prod(shape) * sizeof(T); // data = new T[size]; // memcpy(data, array.data, size); // refcount = new size_t; // memcpy(refcount, array.refcount, sizeof(size_t)); data = array.data; refcount = array.refcount; if (refcount != nullptr) (*refcount)++; #ifdef __DEBUG else print("Assigned array has null refcount"); #endif return *this; } __host__ __device__ Array& operator=(Array&& array) noexcept { // #ifdef __DEBUG // print("Array created (assign move)"); // #endif if (data != nullptr && data != array.data){ #ifdef __DEBUG print("Former array deleted (assign move)"); #endif delete[] data; } if (refcount != nullptr && refcount != array.refcount){ #ifdef __DEBUG print("Former array refcount freed (assign move)"); #endif delete refcount; } shape = std::move(array.shape); data = array.data; refcount = array.refcount; array.refcount = nullptr; array.data = nullptr; return *this; } __host__ __device__ constexpr T& operator[](const size_t& i) const; // bool operator==(const Array& other) const noexcept; template Array& operator/=(const F& value) noexcept; // Array& operator*=(const T& value) noexcept; template Array& operator*=(const Array& other); template Array& operator+=(const Array& other); template Array operator*(const Array& other) const; // template // Array operator*(const F& other) const; template Array operator-(const np::Array& other) const; template Array operator-(const F& other) const; }; template Array empty(Shape&& shape) noexcept { return { std::move(shape), new T[np::prod(shape)] }; } template Array empty(const Shape& shape) noexcept { return { std::move(shape), new T[np::prod(shape)] }; } template Array empty(const std::initializer_list& dims) noexcept { const Shape shape(dims); return { std::move(shape), new T[np::prod(shape)] }; } template Array zeros(Shape&& shape) noexcept { return { std::move(shape), new T[np::prod(shape)]{0} }; } template Array zeros(const Shape& shape) noexcept { return { std::move(shape), new T[np::prod(shape)]{0} }; } template Array zeros(const std::initializer_list& dims) noexcept { const Shape shape(dims); return { std::move(shape), new T[np::prod(shape)]{0} }; } template __host__ __device__ constexpr T& Array::operator[](const size_t& i) const { #ifdef __DEBUG if (i > shape.total){ printf("Index %lu out of array size %lu\n", i, shape.total); #ifndef __CUDACC__ throw std::out_of_range("Index out of array size"); #endif } #endif return data[i]; } // bool Array::operator==(const Array& other) const noexcept { // if (shape != other.shape) // return false; // const size_t lenght = np::prod(shape); // for(size_t i = 0; i < length; ++i) // if (data[i] != other[i]) // return false; // return true; // } template template Array& Array::operator/=(const F& value) noexcept { const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) data[i] /= value; return *this; } // template // Array& Array::operator*=(const T& value) noexcept { // const size_t total = prod(shape); // for(size_t i = 0; i < total; ++i) // data[i] *= value; // return *this; // } template template Array Array::operator*(const Array& other) const { #ifdef __DEBUG if (shape != other.shape){ printf("Incompatible shapes\n"); throw; } #endif np::Array res = np::empty(shape); const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) res[i] = data[i] * other[i]; return res; } // template // template // Array Array::operator*(const F& value) const { // np::Array res = np::empty(shape); // const size_t total = prod(shape); // for(size_t i = 0; i < total; ++i) // res[i] = data[i] * value; // return res; // } // template // Array operator*(const F& value, const Array& other) { // np::Array res = np::empty(other.shape); // const size_t total = prod(other.shape); // for(size_t i = 0; i < total; ++i) // res[i] = other[i] * value; // return res; // } template template Array& Array::operator*=(const Array& other) { #ifdef __DEBUG if (shape != other.shape){ printf("Incompatible shapes\n"); throw; } #endif const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) data[i] *= other[i]; return *this; } template template Array& Array::operator+=(const Array& other) { #ifdef __DEBUG if (shape != other.shape){ printf("Incompatible shapes\n"); throw; } #endif const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) data[i] += other[i]; return *this; } template template Array Array::operator-(const F& other) const { np::Array res = np::empty(shape); const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) res[i] = data[i] - other; return res; } template template Array Array::operator-(const np::Array& other) const { #ifdef __DEBUG if (shape != other.shape){ printf("Incompatible shapes\n"); throw; } #endif np::Array res = np::empty(shape); const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) res[i] = data[i] - other[i]; return res; } // template // T prod(const Array& array, const size_t& offset = 0) noexcept { // T result = array[offset]; // const size_t total = prod(array.shape); // for(size_t i = 1 + offset; i < total; ++i) // result *= array[i]; // return result; // } template T sum(const Array& array) noexcept { T result = array[0]; const size_t total = prod(array.shape); for(size_t i = 1; i < total; ++i) result += array[i]; return result; } template F mean(const Array& array) noexcept { T result = array[0]; const size_t total = prod(array.shape); for(size_t i = 1; i < total; ++i) result += array[i]; return result / total; } template float64_t mean(const Array& array) noexcept { return mean(array); } template np::Array abs(const Array& array) noexcept { np::Array result = np::empty(array.shape); const size_t total = prod(array.shape); for(size_t i = 0; i < total; ++i) result[i] = std::abs(array[i]); return result; } template np::Array pow(const F& k, const Array& array) noexcept { np::Array result = np::empty(array.shape); const size_t total = prod(array.shape); for(size_t i = 0; i < total; ++i) result[i] = std::pow(k, array[i]); return result; } //template //T max(const Array& array) noexcept { // T result = array[0]; // for(size_t i = 1; i < prod(array.shape); ++i) // if(array[i] > result) // result = array[i]; // return result; //} //template //T min(const Array& array) noexcept { // T result = array[0]; // for(size_t i = 1; i < prod(array.shape); ++i) // if(array[i] < result) // result = array[i]; // return result; //} template Array astype(const Array& array) noexcept { Array res = empty(array.shape); const size_t total = prod(array.shape); for(size_t i = 0; i < total; ++i) res[i] = static_cast(array[i]); return res; } template Array operator-(const T& k, const Array& other) noexcept { np::Array res = empty(other.shape); const size_t total = prod(other.shape); for(size_t i = 0; i < total; ++i) res[i] = k - other[i]; return res; } template __host__ __device__ constexpr T min(const T& lhs, const T& rhs) noexcept { return lhs < rhs ? lhs : rhs; } // template // __host__ __device__ // constexpr T max(const T& lhs, const T& rhs) noexcept { // return lhs > rhs ? lhs : rhs; // } }; template constexpr np::Array& map(np::Array& a, const T(&fnc)(const size_t& i, const T& v)) noexcept { return std::function(fnc); } template constexpr np::Array& map(np::Array& a, const std::function& fnc) noexcept { const size_t a_length = np::prod(a.shape); for (size_t i = 0; i < a_length; ++i) a[i] = fnc(i, a[i]); return a; } //template //constexpr void foreach(const np::Array& a, const void(&fnc)(const size_t&, const T&)) noexcept { // return std::function(fnc); //} //template //constexpr void foreach(const np::Array& a, const std::function& fnc) noexcept { // for (size_t i = 0; i < a.length; ++i) // fnc(i, a[i]); //} template __host__ __device__ constexpr inline static void swap(T* a, T* b) noexcept { if (a == b) return; const T temp = *a; *a = *b; *b = temp; } template static int32_t qs_partition(const np::Array& a, const int32_t& l, const int32_t& h) noexcept { int32_t i = l - 1; for (int32_t j = l; j <= h; ++j) if (a[j] < a[h]) swap(&a[++i], &a[j]); swap(&a[++i], &a[h]); return i; } template void quicksort(const np::Array& a, const int32_t& l, const int32_t& h) noexcept { if (l >= h) return; const int32_t p = qs_partition(a, l, h); quicksort(a, l, p - 1); quicksort(a, p + 1, h); } template void quicksort(const np::Array& a) noexcept { quicksort(a, 0, a.length - 1); } template static size_t as_partition(const T* a, uint16_t* indices, const size_t& l, const size_t& h) noexcept { size_t i = l - 1; for (size_t j = l; j <= h; ++j) if (a[indices[j]] < a[indices[h]]) swap(&indices[++i], &indices[j]); swap(&indices[++i], &indices[h]); return i; } template void argsort(const T* a, uint16_t* indices, const size_t& l, const size_t& h) noexcept { const size_t total = h - l + 1; size_t* stack = new size_t[total]{l, h}; size_t top = 1, low = l, high = h; while (top <= total) { high = stack[top--]; low = stack[top--]; if(low >= high) break; const size_t p = as_partition(a, indices, low, high); if (p - 1 > low && p - 1 < total) { stack[++top] = low; stack[++top] = p - 1; } if (p + 1 < high) { stack[++top] = p + 1; stack[++top] = high; } } delete[] stack; } template np::Array argsort(const np::Array& other, const size_t& l, const size_t& h) noexcept { np::Array indices = np::empty(other.shape); map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; }); argsort(other, indices, l, h); return indices; } template np::Array argsort(const np::Array* other, const size_t& length) noexcept { return argsort(other, 0, length - 1); } std::array, 4> load_datasets(void); void print_error_file(const char*) noexcept; template void save(const np::Array& d, const char* filename) { FILE* output = fopen(filename, "wb"); if (output == NULL) { print_error_file(filename); throw; } assert(d.shape.refcount != 0);//, "Refcount shape is zero !!"); fwrite(&d.shape.length, sizeof(size_t), 1, output); fwrite(d.shape.data, sizeof(size_t), d.shape.length, output); assert(d.refcount != 0);//, "Refcount array is zero !!"); fwrite(d.data, sizeof(T), np::prod(d.shape), output); fclose(output); } template np::Array load(const char* filename) { FILE* input = fopen(filename, "rb"); if (input == NULL) { print_error_file(filename); throw; } size_t length = 0; if(!fread(&length, sizeof(size_t), 1, input)){ print_error_file(filename); fclose(input); throw; } size_t* data = new size_t[length]; if(!fread(data, sizeof(size_t), length, input)){ print_error_file(filename); fclose(input); throw; } np::Array d = np::empty(np::Shape(length, data)); if(!fread(d.data, sizeof(T), np::prod(d.shape), input)){ print_error_file(filename); fclose(input); throw; } fclose(input); return d; } #ifdef __CUDACC__ template np::Array copyToDevice(const char* name, const np::Array& array) noexcept { const size_t array_size = np::prod(array.shape) * sizeof(T); const size_t shape_size = array.shape.length * sizeof(size_t); np::Array d_array; //_print_cuda_error_(name, cudaMalloc(&d_array.refcount, sizeof(size_t))); _print_cuda_error_(name, cudaMalloc(&d_array.data, array_size)); //_print_cuda_error_(name, cudaMalloc(&d_array.shape.refcount, sizeof(size_t))); _print_cuda_error_(name, cudaMalloc(&d_array.shape.data, shape_size)); d_array.shape.length = array.shape.length; //_print_cuda_error_(name, cudaMemcpy(d_array.refcount, array.refcount, sizeof(size_t), cudaMemcpyHostToDevice)); _print_cuda_error_(name, cudaMemcpy(d_array.data, array.data, array_size, cudaMemcpyHostToDevice)); //_print_cuda_error_(name, cudaMemcpy(d_array.shape.refcount, array.shape.refcount, sizeof(size_t), cudaMemcpyHostToDevice)); _print_cuda_error_(name, cudaMemcpy(d_array.shape.data, array.shape.data, shape_size, cudaMemcpyHostToDevice)); #ifdef __DEBUG d_array.shape.total = np::prod(array.shape); #endif return d_array; } template constexpr void cudaFree(const char* name, np::Array& array) noexcept { //_print_cuda_error_(name, cudaFree(array.refcount)); //array.refcount = nullptr; _print_cuda_error_(name, cudaFree(array.data)); array.data = nullptr; //_print_cuda_error_(name, cudaFree(array.shape.refcount)); //array.shape.refcount = nullptr; _print_cuda_error_(name, cudaFree(array.shape.data)); array.shape.data = nullptr; } constexpr inline void _print_cuda_error_(const char* name, const cudaError_t& err) noexcept { if (err != cudaSuccess) fprintf(stderr, "Error: %s = %d : %s\n", name, err, cudaGetErrorString(err)); } #endif int print(const np::Shape&) noexcept; int print(const np::Array&) noexcept; int print(const np::Array&) noexcept; int print(const np::Array&, const np::Slice&) noexcept; int print(const np::Array&, const np::Slice&) noexcept; int print(const np::Array&, const np::Slice&) noexcept; int print(const np::Array&, const np::Slice&) noexcept; int print_feat(const np::Array&, const np::Slice&) noexcept;