#pragma once #include #include #include #include #include #include #include "config.hpp" #define BUFFER_SIZE 256 #define STRING_INT_SIZE 8 // Length of a number in log10 (including '-') #ifndef __CUDACC__ #define __host__ #define __device__ #endif typedef float float32_t; typedef double float64_t; typedef long double float128_t; namespace np { constexpr const float64_t inf = std::numeric_limits::infinity(); typedef struct Slice { size_t x = 0, y = 0, z = 0; } Slice; typedef struct Shape { size_t length = 0; size_t* data = nullptr; size_t* refcount = nullptr; #if __DEBUG size_t total = 1; #endif __host__ __device__ Shape(void) noexcept { #if __DEBUG printf("Shape created (default)\n"); #endif } __host__ __device__ Shape(const size_t& length, size_t* const data) noexcept : length(length), data(data), refcount(new size_t(1)) { #if __DEBUG printf("Shape created (raw)\n"); for(size_t i = 0; i < length; ++i) total *= data[i]; #endif } __host__ __device__ Shape(const std::initializer_list& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) { #if __DEBUG printf("Shape created (initializer)\n"); #endif const size_t* const begin = dims.begin(); for(size_t i = 0; i < length; ++i){ data[i] = begin[i]; #if __DEBUG total *= data[i]; #endif } } __host__ __device__ Shape(const Shape& shape) noexcept { #if __DEBUG printf("Shape created (copy)\n"); #endif if (data != nullptr && data != shape.data){ #if __DEBUG printf("Former shape deleted (copy)\n"); #endif delete[] data; } if (refcount != nullptr && refcount != shape.refcount){ #if __DEBUG printf("Former shape refcount freed (copy)\n"); #endif delete refcount; } length = shape.length; data = shape.data; refcount = shape.refcount; if (refcount != nullptr) (*refcount)++; #if __DEBUG else printf("Moved shape has null refcount\n"); #endif #if __DEBUG total = shape.total; #endif } __host__ __device__ Shape(Shape&& shape) noexcept { #if __DEBUG printf("Shape created (move)\n"); #endif if (data != nullptr && data != shape.data){ #if __DEBUG printf("Former shape deleted (move)\n"); #endif delete[] data; } if (refcount != nullptr && refcount != shape.refcount){ #if __DEBUG printf("Former shape refcount freed (move)\n"); #endif delete refcount; } length = shape.length; data = shape.data; refcount = shape.refcount; shape.length = 0; shape.data = nullptr; shape.refcount = nullptr; #if __DEBUG total = shape.total; shape.total = 1; #endif } __host__ __device__ ~Shape(void) noexcept { if(refcount == nullptr){ #if __DEBUG printf("Shape refcount freed more than once\n"); #endif return; } --(*refcount); #if __DEBUG printf("Shape destructed : %lu\n", *refcount); #endif if(*refcount == 0){ if (data != nullptr){ delete[] data; data = nullptr; #if __DEBUG printf("Shape freeing ...\n"); #endif } #if __DEBUG else printf("Shape freed more than once : %lu\n", *refcount); #endif delete refcount; refcount = nullptr; #if __DEBUG total = 1; #endif } } __host__ __device__ Shape& operator=(const Shape& shape) noexcept { #if __DEBUG printf("Shape created (assign copy)\n"); #endif if (data != nullptr && data != shape.data){ #if __DEBUG printf("Former shape deleted (assign copy)\n"); #endif delete[] data; } if (refcount != nullptr && refcount != shape.refcount){ #if __DEBUG printf("Former shape refcount freed (assign copy)\n"); #endif delete refcount; } length = shape.length; data = shape.data; refcount = shape.refcount; if (refcount != nullptr) (*refcount)++; #if __DEBUG else printf("Assigned copy shape has null refcount\n"); total = shape.total; #endif return *this; } __host__ __device__ Shape& operator=(Shape&& shape) noexcept { #if __DEBUG printf("Shape created (assign move)\n"); #endif if (data != nullptr && data != shape.data){ #if __DEBUG printf("Former shape deleted (assign move)\n"); #endif delete[] data; } if (refcount != nullptr && refcount != shape.refcount){ #if __DEBUG printf("Former shape refcount freed (assign move)\n"); #endif delete refcount; } length = shape.length; data = shape.data; refcount = shape.refcount; #if __DEBUG if (refcount == nullptr) printf("Assigned copy shape has null refcount\n"); total = shape.total; shape.total = 1; #endif shape.length = 0; shape.data = nullptr; shape.refcount = nullptr; return *this; } __host__ __device__ constexpr size_t& operator[](const size_t& i) const { #if __DEBUG if (i > length){ printf("Index %lu out of shape length %lu\n", i, length); #ifndef __CUDACC__ throw std::out_of_range("Index out of shape size"); #endif } #endif return data[i]; } constexpr bool operator==(const Shape& other) const noexcept { if (length != other.length) return false; #if __DEBUG if (total != other.total) return false; #endif for(size_t i = 0; i < length; ++i) if (data[i] != other[i]) return false; return true; } constexpr bool operator!=(const Shape& other) const noexcept { return !(*this == other); } } Shape; __host__ __device__ size_t prod(const Shape&, const size_t& = 0) noexcept; template struct Array { Shape shape; T* data = nullptr; size_t* refcount = nullptr; __host__ __device__ Array(void) noexcept { #if __DEBUG printf("Array created (default)\n"); #endif } __host__ __device__ Array(const Shape& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) { #if __DEBUG printf("Array created (raw, copy shape)\n"); #endif } __host__ __device__ Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) { #if __DEBUG printf("Array created (raw empty, copy shape)\n"); #endif } __host__ __device__ Array(Shape&& shape, T* const data) noexcept : shape(shape), data(data), refcount(new size_t(1)) { #if __DEBUG printf("Array created (raw, move shape)\n"); #endif } __host__ __device__ Array(Shape&& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) { #if __DEBUG printf("Array created (raw empty, move shape)\n"); #endif } __host__ __device__ Array(const Array& array) noexcept : shape(array.shape) { #if __DEBUG printf("Array created (copy)\n"); #endif if (data != nullptr && data != array.data){ #if __DEBUG printf("Former array deleted (copy)\n"); #endif delete[] data; } if (refcount != nullptr && refcount != array.refcount){ #if __DEBUG printf("Former array refcount freed (copy)\n"); #endif delete refcount; } data = array.data; refcount = array.refcount; if (refcount != nullptr) (*refcount)++; #if __DEBUG else printf("Moved array has null refcount\n"); #endif } __host__ __device__ Array(Array&& array) noexcept : shape(std::move(array.shape)) { #if __DEBUG printf("Array created (move)\n"); #endif if (data != nullptr && data != array.data){ #if __DEBUG printf("Former array deleted (move)\n"); #endif delete[] data; } if (refcount != nullptr && refcount != array.refcount){ #if __DEBUG printf("Former array refcount freed (move)\n"); #endif delete refcount; } data = array.data; refcount = array.refcount; array.data = nullptr; array.refcount = nullptr; } __host__ __device__ ~Array(void) noexcept { if(refcount == nullptr){ #if __DEBUG printf("Array refcount freed more than once\n"); #endif return; } --(*refcount); #if __DEBUG printf("Array destructed : %lu\n", *refcount); #endif if(*refcount == 0){ if (data != nullptr){ delete[] data; data = nullptr; #if __DEBUG printf("Array freeing ...\n"); #endif } #if __DEBUG else printf("Array freed more than once : %lu\n", *refcount); #endif delete refcount; refcount = nullptr; } } __host__ __device__ Array& operator=(const Array& array) noexcept { #if __DEBUG printf("Array created (assign copy)\n"); #endif if (data != nullptr && data != array.data){ #if __DEBUG printf("Former array deleted (assign copy)\n"); #endif delete[] data; } if (refcount != nullptr && refcount != array.refcount){ #if __DEBUG printf("Former array refcount freed (assign copy)\n"); #endif delete refcount; } shape = array.shape; data = array.data; refcount = array.refcount; if (refcount != nullptr) (*refcount)++; #if __DEBUG else printf("Assigned array has null refcount\n"); #endif return *this; } __host__ __device__ Array& operator=(Array&& array) noexcept { #if __DEBUG printf("Array created (assign move)\n"); #endif if (data != nullptr && data != array.data){ #if __DEBUG printf("Former array deleted (assign move)\n"); #endif delete[] data; } if (refcount != nullptr && refcount != array.refcount){ #if __DEBUG printf("Former array refcount freed (assign move)\n"); #endif delete refcount; } shape = std::move(array.shape); data = array.data; refcount = array.refcount; array.refcount = nullptr; array.data = nullptr; return *this; } __host__ __device__ constexpr T& operator[](const size_t& i) const; // bool operator==(const Array& other) const noexcept; template Array& operator/=(const F& value) noexcept; // Array& operator*=(const T& value) noexcept; template Array& operator*=(const Array& other); template Array& operator+=(const Array& other); template Array operator*(const Array& other) const; // template // Array operator*(const F& other) const; template Array operator-(const np::Array& other) const; template Array operator-(const F& other) const; }; template inline Array empty(Shape&& shape) noexcept { return Array(shape); } template inline Array empty(const Shape& shape) noexcept { return Array(shape); } template inline Array empty(const std::initializer_list& dims) noexcept { return Array(dims); } template Array zeros(Shape&& shape) noexcept { Array res(shape); memset(res.data, 0, sizeof(T) * np::prod(res.shape)); return res; } template Array zeros(const Shape& shape) noexcept { Array res(shape); memset(res.data, 0, sizeof(T) * np::prod(res.shape)); return res; } template Array zeros(const std::initializer_list& dims) noexcept { Array res(dims); memset(res.data, 0, sizeof(T) * np::prod(res.shape)); return res; } template __host__ __device__ constexpr T& Array::operator[](const size_t& i) const { #if __DEBUG if (i > shape.total){ printf("Index %lu out of array size %lu\n", i, shape.total); #ifndef __CUDACC__ throw std::out_of_range("Index out of array size"); #endif } #endif return data[i]; } // bool Array::operator==(const Array& other) const noexcept { // if (shape != other.shape) // return false; // const size_t length = np::prod(shape); // for(size_t i = 0; i < length; ++i) // if (data[i] != other[i]) // return false; // return true; // } template template Array& Array::operator/=(const F& value) noexcept { const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) data[i] /= value; return *this; } // template // Array& Array::operator*=(const T& value) noexcept { // const size_t total = prod(shape); // for(size_t i = 0; i < total; ++i) // data[i] *= value; // return *this; // } template template Array Array::operator*(const Array& other) const { #if __DEBUG if (shape != other.shape){ printf("Incompatible shapes\n"); throw; } #endif np::Array res = np::empty(shape); const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) res[i] = data[i] * other[i]; return res; } // template // template // Array Array::operator*(const F& value) const { // np::Array res = np::empty(shape); // const size_t total = prod(shape); // for(size_t i = 0; i < total; ++i) // res[i] = data[i] * value; // return res; // } // template // Array operator*(const F& value, const Array& other) { // np::Array res = np::empty(other.shape); // const size_t total = prod(other.shape); // for(size_t i = 0; i < total; ++i) // res[i] = other[i] * value; // return res; // } template template Array& Array::operator*=(const Array& other) { #if __DEBUG if (shape != other.shape){ printf("Incompatible shapes\n"); throw; } #endif const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) data[i] *= other[i]; return *this; } template template Array& Array::operator+=(const Array& other) { #if __DEBUG if (shape != other.shape){ printf("Incompatible shapes\n"); throw; } #endif const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) data[i] += other[i]; return *this; } template template Array Array::operator-(const F& other) const { np::Array res = np::empty(shape); const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) res[i] = data[i] - other; return res; } template template Array Array::operator-(const np::Array& other) const { #if __DEBUG if (shape != other.shape){ printf("Incompatible shapes\n"); throw; } #endif np::Array res = np::empty(shape); const size_t total = prod(shape); for(size_t i = 0; i < total; ++i) res[i] = data[i] - other[i]; return res; } // template // T prod(const Array& array, const size_t& offset = 0) noexcept { // T result = array[offset]; // const size_t total = prod(array.shape); // for(size_t i = 1 + offset; i < total; ++i) // result *= array[i]; // return result; // } template T sum(const Array& array) noexcept { T result = array[0]; const size_t total = prod(array.shape); for(size_t i = 1; i < total; ++i) result += array[i]; return result; } template F mean(const Array& array) noexcept { T result = array[0]; const size_t total = prod(array.shape); for(size_t i = 1; i < total; ++i) result += array[i]; return result / total; } template float64_t mean(const Array& array) noexcept { return mean(array); } template np::Array abs(const Array& array) noexcept { np::Array result = np::empty(array.shape); const size_t total = prod(array.shape); for(size_t i = 0; i < total; ++i) result[i] = std::abs(array[i]); return result; } template np::Array pow(const F& k, const Array& array) noexcept { np::Array result = np::empty(array.shape); const size_t total = prod(array.shape); for(size_t i = 0; i < total; ++i) result[i] = std::pow(k, array[i]); return result; } //template //T max(const Array& array) noexcept { // T result = array[0]; // for(size_t i = 1; i < prod(array.shape); ++i) // if(array[i] > result) // result = array[i]; // return result; //} //template //T min(const Array& array) noexcept { // T result = array[0]; // for(size_t i = 1; i < prod(array.shape); ++i) // if(array[i] < result) // result = array[i]; // return result; //} template Array astype(const Array& array) noexcept { Array res = empty(array.shape); const size_t total = prod(array.shape); for(size_t i = 0; i < total; ++i) res[i] = static_cast(array[i]); return res; } template Array operator-(const T& k, const Array& other) noexcept { np::Array res = empty(other.shape); const size_t total = prod(other.shape); for(size_t i = 0; i < total; ++i) res[i] = k - other[i]; return res; } template __host__ __device__ constexpr T min(const T& lhs, const T& rhs) noexcept { return lhs < rhs ? lhs : rhs; } // template // __host__ __device__ // constexpr T max(const T& lhs, const T& rhs) noexcept { // return lhs > rhs ? lhs : rhs; // } }; template constexpr np::Array& map(np::Array& a, const T(&fnc)(const size_t& i, const T& v)) noexcept { return std::function(fnc); } template constexpr np::Array& map(np::Array& a, const std::function& fnc) noexcept { const size_t a_length = np::prod(a.shape); for (size_t i = 0; i < a_length; ++i) a[i] = fnc(i, a[i]); return a; } //template //constexpr void foreach(const np::Array& a, const void(&fnc)(const size_t&, const T&)) noexcept { // return std::function(fnc); //} //template //constexpr void foreach(const np::Array& a, const std::function& fnc) noexcept { // for (size_t i = 0; i < a.length; ++i) // fnc(i, a[i]); //} template __host__ __device__ constexpr inline static void swap(T* const a, T* const b) noexcept { if (a == b) return; const T temp = *a; *a = *b; *b = temp; } template static int32_t qs_partition(const np::Array& a, const int32_t& l, const int32_t& h) noexcept { int32_t i = l - 1; for (int32_t j = l; j <= h; ++j) if (a[j] < a[h]) swap(&a[++i], &a[j]); swap(&a[++i], &a[h]); return i; } template void quicksort(const np::Array& a, const int32_t& l, const int32_t& h) noexcept { if (l >= h) return; const int32_t p = qs_partition(a, l, h); quicksort(a, l, p - 1); quicksort(a, p + 1, h); } template void quicksort(const np::Array& a) noexcept { quicksort(a, 0, a.length - 1); } template static size_t as_partition(const T* const a, uint16_t* const indices, const size_t& l, const size_t& h) noexcept { size_t i = l - 1; for (size_t j = l; j <= h; ++j) if (a[indices[j]] < a[indices[h]]) swap(&indices[++i], &indices[j]); swap(&indices[++i], &indices[h]); return i; } std::array, 4> load_datasets(void); void print_error_file(const char* const) noexcept; template void save(const np::Array& d, const char* const filename) { FILE* const output = fopen(filename, "wb"); if (output == NULL) { print_error_file(filename); throw; } assert(d.shape.refcount != 0); fwrite(&d.shape.length, sizeof(size_t), 1, output); fwrite(d.shape.data, sizeof(size_t), d.shape.length, output); assert(d.refcount != 0); fwrite(d.data, sizeof(T), np::prod(d.shape), output); fclose(output); } template np::Array load(const char* const filename) { FILE* const input = fopen(filename, "rb"); if (input == NULL) { print_error_file(filename); throw; } size_t length = 0; if(!fread(&length, sizeof(size_t), 1, input)){ print_error_file(filename); fclose(input); throw; } size_t* const data = new size_t[length]; if(!fread(data, sizeof(size_t), length, input)){ print_error_file(filename); fclose(input); throw; } np::Array d = np::empty(np::Shape(length, data)); if(!fread(d.data, sizeof(T), np::prod(d.shape), input)){ print_error_file(filename); fclose(input); throw; } fclose(input); return d; } #ifdef __CUDACC__ template np::Array copyToDevice(const char* const name, const np::Array& array) noexcept { const size_t array_size = np::prod(array.shape) * sizeof(T); const size_t shape_size = array.shape.length * sizeof(size_t); np::Array d_array; //_print_cuda_error_(name, cudaMalloc(&d_array.refcount, sizeof(size_t))); _print_cuda_error_(name, cudaMalloc(&d_array.data, array_size)); //_print_cuda_error_(name, cudaMalloc(&d_array.shape.refcount, sizeof(size_t))); _print_cuda_error_(name, cudaMalloc(&d_array.shape.data, shape_size)); d_array.shape.length = array.shape.length; //_print_cuda_error_(name, cudaMemcpy(d_array.refcount, array.refcount, sizeof(size_t), cudaMemcpyHostToDevice)); _print_cuda_error_(name, cudaMemcpy(d_array.data, array.data, array_size, cudaMemcpyHostToDevice)); //_print_cuda_error_(name, cudaMemcpy(d_array.shape.refcount, array.shape.refcount, sizeof(size_t), cudaMemcpyHostToDevice)); _print_cuda_error_(name, cudaMemcpy(d_array.shape.data, array.shape.data, shape_size, cudaMemcpyHostToDevice)); #if __DEBUG d_array.shape.total = np::prod(array.shape); #endif return d_array; } template constexpr void cudaFree(const char* const name, np::Array& array) noexcept { //_print_cuda_error_(name, cudaFree(array.refcount)); //array.refcount = nullptr; _print_cuda_error_(name, cudaFree(array.data)); array.data = nullptr; //_print_cuda_error_(name, cudaFree(array.shape.refcount)); //array.shape.refcount = nullptr; _print_cuda_error_(name, cudaFree(array.shape.data)); array.shape.data = nullptr; } constexpr inline void _print_cuda_error_(const char* const name, const cudaError_t& err) noexcept { if (err != cudaSuccess) fprintf(stderr, "Error: %s = %d : %s\n", name, err, cudaGetErrorString(err)); } #endif int32_t print(const np::Shape&) noexcept; int32_t print(const np::Array&) noexcept; int32_t print(const np::Array&) noexcept; int32_t print(const np::Array&, const np::Slice&) noexcept; int32_t print(const np::Array&, const np::Slice&) noexcept; int32_t print(const np::Array&, const np::Slice&) noexcept; int32_t print(const np::Array&, const np::Slice&) noexcept; int32_t print_feat(const np::Array&, const np::Slice&) noexcept;