955 lines
24 KiB
C++
955 lines
24 KiB
C++
#pragma once
|
|
#include <iostream>
|
|
#include <cstring>
|
|
#include <cmath>
|
|
#include <cassert>
|
|
#include <functional>
|
|
#include <memory>
|
|
|
|
#define DATA_DIR "../data"
|
|
#define OUT_DIR "./out"
|
|
#define MODEL_DIR "./models"
|
|
#define BUFFER_SIZE 256
|
|
#define STRING_INT_SIZE 8 // Length of a number in log10 (including '-')
|
|
#define S(N) std::string(N, '-').c_str()
|
|
|
|
#ifndef __CUDACC__
|
|
#define __host__
|
|
#define __device__
|
|
#endif
|
|
|
|
typedef float float32_t;
|
|
typedef double float64_t;
|
|
typedef long double float128_t;
|
|
|
|
__host__ __device__
|
|
constexpr inline int print(const char* str) noexcept {
|
|
return printf("%s\n", str);
|
|
}
|
|
|
|
inline int print(const std::string& s) noexcept {
|
|
return printf("%s\n", s.c_str());
|
|
}
|
|
|
|
namespace np {
|
|
constexpr const float64_t inf = std::numeric_limits<float64_t>::infinity();
|
|
|
|
typedef struct Slice {
|
|
size_t x = 0, y = 0, z = 0;
|
|
} Slice;
|
|
|
|
typedef struct Shape {
|
|
size_t length = 0;
|
|
size_t* data = nullptr;
|
|
size_t* refcount = nullptr;
|
|
#ifdef __DEBUG
|
|
size_t total = 1;
|
|
#endif
|
|
|
|
__host__ __device__
|
|
Shape() noexcept {
|
|
// #ifdef __DEBUG
|
|
// print("Shape created (default)");
|
|
// #endif
|
|
}
|
|
|
|
__host__ __device__
|
|
Shape(const size_t& length, size_t* data) noexcept : length(length), data(data), refcount(new size_t(1)) {
|
|
#ifdef __DEBUG
|
|
//print("Shape created (raw)");
|
|
for(size_t i = 0; i < length; ++i)
|
|
total *= data[i];
|
|
#endif
|
|
}
|
|
|
|
__host__ __device__
|
|
Shape(const std::initializer_list<size_t>& dims) noexcept : length(dims.size()), data(new size_t[dims.size()]), refcount(new size_t(1)) {
|
|
// #ifdef __DEBUG
|
|
// print("Shape created (initializer)");
|
|
// #endif
|
|
const auto* begin = dims.begin();
|
|
for(size_t i = 0; i < length; ++i){
|
|
data[i] = begin[i];
|
|
#ifdef __DEBUG
|
|
total *= data[i];
|
|
#endif
|
|
}
|
|
}
|
|
|
|
__host__ __device__
|
|
Shape(const Shape& shape) noexcept {
|
|
#ifdef __DEBUG
|
|
print("Shape created (copy)");
|
|
#endif
|
|
if (data != nullptr && data != shape.data){
|
|
#ifdef __DEBUG
|
|
print("Former shape deleted (copy)");
|
|
#endif
|
|
delete[] data;
|
|
}
|
|
if (refcount != nullptr && refcount != shape.refcount){
|
|
#ifdef __DEBUG
|
|
print("Former shape refcount freed (copy)");
|
|
#endif
|
|
delete refcount;
|
|
}
|
|
length = shape.length;
|
|
|
|
//data = new size_t[length];
|
|
//memcpy(data, shape.data, length * sizeof(size_t));
|
|
//refcount = new size_t;
|
|
//memcpy(refcount, shape.refcount, sizeof(size_t));
|
|
|
|
data = shape.data;
|
|
refcount = shape.refcount;
|
|
if (refcount != nullptr)
|
|
(*refcount)++;
|
|
#ifdef __DEBUG
|
|
else
|
|
print("Moved shape has null refcount");
|
|
total = shape.total;
|
|
#endif
|
|
}
|
|
|
|
__host__ __device__
|
|
Shape(Shape&& shape) noexcept {
|
|
// #ifdef __DEBUG
|
|
// print("Shape created (move));
|
|
// #endif
|
|
if (data != nullptr && data != shape.data){
|
|
#ifdef __DEBUG
|
|
print("Former shape deleted (move)");
|
|
#endif
|
|
delete[] data;
|
|
}
|
|
if (refcount != nullptr && refcount != shape.refcount){
|
|
#ifdef __DEBUG
|
|
print("Former shape refcount freed (move)");
|
|
#endif
|
|
delete refcount;
|
|
}
|
|
length = shape.length;
|
|
data = shape.data;
|
|
refcount = shape.refcount;
|
|
|
|
shape.length = 0;
|
|
shape.data = nullptr;
|
|
shape.refcount = nullptr;
|
|
#ifdef __DEBUG
|
|
total = shape.total;
|
|
shape.total = 1;
|
|
#endif
|
|
}
|
|
|
|
__host__ __device__
|
|
~Shape() noexcept {
|
|
if(refcount == nullptr){
|
|
// #ifdef __DEBUG
|
|
// print("Shape refcount freed more than once");
|
|
// #endif
|
|
return;
|
|
}
|
|
--(*refcount);
|
|
// #ifdef __DEBUG
|
|
// printf("Shape destructed : %lu\n", *refcount);
|
|
// #endif
|
|
if(*refcount == 0){
|
|
if (data != nullptr){
|
|
delete[] data;
|
|
data = nullptr;
|
|
// #ifdef __DEBUG
|
|
// print("Shape freeing ...");
|
|
// #endif
|
|
}
|
|
//#ifdef __DEBUG
|
|
else
|
|
printf("Shape freed more than once : %lu\n", *refcount);
|
|
//#endif
|
|
delete refcount;
|
|
refcount = nullptr;
|
|
#ifdef __DEBUG
|
|
total = 1;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
__host__ __device__
|
|
Shape& operator=(const Shape& shape) noexcept {
|
|
#ifdef __DEBUG
|
|
print("Shape created (assign copy)");
|
|
#endif
|
|
if (data != nullptr && data != shape.data){
|
|
#ifdef __DEBUG
|
|
print("Former shape deleted (assign copy)");
|
|
#endif
|
|
delete[] data;
|
|
}
|
|
if (refcount != nullptr && refcount != shape.refcount){
|
|
#ifdef __DEBUG
|
|
print("Former shape refcount freed (assign copy)");
|
|
#endif
|
|
delete refcount;
|
|
}
|
|
length = shape.length;
|
|
|
|
// data = new size_t[length];
|
|
// memcpy(data, shape.data, length * sizeof(size_t));
|
|
// refcount = new size_t;
|
|
// memcpy(refcount, shape.refcount, sizeof(size_t));
|
|
|
|
data = shape.data;
|
|
refcount = shape.refcount;
|
|
if (refcount != nullptr)
|
|
(*refcount)++;
|
|
#ifdef __DEBUG
|
|
else
|
|
printf("Assigned copy shape has null refcount");
|
|
total = shape.total;
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
__host__ __device__
|
|
Shape& operator=(Shape&& shape) noexcept {
|
|
// #ifdef __DEBUG
|
|
// print("Shape created (assign move)");
|
|
// #endif
|
|
if (data != nullptr && data != shape.data){
|
|
#ifdef __DEBUG
|
|
print("Former shape deleted (assign move)");
|
|
#endif
|
|
delete[] data;
|
|
}
|
|
if (refcount != nullptr && refcount != shape.refcount){
|
|
#ifdef __DEBUG
|
|
print("Former shape refcount freed (assign move)");
|
|
#endif
|
|
delete refcount;
|
|
}
|
|
length = shape.length;
|
|
data = shape.data;
|
|
refcount = shape.refcount;
|
|
#ifdef __DEBUG
|
|
total = shape.total;
|
|
if (refcount == nullptr)
|
|
print("Assigned copy shape has null refcount");
|
|
shape.total = 1;
|
|
#endif
|
|
shape.length = 0;
|
|
shape.data = nullptr;
|
|
shape.refcount = nullptr;
|
|
return *this;
|
|
}
|
|
|
|
|
|
__host__ __device__
|
|
constexpr size_t& operator[](const size_t& i) const {
|
|
#ifdef __DEBUG
|
|
if (i > length){
|
|
printf("Index %lu out of shape length %lu\n", i, length);
|
|
#ifndef __CUDACC__
|
|
throw std::out_of_range("Index out of shape size");
|
|
#endif
|
|
}
|
|
#endif
|
|
return data[i];
|
|
}
|
|
|
|
constexpr bool operator==(const Shape& other) const noexcept {
|
|
if (length != other.length)
|
|
return false;
|
|
#ifdef __DEBUG
|
|
if (total != other.total)
|
|
return false;
|
|
#endif
|
|
for(size_t i = 0; i < length; ++i)
|
|
if (data[i] != other[i])
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
constexpr bool operator!=(const Shape& other) const noexcept {
|
|
return !(*this == other);
|
|
}
|
|
} Shape;
|
|
|
|
__host__ __device__
|
|
size_t prod(const Shape&, const size_t& = 0) noexcept;
|
|
|
|
template<typename T>
|
|
struct Array {
|
|
Shape shape;
|
|
T* data = nullptr;
|
|
size_t* refcount = nullptr;
|
|
|
|
__host__ __device__
|
|
Array() noexcept {
|
|
// #ifdef __DEBUG
|
|
// print("Array created (default)");
|
|
// #endif
|
|
}
|
|
|
|
__host__ __device__
|
|
Array(const Shape& shape, T* data) noexcept : shape(shape), data(data), refcount(new size_t(1)) {
|
|
// #ifdef __DEBUG
|
|
// print("Array created (raw, copy shape)");
|
|
// #endif
|
|
}
|
|
|
|
__host__ __device__
|
|
Array(const Shape& shape) noexcept : shape(shape), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
|
// #ifdef __DEBUG
|
|
// print("Array created (raw empty, copy shape)");
|
|
// #endif
|
|
}
|
|
|
|
__host__ __device__
|
|
Array(Shape&& shape, T* data) noexcept : shape(std::move(shape)), data(data), refcount(new size_t(1)) {
|
|
// #ifdef __DEBUG
|
|
// print("Array created (raw, move shape)");
|
|
// #endif
|
|
}
|
|
|
|
__host__ __device__
|
|
Array(Shape&& shape) noexcept : shape(std::move(shape)), data(new T[np::prod(shape)]), refcount(new size_t(1)) {
|
|
// #ifdef __DEBUG
|
|
// print("Array created (raw empty, move shape)");
|
|
// #endif
|
|
}
|
|
|
|
__host__ __device__
|
|
Array(const Array& array) noexcept : shape(array.shape) {
|
|
#ifdef __DEBUG
|
|
print("Array created (copy)");
|
|
#endif
|
|
if (data != nullptr && data != array.data){
|
|
#ifdef __debug
|
|
print("Former array deleted (move)");
|
|
#endif
|
|
delete[] data;
|
|
}
|
|
if (refcount != nullptr && refcount != array.refcount){
|
|
#ifdef __DEBUG
|
|
print("Former array refcount freed (move)");
|
|
#endif
|
|
delete refcount;
|
|
}
|
|
// const size_t size = np::prod(shape);
|
|
// data = new T[size];
|
|
// memcpy(data, array.data, size);
|
|
// refcount = new size_t;
|
|
// memcpy(refcount, array.refcount, sizeof(size_t));
|
|
|
|
data = array.data;
|
|
refcount = array.refcount;
|
|
if (refcount != nullptr)
|
|
(*refcount)++;
|
|
#ifdef __DEBUG
|
|
else
|
|
print("Moved array has null refcount");
|
|
#endif
|
|
}
|
|
|
|
__host__ __device__
|
|
Array(Array&& array) noexcept {
|
|
// #ifdef __DEBUG
|
|
// print("Array created (move)");
|
|
// #endif
|
|
if (data != nullptr && data != array.data){
|
|
#ifdef __DEBUG
|
|
print("Former array deleted (move)");
|
|
#endif
|
|
delete[] data;
|
|
}
|
|
if (refcount != nullptr && refcount != array.refcount){
|
|
#ifdef __DEBUG
|
|
print("Former array refcount freed (move)");
|
|
#endif
|
|
delete refcount;
|
|
}
|
|
shape = std::move(array.shape);
|
|
data = array.data;
|
|
refcount = array.refcount;
|
|
|
|
array.data = nullptr;
|
|
array.refcount = nullptr;
|
|
}
|
|
|
|
__host__ __device__
|
|
~Array() noexcept {
|
|
if(refcount == nullptr){
|
|
// #ifdef __DEBUG
|
|
// print("Array refcount freed more than once");
|
|
// #endif
|
|
return;
|
|
}
|
|
--(*refcount);
|
|
// #ifdef __DEBUG
|
|
// printf("Array destructed : %lu\n", *refcount);
|
|
// #endif
|
|
if(*refcount == 0){
|
|
if (data != nullptr){
|
|
delete[] data;
|
|
data = nullptr;
|
|
// #ifdef __DEBUG
|
|
// print("Array freeing ...");
|
|
// #endif
|
|
}
|
|
#ifdef __DEBUG
|
|
else
|
|
printf("Array freed more than once : %lu\n", *refcount);
|
|
#endif
|
|
delete refcount;
|
|
refcount = nullptr;
|
|
}
|
|
}
|
|
|
|
__host__ __device__
|
|
Array& operator=(const Array& array) noexcept {
|
|
#ifdef __DEBUG
|
|
print("Array created (assign copy)");
|
|
#endif
|
|
if (data != nullptr && data != array.data){
|
|
#ifdef __DEBUG
|
|
print("Former array deleted (assign copy)");
|
|
#endif
|
|
delete[] data;
|
|
}
|
|
if (refcount != nullptr && refcount != array.refcount){
|
|
#ifdef __DEBUG
|
|
print("Former array refcount freed (assign copy)");
|
|
#endif
|
|
delete refcount;
|
|
}
|
|
shape = array.shape;
|
|
|
|
// const size_t size = np::prod(shape) * sizeof(T);
|
|
// data = new T[size];
|
|
// memcpy(data, array.data, size);
|
|
// refcount = new size_t;
|
|
// memcpy(refcount, array.refcount, sizeof(size_t));
|
|
|
|
data = array.data;
|
|
refcount = array.refcount;
|
|
if (refcount != nullptr)
|
|
(*refcount)++;
|
|
#ifdef __DEBUG
|
|
else
|
|
print("Assigned array has null refcount");
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
__host__ __device__
|
|
Array& operator=(Array&& array) noexcept {
|
|
// #ifdef __DEBUG
|
|
// print("Array created (assign move)");
|
|
// #endif
|
|
if (data != nullptr && data != array.data){
|
|
#ifdef __DEBUG
|
|
print("Former array deleted (assign move)");
|
|
#endif
|
|
delete[] data;
|
|
}
|
|
if (refcount != nullptr && refcount != array.refcount){
|
|
#ifdef __DEBUG
|
|
print("Former array refcount freed (assign move)");
|
|
#endif
|
|
delete refcount;
|
|
}
|
|
shape = std::move(array.shape);
|
|
data = array.data;
|
|
refcount = array.refcount;
|
|
|
|
array.refcount = nullptr;
|
|
array.data = nullptr;
|
|
|
|
return *this;
|
|
}
|
|
|
|
__host__ __device__
|
|
constexpr T& operator[](const size_t& i) const;
|
|
// bool operator==(const Array& other) const noexcept;
|
|
template<typename F>
|
|
Array<T>& operator/=(const F& value) noexcept;
|
|
// Array<T>& operator*=(const T& value) noexcept;
|
|
template<typename F>
|
|
Array<T>& operator*=(const Array<F>& other);
|
|
template<typename F>
|
|
Array<T>& operator+=(const Array<F>& other);
|
|
template<typename F>
|
|
Array<T> operator*(const Array<F>& other) const;
|
|
// template<typename F>
|
|
// Array<T> operator*(const F& other) const;
|
|
template<typename F>
|
|
Array<T> operator-(const np::Array<F>& other) const;
|
|
template<typename F>
|
|
Array<T> operator-(const F& other) const;
|
|
};
|
|
|
|
template<typename T>
|
|
Array<T> empty(Shape&& shape) noexcept {
|
|
return { std::move(shape), new T[np::prod(shape)] };
|
|
}
|
|
|
|
template<typename T>
|
|
Array<T> empty(const Shape& shape) noexcept {
|
|
return { std::move(shape), new T[np::prod(shape)] };
|
|
}
|
|
|
|
template<typename T>
|
|
Array<T> empty(const std::initializer_list<size_t>& dims) noexcept {
|
|
const Shape shape(dims);
|
|
return { std::move(shape), new T[np::prod(shape)] };
|
|
}
|
|
|
|
template<typename T>
|
|
Array<T> zeros(Shape&& shape) noexcept {
|
|
return { std::move(shape), new T[np::prod(shape)]{0} };
|
|
}
|
|
|
|
template<typename T>
|
|
Array<T> zeros(const Shape& shape) noexcept {
|
|
return { std::move(shape), new T[np::prod(shape)]{0} };
|
|
}
|
|
|
|
template<typename T>
|
|
Array<T> zeros(const std::initializer_list<size_t>& dims) noexcept {
|
|
const Shape shape(dims);
|
|
return { std::move(shape), new T[np::prod(shape)]{0} };
|
|
}
|
|
|
|
template<typename T>
|
|
__host__ __device__
|
|
constexpr T& Array<T>::operator[](const size_t& i) const {
|
|
#ifdef __DEBUG
|
|
if (i > shape.total){
|
|
printf("Index %lu out of array size %lu\n", i, shape.total);
|
|
#ifndef __CUDACC__
|
|
throw std::out_of_range("Index out of array size");
|
|
#endif
|
|
}
|
|
#endif
|
|
return data[i];
|
|
}
|
|
|
|
// bool Array<T>::operator==(const Array& other) const noexcept {
|
|
// if (shape != other.shape)
|
|
// return false;
|
|
// const size_t lenght = np::prod(shape);
|
|
// for(size_t i = 0; i < length; ++i)
|
|
// if (data[i] != other[i])
|
|
// return false;
|
|
// return true;
|
|
// }
|
|
|
|
template<typename T>
|
|
template<typename F>
|
|
Array<T>& Array<T>::operator/=(const F& value) noexcept {
|
|
const size_t total = prod(shape);
|
|
for(size_t i = 0; i < total; ++i)
|
|
data[i] /= value;
|
|
return *this;
|
|
}
|
|
|
|
// template<typename T>
|
|
// Array<T>& Array<T>::operator*=(const T& value) noexcept {
|
|
// const size_t total = prod(shape);
|
|
// for(size_t i = 0; i < total; ++i)
|
|
// data[i] *= value;
|
|
// return *this;
|
|
// }
|
|
|
|
template<typename T>
|
|
template<typename F>
|
|
Array<T> Array<T>::operator*(const Array<F>& other) const {
|
|
#ifdef __DEBUG
|
|
if (shape != other.shape){
|
|
printf("Incompatible shapes\n");
|
|
throw;
|
|
}
|
|
#endif
|
|
np::Array<T> res = np::empty<T>(shape);
|
|
const size_t total = prod(shape);
|
|
for(size_t i = 0; i < total; ++i)
|
|
res[i] = data[i] * other[i];
|
|
return res;
|
|
}
|
|
|
|
// template<typename T>
|
|
// template<typename F>
|
|
// Array<T> Array<T>::operator*(const F& value) const {
|
|
// np::Array<T> res = np::empty<T>(shape);
|
|
// const size_t total = prod(shape);
|
|
// for(size_t i = 0; i < total; ++i)
|
|
// res[i] = data[i] * value;
|
|
// return res;
|
|
// }
|
|
|
|
// template<typename T, typename F>
|
|
// Array<T> operator*(const F& value, const Array<T>& other) {
|
|
// np::Array<T> res = np::empty<T>(other.shape);
|
|
// const size_t total = prod(other.shape);
|
|
// for(size_t i = 0; i < total; ++i)
|
|
// res[i] = other[i] * value;
|
|
// return res;
|
|
// }
|
|
|
|
template<typename T>
|
|
template<typename F>
|
|
Array<T>& Array<T>::operator*=(const Array<F>& other) {
|
|
#ifdef __DEBUG
|
|
if (shape != other.shape){
|
|
printf("Incompatible shapes\n");
|
|
throw;
|
|
}
|
|
#endif
|
|
const size_t total = prod(shape);
|
|
for(size_t i = 0; i < total; ++i)
|
|
data[i] *= other[i];
|
|
return *this;
|
|
}
|
|
|
|
template<typename T>
|
|
template<typename F>
|
|
Array<T>& Array<T>::operator+=(const Array<F>& other) {
|
|
#ifdef __DEBUG
|
|
if (shape != other.shape){
|
|
printf("Incompatible shapes\n");
|
|
throw;
|
|
}
|
|
#endif
|
|
const size_t total = prod(shape);
|
|
for(size_t i = 0; i < total; ++i)
|
|
data[i] += other[i];
|
|
return *this;
|
|
}
|
|
|
|
template<typename T>
|
|
template<typename F>
|
|
Array<T> Array<T>::operator-(const F& other) const {
|
|
np::Array<T> res = np::empty<T>(shape);
|
|
const size_t total = prod(shape);
|
|
for(size_t i = 0; i < total; ++i)
|
|
res[i] = data[i] - other;
|
|
return res;
|
|
}
|
|
|
|
template<typename T>
|
|
template<typename F>
|
|
Array<T> Array<T>::operator-(const np::Array<F>& other) const {
|
|
#ifdef __DEBUG
|
|
if (shape != other.shape){
|
|
printf("Incompatible shapes\n");
|
|
throw;
|
|
}
|
|
#endif
|
|
np::Array<T> res = np::empty<T>(shape);
|
|
const size_t total = prod(shape);
|
|
for(size_t i = 0; i < total; ++i)
|
|
res[i] = data[i] - other[i];
|
|
return res;
|
|
}
|
|
|
|
// template<typename T>
|
|
// T prod(const Array<T>& array, const size_t& offset = 0) noexcept {
|
|
// T result = array[offset];
|
|
// const size_t total = prod(array.shape);
|
|
// for(size_t i = 1 + offset; i < total; ++i)
|
|
// result *= array[i];
|
|
// return result;
|
|
// }
|
|
|
|
template<typename T>
|
|
T sum(const Array<T>& array) noexcept {
|
|
T result = array[0];
|
|
const size_t total = prod(array.shape);
|
|
for(size_t i = 1; i < total; ++i)
|
|
result += array[i];
|
|
return result;
|
|
}
|
|
|
|
template<typename T, typename F>
|
|
F mean(const Array<T>& array) noexcept {
|
|
T result = array[0];
|
|
const size_t total = prod(array.shape);
|
|
for(size_t i = 1; i < total; ++i)
|
|
result += array[i];
|
|
return result / total;
|
|
}
|
|
|
|
template<typename T>
|
|
float64_t mean(const Array<T>& array) noexcept {
|
|
return mean<float64_t, T>(array);
|
|
}
|
|
|
|
template<typename T>
|
|
np::Array<T> abs(const Array<T>& array) noexcept {
|
|
np::Array<T> result = np::empty<T>(array.shape);
|
|
const size_t total = prod(array.shape);
|
|
for(size_t i = 0; i < total; ++i)
|
|
result[i] = std::abs(array[i]);
|
|
return result;
|
|
}
|
|
|
|
template<typename T, typename F>
|
|
np::Array<T> pow(const F& k, const Array<T>& array) noexcept {
|
|
np::Array<T> result = np::empty<T>(array.shape);
|
|
const size_t total = prod(array.shape);
|
|
for(size_t i = 0; i < total; ++i)
|
|
result[i] = std::pow(k, array[i]);
|
|
return result;
|
|
}
|
|
|
|
//template<typename T>
|
|
//T max(const Array<T>& array) noexcept {
|
|
// T result = array[0];
|
|
// for(size_t i = 1; i < prod(array.shape); ++i)
|
|
// if(array[i] > result)
|
|
// result = array[i];
|
|
// return result;
|
|
//}
|
|
|
|
//template<typename T>
|
|
//T min(const Array<T>& array) noexcept {
|
|
// T result = array[0];
|
|
// for(size_t i = 1; i < prod(array.shape); ++i)
|
|
// if(array[i] < result)
|
|
// result = array[i];
|
|
// return result;
|
|
//}
|
|
|
|
template<typename T, typename F>
|
|
Array<T> astype(const Array<F>& array) noexcept {
|
|
Array<T> res = empty<T>(array.shape);
|
|
const size_t total = prod(array.shape);
|
|
for(size_t i = 0; i < total; ++i)
|
|
res[i] = static_cast<T>(array[i]);
|
|
return res;
|
|
}
|
|
|
|
template<typename T>
|
|
Array<T> operator-(const T& k, const Array<T>& other) noexcept {
|
|
np::Array<T> res = empty<T>(other.shape);
|
|
const size_t total = prod(other.shape);
|
|
for(size_t i = 0; i < total; ++i)
|
|
res[i] = k - other[i];
|
|
return res;
|
|
}
|
|
|
|
template<typename T>
|
|
__host__ __device__
|
|
constexpr T min(const T& lhs, const T& rhs) noexcept {
|
|
return lhs < rhs ? lhs : rhs;
|
|
}
|
|
|
|
// template<typename T>
|
|
// __host__ __device__
|
|
// constexpr T max(const T& lhs, const T& rhs) noexcept {
|
|
// return lhs > rhs ? lhs : rhs;
|
|
// }
|
|
};
|
|
|
|
template<typename T>
|
|
constexpr np::Array<T>& map(np::Array<T>& a, const T(&fnc)(const size_t& i, const T& v)) noexcept {
|
|
return std::function<T(const size_t&, const T&)>(fnc);
|
|
}
|
|
|
|
template<typename T>
|
|
constexpr np::Array<T>& map(np::Array<T>& a, const std::function<T(const size_t&, const T&)>& fnc) noexcept {
|
|
const size_t a_length = np::prod(a.shape);
|
|
for (size_t i = 0; i < a_length; ++i)
|
|
a[i] = fnc(i, a[i]);
|
|
return a;
|
|
}
|
|
|
|
//template<typename T>
|
|
//constexpr void foreach(const np::Array<T>& a, const void(&fnc)(const size_t&, const T&)) noexcept {
|
|
// return std::function<void(const size_t&, const T&)>(fnc);
|
|
//}
|
|
|
|
//template<typename T>
|
|
//constexpr void foreach(const np::Array<T>& a, const std::function<void(const size_t&, const T&)>& fnc) noexcept {
|
|
// for (size_t i = 0; i < a.length; ++i)
|
|
// fnc(i, a[i]);
|
|
//}
|
|
|
|
template<typename T>
|
|
__host__ __device__
|
|
constexpr inline static void swap(T* a, T* b) noexcept {
|
|
if (a == b) return;
|
|
const T temp = *a;
|
|
*a = *b;
|
|
*b = temp;
|
|
}
|
|
|
|
template<typename T>
|
|
static int32_t qs_partition(const np::Array<T>& a, const int32_t& l, const int32_t& h) noexcept {
|
|
int32_t i = l - 1;
|
|
for (int32_t j = l; j <= h; ++j)
|
|
if (a[j] < a[h])
|
|
swap(&a[++i], &a[j]);
|
|
swap(&a[++i], &a[h]);
|
|
return i;
|
|
}
|
|
|
|
template<typename T>
|
|
void quicksort(const np::Array<T>& a, const int32_t& l, const int32_t& h) noexcept {
|
|
if (l >= h)
|
|
return;
|
|
const int32_t p = qs_partition(a, l, h);
|
|
quicksort(a, l, p - 1);
|
|
quicksort(a, p + 1, h);
|
|
}
|
|
|
|
template<typename T>
|
|
void quicksort(const np::Array<T>& a) noexcept {
|
|
quicksort(a, 0, a.length - 1);
|
|
}
|
|
|
|
template<typename T>
|
|
static size_t as_partition(const T* a, uint16_t* indices, const size_t& l, const size_t& h) noexcept {
|
|
size_t i = l - 1;
|
|
for (size_t j = l; j <= h; ++j)
|
|
if (a[indices[j]] < a[indices[h]])
|
|
swap(&indices[++i], &indices[j]);
|
|
swap(&indices[++i], &indices[h]);
|
|
return i;
|
|
}
|
|
|
|
template<typename T>
|
|
void argsort(const T* a, uint16_t* indices, const size_t& l, const size_t& h) noexcept {
|
|
const size_t total = h - l + 1;
|
|
|
|
size_t* stack = new size_t[total]{l, h};
|
|
size_t top = 1, low = l, high = h;
|
|
|
|
while (top <= total) {
|
|
high = stack[top--];
|
|
low = stack[top--];
|
|
if(low >= high)
|
|
break;
|
|
|
|
const size_t p = as_partition(a, indices, low, high);
|
|
|
|
if (p - 1 > low && p - 1 < total) {
|
|
stack[++top] = low;
|
|
stack[++top] = p - 1;
|
|
}
|
|
|
|
if (p + 1 < high) {
|
|
stack[++top] = p + 1;
|
|
stack[++top] = high;
|
|
}
|
|
}
|
|
delete[] stack;
|
|
}
|
|
|
|
template<typename T>
|
|
np::Array<uint16_t> argsort(const np::Array<T>& other, const size_t& l, const size_t& h) noexcept {
|
|
np::Array<uint16_t> indices = np::empty(other.shape);
|
|
map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; });
|
|
|
|
argsort(other, indices, l, h);
|
|
return indices;
|
|
}
|
|
|
|
template<typename T>
|
|
np::Array<uint16_t> argsort(const np::Array<T>* other, const size_t& length) noexcept {
|
|
return argsort(other, 0, length - 1);
|
|
}
|
|
|
|
std::array<np::Array<uint8_t>, 4> load_datasets(void);
|
|
void print_error_file(const char*) noexcept;
|
|
|
|
template<typename T>
|
|
void save(const np::Array<T>& d, const char* filename) {
|
|
FILE* output = fopen(filename, "wb");
|
|
if (output == NULL) {
|
|
print_error_file(filename);
|
|
throw;
|
|
}
|
|
assert(d.shape.refcount != 0);//, "Refcount shape is zero !!");
|
|
fwrite(&d.shape.length, sizeof(size_t), 1, output);
|
|
fwrite(d.shape.data, sizeof(size_t), d.shape.length, output);
|
|
assert(d.refcount != 0);//, "Refcount array is zero !!");
|
|
fwrite(d.data, sizeof(T), np::prod(d.shape), output);
|
|
fclose(output);
|
|
}
|
|
|
|
template<typename T>
|
|
np::Array<T> load(const char* filename) {
|
|
FILE* input = fopen(filename, "rb");
|
|
if (input == NULL) {
|
|
print_error_file(filename);
|
|
throw;
|
|
}
|
|
size_t length = 0;
|
|
if(!fread(&length, sizeof(size_t), 1, input)){
|
|
print_error_file(filename);
|
|
fclose(input);
|
|
throw;
|
|
}
|
|
size_t* data = new size_t[length];
|
|
if(!fread(data, sizeof(size_t), length, input)){
|
|
print_error_file(filename);
|
|
fclose(input);
|
|
throw;
|
|
}
|
|
np::Array<T> d = np::empty<T>(np::Shape(length, data));
|
|
if(!fread(d.data, sizeof(T), np::prod(d.shape), input)){
|
|
print_error_file(filename);
|
|
fclose(input);
|
|
throw;
|
|
}
|
|
fclose(input);
|
|
return d;
|
|
}
|
|
|
|
#ifdef __CUDACC__
|
|
template<typename T>
|
|
np::Array<T> copyToDevice(const char* name, const np::Array<T>& array) noexcept {
|
|
const size_t array_size = np::prod(array.shape) * sizeof(T);
|
|
const size_t shape_size = array.shape.length * sizeof(size_t);
|
|
np::Array<T> d_array;
|
|
//_print_cuda_error_(name, cudaMalloc(&d_array.refcount, sizeof(size_t)));
|
|
_print_cuda_error_(name, cudaMalloc(&d_array.data, array_size));
|
|
//_print_cuda_error_(name, cudaMalloc(&d_array.shape.refcount, sizeof(size_t)));
|
|
_print_cuda_error_(name, cudaMalloc(&d_array.shape.data, shape_size));
|
|
d_array.shape.length = array.shape.length;
|
|
//_print_cuda_error_(name, cudaMemcpy(d_array.refcount, array.refcount, sizeof(size_t), cudaMemcpyHostToDevice));
|
|
_print_cuda_error_(name, cudaMemcpy(d_array.data, array.data, array_size, cudaMemcpyHostToDevice));
|
|
//_print_cuda_error_(name, cudaMemcpy(d_array.shape.refcount, array.shape.refcount, sizeof(size_t), cudaMemcpyHostToDevice));
|
|
_print_cuda_error_(name, cudaMemcpy(d_array.shape.data, array.shape.data, shape_size, cudaMemcpyHostToDevice));
|
|
#ifdef __DEBUG
|
|
d_array.shape.total = np::prod(array.shape);
|
|
#endif
|
|
return d_array;
|
|
}
|
|
|
|
template<typename T>
|
|
constexpr void cudaFree(const char* name, np::Array<T>& array) noexcept {
|
|
//_print_cuda_error_(name, cudaFree(array.refcount));
|
|
//array.refcount = nullptr;
|
|
_print_cuda_error_(name, cudaFree(array.data));
|
|
array.data = nullptr;
|
|
//_print_cuda_error_(name, cudaFree(array.shape.refcount));
|
|
//array.shape.refcount = nullptr;
|
|
_print_cuda_error_(name, cudaFree(array.shape.data));
|
|
array.shape.data = nullptr;
|
|
}
|
|
|
|
constexpr inline void _print_cuda_error_(const char* name, const cudaError_t& err) noexcept {
|
|
if (err != cudaSuccess) fprintf(stderr, "Error: %s = %d : %s\n", name, err, cudaGetErrorString(err));
|
|
}
|
|
#endif
|
|
|
|
int print(const np::Shape&) noexcept;
|
|
int print(const np::Array<uint8_t>&) noexcept;
|
|
int print(const np::Array<float64_t>&) noexcept;
|
|
int print(const np::Array<uint8_t>&, const np::Slice&) noexcept;
|
|
int print(const np::Array<uint32_t>&, const np::Slice&) noexcept;
|
|
int print(const np::Array<int32_t>&, const np::Slice&) noexcept;
|
|
int print(const np::Array<uint16_t>&, const np::Slice&) noexcept;
|
|
int print_feat(const np::Array<uint8_t>&, const np::Slice&) noexcept;
|