# Source listing metadata (not part of the program): 190 lines, 5.8 KiB, Python
import numpy as np
|
|
from numba import cuda, config, njit
|
|
config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
|
|
#import matplotlib.pyplot as plt
|
|
from tqdm import tqdm
|
|
from time import perf_counter_ns
|
|
from toolbox import format_time_ns
|
|
from pickle import load, dump
|
|
from sys import argv
|
|
|
|
def get(a):
    """Load and return the object pickled in the file ``<a>.pkl``.

    Args:
        a: Path of the pickle file *without* its extension; ``.pkl`` is
            appended here before the file is opened.

    Returns:
        The object deserialized from the file.

    Raises:
        OSError: If the file cannot be opened (for instance, it is missing).
    """
    # NOTE(security): unpickling can execute arbitrary code. Only use this on
    # files that come from a trusted source.
    with open(f"{a}.pkl", 'rb') as f:
        return load(f)
|
|
|
|
def save(a, name) -> None:
    """Pickle ``a`` into the file ``name``.

    Args:
        a: Any picklable object.
        name: Destination path, used verbatim. No ``.pkl`` suffix is added,
            so include it in ``name`` if the file must be read back by a
            loader that expects that extension.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    with open(name, 'wb') as f:
        dump(a, f)
|
|
|
|
def diff(folder, a, label1, label2):
    """Print the mean difference between two pickled results.

    Loads ``<folder>/<a>_<label1>.pkl`` and ``<folder>/<a>_<label2>.pkl``
    through ``get`` and prints ``(first - second).mean()``.

    Args:
        folder: Directory containing both pickle files.
        a: Common file-name prefix of the two results.
        label1: Suffix identifying the first result.
        label2: Suffix identifying the second result.

    Note:
        The loaded objects must support subtraction and expose ``.mean()``
        (presumably NumPy arrays of compatible shape -- confirm with the
        code that wrote the files).
    """
    af = get(f"{folder}/{a}_{label1}")
    bf = get(f"{folder}/{a}_{label2}")
    print((af - bf).mean())
|
|
|
|
# Command-line comparison mode: when run as a script with exactly four
# arguments (folder, label1, label2, name), print the mean difference between
# the two pickled results. Any other argument count skips this step.
if __name__ == "__main__" and len(argv) == 5:
    # diff() expects (folder, name, label1, label2), hence the reordering.
    diff(argv[1], argv[4], argv[2], argv[3])
|
|
|
|
def py_mean(a, b):
    """Return the mean of the element-wise product of ``a`` and ``b``.

    Pure-Python baseline: the products are accumulated one by one in an
    interpreted loop and the sum is divided by ``a.shape[0]``.

    Args:
        a: One-dimensional array-like exposing ``.shape``.
        b: Iterable of the same length as ``a``.

    Returns:
        ``sum(a[i] * b[i]) / a.shape[0]``.

    Note:
        ``zip`` stops at the shorter input while the divisor is always
        ``a.shape[0]``, so the inputs are expected to have equal length.
    """
    total = 0.0
    for a_i, b_i in zip(a, b):
        total += a_i * b_i
    return total / a.shape[0]
|
|
|
|
def np_mean(a, b):
    """Return the mean of the element-wise product of ``a`` and ``b``.

    Vectorized NumPy version: the product array is materialized first and
    then reduced with ``np.mean``.
    """
    product = a * b
    return np.mean(product)
|
|
|
|
@njit('float64(float64[:], float64[:])', fastmath = True, nogil = True)
def nb_mean(a, b):
    """Return the mean of the element-wise product of ``a`` and ``b``.

    Same expression as the NumPy version, compiled by Numba for 1-D float64
    inputs (the explicit signature restricts the accepted argument types).
    """
    return np.mean(a * b)
|
|
|
|
@njit('float64(float64[:], float64[:])', fastmath = True, nogil = True)
def nb_mean_loop(a, b):
    """Return the mean of the element-wise product of ``a`` and ``b``.

    Explicit-loop variant compiled by Numba for 1-D float64 inputs: the
    products are accumulated in a scalar and divided by ``a.shape[0]``.

    Note:
        ``zip`` stops at the shorter input while the divisor is always
        ``a.shape[0]``, so the inputs are expected to have equal length.
    """
    total = 0.0
    for a_i, b_i in zip(a, b):
        total += a_i * b_i
    return total / a.shape[0]
|
|
|
|
@cuda.jit('void(float64[:], float64[:], float64[:])', fastmath = True)
def cuda_mean_kernel(r, a, b):
    """CUDA kernel writing the mean of the element-wise product to ``r[0]``.

    Args:
        r: Output array; only ``r[0]`` is written.
        a: First 1-D float64 input.
        b: Second 1-D float64 input.

    Note:
        The body never reads a thread index: every launched thread runs the
        full sequential loop and stores to ``r[0]``. The kernel is therefore
        only meaningful when launched with a single thread.
    """
    total = 0.0
    for a_i, b_i in zip(a, b):
        total += a_i * b_i
    r[0] = total / a.shape[0]
|
|
|
|
def cuda_mean(a, b):
    """Return the mean of the element-wise product of ``a`` and ``b`` on GPU.

    Copies both inputs to the device, runs ``cuda_mean_kernel`` and copies
    the one-element result back to the host.

    Args:
        a: First 1-D float64 host array.
        b: Second 1-D float64 host array.

    Returns:
        The value the kernel stored in the result buffer.
    """
    # Allocate the result directly on the device: the kernel overwrites
    # r[0], so uploading an uninitialized host buffer would be a wasted copy.
    r = cuda.device_array(1, dtype = np.float64)
    d_a = cuda.to_device(a)
    d_b = cuda.to_device(b)
    # One block of one thread: the kernel performs the whole reduction
    # sequentially and has no per-thread indexing.
    cuda_mean_kernel[1, 1](r, d_a, d_b)
    return r.copy_to_host()[0]
|
|
|
|
def test_and_compare(labels, fncs, a, b):
    """Time each implementation on ``(a, b)`` and print a comparison report.

    Every function in ``fncs`` is called once as ``fnc(a, b)`` and timed with
    ``perf_counter_ns``. The report lists each result with its elapsed time,
    then the absolute difference between the results of every pair of
    implementations.

    Args:
        labels: Display names, matched to ``fncs`` by position.
        fncs: Callables taking ``(a, b)`` and returning a number.
        a: First input forwarded to every callable.
        b: Second input forwarded to every callable.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import combinations

    measures = []
    for fnc in tqdm(fncs, leave = False, desc = "Calculating..."):
        start = perf_counter_ns()
        value = fnc(a, b)
        measures.append((value, perf_counter_ns() - start))

    print("Results:")
    for (value, elapsed), label in zip(measures, labels):
        print(f"\t{label:<10} {value:<20} {format_time_ns(elapsed)}")

    print("Comparaison:")
    # combinations() yields each unordered pair once, in the same (i < j)
    # order as a nested index loop would.
    labelled = list(zip(measures, labels))
    for (m_i, label_i), (m_j, label_j) in combinations(labelled, 2):
        print(f"\t{label_i:<10} vs {label_j:<10} - {abs(m_i[0] - m_j[0])}")
|
|
|
|
# Benchmark driver: runs every implementation on a random data set and on a
# deterministic one, printing the memory footprint of each data set first.
if __name__ == "__main__":
    np.set_printoptions(linewidth = 10000, threshold = 1000)

    N = 2**20
    labels = ["Python", "Numpy", "Numba", "Numba loop", "CUDA"]
    fncs = [py_mean, np_mean, nb_mean, nb_mean_loop, cuda_mean]

    print(f"RANDOM for N={N}")
    # Two float64 arrays of N elements, 8 bytes per element.
    total_size = (2 * 8 * N)
    size = total_size
    for unit in ("B", "kB", "MB", "GB"):
        print(f"Size = {size} {unit}")
        size //= 1024
    # np.random.rand already returns float64, so no dtype conversion needed.
    a, b = np.random.rand(N), np.random.rand(N)
    test_and_compare(labels, fncs, a, b)
    del a, b

    print(f"\nDETERMINSTIC for N={N}")
    # Two float64 arrays plus the temporary uint64 index mask.
    total_size = (2 * 8 * N) + (8 * N)
    size = total_size
    for unit in ("B", "kB", "MB", "GB"):
        print(f"Size = {size} {unit}")
        size //= 1024
    mask = np.arange(N, dtype = np.uint64)
    a = np.ones(N, dtype = np.float64)
    # First half of `a` is 0.1, second half stays 1.0.
    a[mask < N//2] = 0.1
    del mask
    b = np.ones(N, dtype = np.float64)
    test_and_compare(labels, fncs, a, b)
    del a, b
|
|
|
|
#from ViolaJonesGPU import argsort as argsort_GPU
|
|
#from ViolaJonesCPU import argsort as argsort_CPU
|
|
#from toolbox import unit_test_argsort_2d, benchmark_function
|
|
|
|
#labels = ["Numpy", "Numba", "CUDA"]
|
|
#a = np.random.randint(2**12, size = (2**20, 2**8), dtype = np.int32)
|
|
#m = [benchmark_function(f"Argsort {label}", lambda: f(np.copy(a))) for (label, f) in zip(labels, [
|
|
# lambda a: np.argsort(a).astype(np.uint16), argsort_CPU, argsort_GPU
|
|
#])]
|
|
#for i, (m_i, label_i) in enumerate(zip(m, labels)):
|
|
# #for j, (m_j, label_j) in enumerate(zip(m, labels)):
|
|
# # if i >= j:
|
|
# # continue
|
|
# # print(f"\t{label_i:<10} vs {label_j:<10} - {(m_i == m_j).mean()}")
|
|
# benchmark_function(f"Unit test {label_i}", lambda: unit_test_argsort_2d(a, m_i))
|
|
|
|
#for i in tqdm(range(X.shape[0]), leave = False, desc = "Extract image"):
|
|
# x = X[i]
|
|
# y = Y[i]
|
|
# fig = plt.figure()
|
|
# plt.imshow(x, cmap = 'gray')
|
|
# plt.savefig(f"imgs/{y}/{i}.png")
|
|
# plt.close(fig)
|
|
|
|
#def extract_FD(Xy):
|
|
# X_c, Y_c = [], []
|
|
# for x,y in Xy:
|
|
# X_c.append(x)
|
|
# Y_c.append(y)
|
|
# X_c = np.asarray(X_c)
|
|
# Y_c = np.asarray(Y_c)
|
|
# return X_c, Y_c
|
|
|
|
#X_train, y_train = get('out/X_train'), get('out/y_train')
|
|
#X_test, y_test = get('out/X_test'), get('out/y_test')
|
|
|
|
#X_train, y_train = extract_FD(get('/home/_aspil0w/git/FaceDetection/training'))
|
|
#X_test, y_test = extract_FD(get('/home/_aspil0w/git/FaceDetection/test'))
|
|
#save(X_train, 'out/X_train'), save(y_train, 'out/y_train')
|
|
#save(X_test, 'out/X_test'), save(y_test, 'out/y_test')
|
|
|
|
#print(X_train.shape, X_train_org.shape, X_train.shape == X_train_org.shape)
|
|
#print((X_train == X_train_org).mean())
|
|
#print(y_train.shape, y_train_org.shape, y_train.shape == y_train_org.shape)
|
|
#print((y_train == y_train_org).mean())
|
|
|
|
#print(X_test.shape, X_test_org.shape, X_test.shape == X_test_org.shape)
|
|
#print((X_test == X_test_org).mean())
|
|
#print(y_test.shape, y_test_org.shape, y_test.shape == y_test_org.shape)
|
|
#print((y_test == y_test_org).mean())
|
|
|
|
#@njit('uint16[:](uint8[:, :, :], uint8[:, :, :])')
|
|
#def arg_find(X, X_org):
|
|
# arg = np.empty(X.shape[0], dtype = np.uint16)
|
|
# for i, x in enumerate(X_org):
|
|
# found = False
|
|
# for j, x_org in enumerate(X):
|
|
# if np.all(x == x_org):
|
|
# arg[i] = j
|
|
# found = True
|
|
# break
|
|
# assert found, "Image not found"
|
|
# return arg
|
|
|
|
#print("Arg find results train")
|
|
#arg_train = arg_find(X_train, X_train_org)
|
|
#print((X_train[arg_train] == X_train_org).mean())
|
|
#print((y_train[arg_train] == y_train_org).mean())
|
|
|
|
#print("Arg find results test")
|
|
#arg_test = arg_find(X_test, X_test_org)
|
|
#print((X_test[arg_test] == X_test_org).mean())
|
|
#print((y_test[arg_test] == y_test_org).mean())
|
|
|
|
#for i in tqdm(range(X_c.shape[0]), leave = False, desc = "Extract image"):
|
|
# x = X_c[i]
|
|
# y = Y_c[i]
|
|
# fig = plt.figure()
|
|
# plt.imshow(x, cmap = 'gray')
|
|
# plt.savefig(f"imgs2/{y}/{i}.png")
|
|
# plt.close(fig)
|
|
|