"""Scratch benchmark for several implementations of ``mean(a * b)``.

The same reduction is written in pure Python, NumPy, Numba (vectorised and
explicit loop) and as a single-thread CUDA kernel. Each implementation is
timed once on the same inputs and the results are compared pairwise.

The module also exposes small pickle helpers (``get`` / ``save``) and a
``diff`` command-line mode that prints the mean difference between two
pickled arrays when the script is run with exactly four arguments.
"""
from itertools import combinations
from pickle import dump, load
from sys import argv
from time import perf_counter_ns

import numpy as np
from numba import config, cuda, njit
from tqdm import tqdm

from toolbox import format_time_ns

# The CUDA kernel below is deliberately launched with a single thread, so
# silence Numba's low-occupancy warning.
config.CUDA_LOW_OCCUPANCY_WARNINGS = 0

# Only needed by the commented-out image extraction snippets at the end.
#import matplotlib.pyplot as plt


def get(a):
    """Load and return the object pickled in the file ``{a}.pkl``.

    Args:
        a: Path of the pickle file *without* its ``.pkl`` suffix.

    Returns:
        The unpickled object.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserialising; only call this on files from a trusted source.
    """
    with open(f"{a}.pkl", 'rb') as f:
        return load(f)


def save(a, name) -> None:
    """Pickle ``a`` into the file ``name``.

    Unlike :func:`get`, no ``.pkl`` suffix is appended: ``name`` is used
    verbatim as the output path.

    Args:
        a: Object to serialise.
        name: Destination file path.
    """
    with open(name, 'wb') as f:
        dump(a, f)


def diff(folder, a, label1, label2):
    """Print the mean element-wise difference between two pickled objects.

    The objects are loaded from ``{folder}/{a}_{label1}.pkl`` and
    ``{folder}/{a}_{label2}.pkl``; they must support subtraction and the
    result must provide ``.mean()``.

    Args:
        folder: Directory containing both pickle files.
        a: Common base name of the two files.
        label1: Suffix identifying the first file.
        label2: Suffix identifying the second file.
    """
    af, bf = get(f"{folder}/{a}_{label1}"), get(f"{folder}/{a}_{label2}")
    #print(af)
    #print(bf)
    print((af - bf).mean())


# Diff mode: ``script.py <folder> <label1> <label2> <name>``.
# NOTE: the benchmark in the second ``__main__`` block below still runs
# afterwards, exactly as it does without arguments.
if __name__ == "__main__" and len(argv) == 5:
    diff(argv[1], argv[4], argv[2], argv[3])


def py_mean(a, b):
    """Return ``mean(a * b)`` using a pure-Python loop.

    Args:
        a: First operand; must expose ``shape[0]`` and be iterable.
        b: Second operand, iterated in lockstep with ``a``.

    Returns:
        The sum of the pairwise products divided by ``a.shape[0]``.
    """
    s = 0.0
    for a_i, b_i in zip(a, b):
        s += a_i * b_i
    return s / a.shape[0]


def np_mean(a, b):
    """Return ``mean(a * b)`` using NumPy's vectorised operations."""
    return np.mean(a * b)


@njit('float64(float64[:], float64[:])', fastmath=True, nogil=True)
def nb_mean(a, b):
    """Return ``mean(a * b)``; the NumPy expression compiled by Numba."""
    return np.mean(a * b)


@njit('float64(float64[:], float64[:])', fastmath=True, nogil=True)
def nb_mean_loop(a, b):
    """Return ``mean(a * b)``; the explicit loop compiled by Numba."""
    s = 0.0
    for a_i, b_i in zip(a, b):
        s += a_i * b_i
    return s / a.shape[0]


@cuda.jit('void(float64[:], float64[:], float64[:])', fastmath=True)
def cuda_mean_kernel(r, a, b):
    """Write ``mean(a * b)`` into ``r[0]``.

    The kernel does not index by thread: every launched thread walks the
    whole input and stores the same value, so it is launched as ``[1, 1]``.
    """
    s = 0.0
    for a_i, b_i in zip(a, b):
        s += a_i * b_i
    r[0] = s / a.shape[0]


def cuda_mean(a, b):
    """Return ``mean(a * b)`` computed by :func:`cuda_mean_kernel`.

    Both inputs are copied to the device, the kernel is launched on one
    block of one thread, and the scalar result is copied back.

    Args:
        a: First float64 1-D host array.
        b: Second float64 1-D host array.

    Returns:
        The float64 result read back from the device.
    """
    # Allocate the output directly on the device: the kernel overwrites it,
    # so there is no point uploading an uninitialised host buffer.
    r = cuda.device_array(1, dtype=np.float64)
    d_a = cuda.to_device(a)
    d_b = cuda.to_device(b)
    cuda_mean_kernel[1, 1](r, d_a, d_b)
    # copy_to_host waits for the kernel, so callers time the full round trip.
    return r.copy_to_host()[0]


def test_and_compare(labels, fncs, a, b):
    """Time each function on ``(a, b)``, then print results and differences.

    Args:
        labels: Display name for each entry of ``fncs`` (same order).
        fncs: Callables taking ``(a, b)`` and returning a number.
        a: First input passed to every callable.
        b: Second input passed to every callable.

    Prints one line per function (label, value, formatted duration) and
    then the absolute difference between the values of every unordered pair.
    """
    measures = []
    for fnc in tqdm(fncs, leave=False, desc="Calculating..."):
        start = perf_counter_ns()
        value = fnc(a, b)
        measures.append((value, perf_counter_ns() - start))

    print("Results:")
    for (m_i, time_i), label in zip(measures, labels):
        print(f"\t{label:<10} {m_i:<20} {format_time_ns(time_i)}")

    print("Comparaison:")
    # combinations() yields pairs (i, j) with i < j in the same order as the
    # equivalent nested loops would.
    for (m_i, label_i), (m_j, label_j) in combinations(zip(measures, labels), 2):
        print(f"\t{label_i:<10} vs {label_j:<10} - {abs(m_i[0] - m_j[0])}")


def _print_sizes(total_size):
    """Print ``total_size`` (bytes) in B, kB, MB and GB using floor division."""
    print(f"Size = {total_size} B")
    print(f"Size = {total_size // 1024} kB")
    print(f"Size = {total_size // 1024 // 1024} MB")
    print(f"Size = {total_size // 1024 // 1024 // 1024} GB")


if __name__ == "__main__":
    np.set_printoptions(linewidth=10000, threshold=1000)

    N = 2**20
    labels = ["Python", "Numpy", "Numba", "Numba loop", "CUDA"]
    fncs = [py_mean, np_mean, nb_mean, nb_mean_loop, cuda_mean]

    print(f"RANDOM for N={N}")
    # Two float64 arrays of N elements.
    _print_sizes(2 * 8 * N)
    # np.random.rand already returns float64, so no conversion is needed.
    a, b = np.random.rand(N), np.random.rand(N)
    test_and_compare(labels, fncs, a, b)
    del a, b

    print(f"\nDETERMINSTIC for N={N}")
    # Two float64 arrays plus the temporary uint64 index mask.
    _print_sizes((2 * 8 * N) + (8 * N))
    mask = np.arange(N, dtype=np.uint64)
    a = np.ones(N, dtype=np.float64)
    a[mask < N // 2] = 0.1  # first half 0.1, second half 1.0
    del mask
    b = np.ones(N, dtype=np.float64)
    test_and_compare(labels, fncs, a, b)
    del a, b

#from ViolaJonesGPU import argsort as argsort_GPU
#from ViolaJonesCPU import argsort as argsort_CPU
#from toolbox import unit_test_argsort_2d, benchmark_function
#labels = ["Numpy", "Numba", "CUDA"]
#a = np.random.randint(2**12, size = (2**20, 2**8), dtype = np.int32)
#m = [benchmark_function(f"Argsort {label}", lambda: f(np.copy(a))) for (label, f) in zip(labels, [
#    lambda a: np.argsort(a).astype(np.uint16), argsort_CPU, argsort_GPU
#])]
#for i, (m_i, label_i) in enumerate(zip(m, labels)):
#    #for j, (m_j, label_j) in enumerate(zip(m, labels)):
#    #    if i >= j:
#    #        continue
#    #    print(f"\t{label_i:<10} vs {label_j:<10} - {(m_i == m_j).mean()}")
#    benchmark_function(f"Unit test {label_i}", lambda: unit_test_argsort_2d(a, m_i))

#for i in tqdm(range(X.shape[0]), leave = False, desc = "Extract image"):
#    x = X[i]
#    y = Y[i]
#    fig = plt.figure()
#    plt.imshow(x, cmap = 'gray')
#    plt.savefig(f"imgs/{y}/{i}.png")
#    plt.close(fig)

#def extract_FD(Xy):
#    X_c, Y_c = [], []
#    for x,y in Xy:
#        X_c.append(x)
#        Y_c.append(y)
#    X_c = np.asarray(X_c)
#    Y_c = np.asarray(Y_c)
#    return X_c, Y_c

#X_train, y_train = get('out/X_train'), get('out/y_train')
#X_test, y_test = get('out/X_test'), get('out/y_test')

#X_train, y_train = extract_FD(get('/home/_aspil0w/git/FaceDetection/training'))
#X_test, y_test = extract_FD(get('/home/_aspil0w/git/FaceDetection/test'))
#save(X_train, 'out/X_train'), save(y_train, 'out/y_train')
#save(X_test, 'out/X_test'), save(y_test, 'out/y_test')

#print(X_train.shape, X_train_org.shape, X_train.shape == X_train_org.shape)
#print((X_train == X_train_org).mean())
#print(y_train.shape, y_train_org.shape, y_train.shape == y_train_org.shape)
#print((y_train == y_train_org).mean())

#print(X_test.shape, X_test_org.shape, X_test.shape == X_test_org.shape)
#print((X_test == X_test_org).mean())
#print(y_test.shape, y_test_org.shape, y_test.shape == y_test_org.shape)
#print((y_test == y_test_org).mean())

#@njit('uint16[:](uint8[:, :, :], uint8[:, :, :])')
#def arg_find(X, X_org):
#    arg = np.empty(X.shape[0], dtype = np.uint16)
#    for i, x in enumerate(X_org):
#        found = False
#        for j, x_org in enumerate(X):
#            if np.all(x == x_org):
#                arg[i] = j
#                found = True
#                break
#        assert found, "Image not found"
#    return arg

#print("Arg find results train")
#arg_train = arg_find(X_train, X_train_org)
#print((X_train[arg_train] == X_train_org).mean())
#print((y_train[arg_train] == y_train_org).mean())

#print("Arg find results test")
#arg_test = arg_find(X_test, X_test_org)
#print((X_test[arg_test] == X_test_org).mean())
#print((y_test[arg_test] == y_test_org).mean())

#for i in tqdm(range(X_c.shape[0]), leave = False, desc = "Extract image"):
#    x = X_c[i]
#    y = Y_c[i]
#    fig = plt.figure()
#    plt.imshow(x, cmap = 'gray')
#    plt.savefig(f"imgs2/{y}/{i}.png")
#    plt.close(fig)