Added files

saundersp
2023-05-07 20:15:55 +02:00
parent c8e56c1277
commit e6194ac485
30 changed files with 4682 additions and 0 deletions

34
python/Makefile Normal file

@@ -0,0 +1,34 @@
DATA := ../data/X_train.bin ../data/X_test.bin ../data/y_train.bin ../data/y_test.bin

.PHONY: all start reset debug profile mrproper test help

all: ${DATA}

${DATA}:
	@bash ../download_data.sh ..

venv:
	@bash -c 'source activate.sh'

start: ${DATA} venv
	@bash -c 'source activate.sh && python projet.py'

reset:
	@echo Deleting generated states and models
	@rm -rf out/* models/* || true

debug:
	@bash -c 'source activate.sh && pudb projet.py'

profile:
	@bash -c 'source activate.sh && python -m cProfile -o prof.out projet.py && gprof2dot -f pstats prof.out | dot -Tpng -o output.png'

mrproper: reset
	@rm -r __pycache__ venv

test:
	@bash -c 'source activate.sh && ls out | sed s/.pkl// | xargs -n1 python test_diff.py out'
	@bash -c 'source activate.sh && ls models | sed s/.pkl// | xargs -n1 python test_diff.py models'

help:
	@echo "all start venv reset debug profile mrproper test help"

182
python/ViolaJones.py Normal file

@@ -0,0 +1,182 @@
from typing import Tuple, Iterable
from tqdm import tqdm
import numpy as np
import config

if config.GPU_BOOSTED:
    from ViolaJonesGPU import train_weak_clf
else:
    from ViolaJonesCPU import train_weak_clf

if config.COMPILE_WITH_C:
    from numba import njit
    @njit
    def tqdm_iter(iter: Iterable, _: str):
        return iter
else:
    from decorators import njit, tqdm_iter

@njit('uint8[:, :, :, :](uint16, uint16)')
def build_features(width: int, height: int) -> np.ndarray:
    """Initialize the features based on the input shape.

    Args:
        width (int): Width of the images.
        height (int): Height of the images.

    Returns:
        np.ndarray: The initialized features.
    """
    feats = []
    empty = (0, 0, 0, 0)
    for w in range(1, width + 1):
        for h in range(1, height + 1):
            for i in range(width - w):
                for j in range(height - h):
                    # 2 rectangle features
                    immediate = (i, j, w, h)
                    right = (i + w, j, w, h)
                    if i + 2 * w < width: # Horizontally Adjacent
                        feats.append(([right, empty], [immediate, empty]))
                    bottom = (i, j + h, w, h)
                    if j + 2 * h < height: # Vertically Adjacent
                        feats.append(([immediate, empty], [bottom, empty]))
                    right_2 = (i + 2 * w, j, w, h)
                    # 3 rectangle features
                    if i + 3 * w < width: # Horizontally Adjacent
                        feats.append(([right, empty], [right_2, immediate]))
                    bottom_2 = (i, j + 2 * h, w, h)
                    if j + 3 * h < height: # Vertically Adjacent
                        feats.append(([bottom, empty], [bottom_2, immediate]))
                    # 4 rectangle features
                    bottom_right = (i + w, j + h, w, h)
                    if i + 2 * w < width and j + 2 * h < height:
                        feats.append(([right, bottom], [immediate, bottom_right]))
    return np.asarray(feats, dtype = np.uint8)

@njit('float64[:](uint8[:])')
def init_weights(y_train: np.ndarray) -> np.ndarray:
    """Initialize the weights of the weak classifiers based on the training labels.

    Args:
        y_train (np.ndarray): Training labels.

    Returns:
        np.ndarray: The initialized weights.
    """
    weights = np.empty_like(y_train, dtype = np.float64)
    t = y_train.sum()
    weights[y_train == 0] = 1.0 / (2 * t)
    weights[y_train == 1] = 1.0 / (2 * (y_train.shape[0] - t))
    return weights

@njit('int8[:](int32[:], int32, int32)')
def classify_weak_clf(x_feat_i: np.ndarray, threshold: int, polarity: int) -> np.ndarray:
    """Classify the integrated features based on polarity and threshold.

    Args:
        x_feat_i (np.ndarray): Integrated features.
        threshold (int): Trained threshold.
        polarity (int): Trained polarity.

    Returns:
        np.ndarray: Classified features.
    """
    res = np.zeros_like(x_feat_i, dtype = np.int8)
    res[polarity * x_feat_i < polarity * threshold] = 1
    return res

@njit('Tuple((int32, float64, float64[:]))(int32[:, :], float64[:], int32[:, :], uint8[:])')
def select_best(classifiers: np.ndarray, weights: np.ndarray, X_feat: np.ndarray, y: np.ndarray) -> Tuple[int, float, np.ndarray]:
    """Select the best classifier given their predictions.

    Args:
        classifiers (np.ndarray): The weak classifiers.
        weights (np.ndarray): Trained weights of each classifier.
        X_feat (np.ndarray): Integrated features.
        y (np.ndarray): Features labels.

    Returns:
        Tuple[int, float, np.ndarray]: Index of the best classifier, the best error and the best accuracy.
    """
    best_clf, best_error, best_accuracy = 0, np.inf, np.empty(X_feat.shape[1], dtype = np.float64)
    for j, (threshold, polarity) in enumerate(tqdm_iter(classifiers, "Selecting best classifiers")):
        accuracy = np.abs(classify_weak_clf(X_feat[j], threshold, polarity) - y).astype(np.float64)
        error = np.mean(weights * accuracy)
        if error < best_error:
            best_clf, best_error, best_accuracy = j, error, accuracy
    return best_clf, best_error, best_accuracy

#@njit('Tuple((float64[:], int32[:, :]))(uint16, int32[:, :], uint16[:, :], uint8[:])')
def train_viola_jones(T: int, X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Train the weak classifiers.

    Args:
        T (int): Number of weak classifiers.
        X_feat (np.ndarray): Integrated features.
        X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
        y (np.ndarray): Features labels.

    Returns:
        Tuple[np.ndarray, np.ndarray]: List of trained alphas and the list of the final classifiers.
    """
    weights = init_weights(y)
    alphas, final_classifier = np.empty(T, dtype = np.float64), np.empty((T, 3), dtype = np.int32)

    #for t in tqdm_iter(range(T), "Training ViolaJones"):
    for t in tqdm(range(T), desc = "Training ViolaJones", leave = False):
        weights /= weights.sum()
        classifiers = train_weak_clf(X_feat, X_feat_argsort, y, weights)
        clf, error, accuracy = select_best(classifiers, weights, X_feat, y)
        beta = error / (1.0 - error)
        weights *= beta ** (1.0 - accuracy)
        alphas[t] = np.log(1.0 / beta)
        final_classifier[t] = (clf, classifiers[clf][0], classifiers[clf][1])
    return alphas, final_classifier

@njit('uint8[:](float64[:], int32[:, :], int32[:, :])')
def classify_viola_jones(alphas: np.ndarray, classifiers: np.ndarray, X_feat: np.ndarray) -> np.ndarray:
    """Classify the trained classifiers on the given features.

    Args:
        alphas (np.ndarray): Trained alphas.
        classifiers (np.ndarray): Trained classifiers.
        X_feat (np.ndarray): Integrated features.

    Returns:
        np.ndarray: Classification results.
    """
    total = np.zeros(X_feat.shape[1], dtype = np.float64)
    for i, alpha in enumerate(tqdm_iter(alphas, "Classifying ViolaJones")):
        (j, threshold, polarity) = classifiers[i]
        total += alpha * classify_weak_clf(X_feat[j], threshold, polarity)
    y_pred = np.zeros(X_feat.shape[1], dtype = np.uint8)
    y_pred[total >= 0.5 * np.sum(alphas)] = 1
    return y_pred

@njit
def get_best_anova_features(X: np.ndarray, y: np.ndarray) -> np.ndarray:
    #SelectPercentile(f_classif, percentile = 10).fit(X, y).get_support(indices = True)
    classes = [X.T[y == 0].astype(np.float64), X.T[y == 1].astype(np.float64)]
    n_samples_per_class = np.asarray([classes[0].shape[0], classes[1].shape[0]])
    n_samples = classes[0].shape[0] + classes[1].shape[0]
    ss_alldata = (classes[0] ** 2).sum(axis = 0) + (classes[1] ** 2).sum(axis = 0)
    sums_classes = [np.asarray(classes[0].sum(axis = 0)), np.asarray(classes[1].sum(axis = 0))]
    sq_of_sums_alldata = (sums_classes[0] + sums_classes[1]) ** 2
    sq_of_sums_args = [sums_classes[0] ** 2, sums_classes[1] ** 2]
    ss_tot = ss_alldata - sq_of_sums_alldata / n_samples
    sqd_sum_bw_n = sq_of_sums_args[0] / n_samples_per_class[0] + \
            sq_of_sums_args[1] / n_samples_per_class[1] - sq_of_sums_alldata / n_samples
    ss_wn = ss_tot - sqd_sum_bw_n
    df_wn = n_samples - 2
    msw = ss_wn / df_wn
    f_values = sqd_sum_bw_n / msw
    return np.sort(np.argsort(f_values)[::-1][: int(np.ceil(X.shape[0] / 10.0))])
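For reference, here is a minimal end-to-end sketch of how these functions compose on synthetic data (a hypothetical snippet, not part of the commit; it assumes config.GPU_BOOSTED and config.COMPILE_WITH_C are both left disabled, so the pure-Python code paths from decorators.py are taken):

import numpy as np
from ViolaJones import train_viola_jones, classify_viola_jones
from ViolaJonesCPU import argsort

rng = np.random.default_rng(42)
X_feat = rng.integers(-1000, 1000, size = (64, 128)).astype(np.int32)  # (n_features, n_samples)
y = rng.integers(0, 2, size = 128).astype(np.uint8)                    # binary labels
alphas, final_classifiers = train_viola_jones(5, X_feat, argsort(X_feat), y)
y_pred = classify_viola_jones(alphas, final_classifiers, X_feat)
print(f"Training accuracy: {(y_pred == y).mean():.2%}")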

168
python/ViolaJonesCPU.py Normal file

@@ -0,0 +1,168 @@
from config import COMPILE_WITH_C
from typing import Iterable
from numba import int32, float64
import numpy as np

if COMPILE_WITH_C:
    from numba import njit
    @njit
    def tqdm_iter(iter: Iterable, _: str):
        return iter
else:
    from decorators import njit, tqdm_iter

import sys
sys.setrecursionlimit(10000)

@njit('uint32[:, :, :](uint8[:, :, :])')
def set_integral_image(X: np.ndarray) -> np.ndarray:
    """Transform the input images into integral images (CPU version).

    Args:
        X (np.ndarray): Dataset of images.

    Returns:
        np.ndarray: Dataset of integral images.
    """
    X_ii = np.empty_like(X, dtype = np.uint32)
    for i, Xi in enumerate(tqdm_iter(X, "Applying integral image")):
        ii = np.zeros_like(Xi, dtype = np.uint32)
        for y in range(1, Xi.shape[0]):
            s = 0
            for x in range(Xi.shape[1] - 1):
                s += Xi[y - 1, x]
                ii[y, x + 1] = s + ii[y - 1, x + 1]
        X_ii[i] = ii
    return X_ii

@njit('uint32(uint32[:, :], int16, int16, int16, int16)')
def __compute_feature__(ii: np.ndarray, x: int, y: int, w: int, h: int) -> int:
    """Compute a feature on an integral image at a specific coordinate (CPU version).

    Args:
        ii (np.ndarray): Integral image.
        x (int): X coordinate.
        y (int): Y coordinate.
        w (int): Width of the feature.
        h (int): Height of the feature.

    Returns:
        int: Computed feature.
    """
    return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]

@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])')
def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
    """Apply the features on an integral image dataset (CPU version).

    Args:
        feats (np.ndarray): Features to apply.
        X_ii (np.ndarray): Integral image dataset.

    Returns:
        np.ndarray: Applied features.
    """
    X_feat = np.empty((feats.shape[0], X_ii.shape[0]), dtype = np.int32)
    for i, (p, n) in enumerate(tqdm_iter(feats, "Applying features")):
        for j, x_i in enumerate(X_ii):
            p_x, p_y, p_w, p_h = p[0]
            p1_x, p1_y, p1_w, p1_h = p[1]
            n_x, n_y, n_w, n_h = n[0]
            n1_x, n1_y, n1_w, n1_h = n[1]
            p1 = __compute_feature__(x_i, p_x, p_y, p_w, p_h) + __compute_feature__(x_i, p1_x, p1_y, p1_w, p1_h)
            n1 = __compute_feature__(x_i, n_x, n_y, n_w, n_h) + __compute_feature__(x_i, n1_x, n1_y, n1_w, n1_h)
            X_feat[i, j] = int32(p1) - int32(n1)
    return X_feat

@njit('int32[:, :](int32[:, :], uint16[:, :], uint8[:], float64[:])')
def train_weak_clf(X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray, weights: np.ndarray) -> np.ndarray:
    """Train the weak classifiers on a given dataset (CPU version).

    Args:
        X_feat (np.ndarray): Feature images dataset.
        X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
        y (np.ndarray): Labels of the features.
        weights (np.ndarray): Weights of the features.

    Returns:
        np.ndarray: Trained weak classifiers.
    """
    total_pos, total_neg = weights[y == 1].sum(), weights[y == 0].sum()
    classifiers = np.empty((X_feat.shape[0], 2), dtype = np.int32)
    for i, feature in enumerate(tqdm_iter(X_feat, "Training weak classifiers")):
        pos_seen, neg_seen = 0, 0
        pos_weights, neg_weights = 0, 0
        min_error, best_threshold, best_polarity = float64(np.inf), 0, 0
        for j in X_feat_argsort[i]:
            error = min(neg_weights + total_pos - pos_weights, pos_weights + total_neg - neg_weights)
            if error < min_error:
                min_error = error
                best_threshold = feature[j]
                best_polarity = 1 if pos_seen > neg_seen else -1
            if y[j] == 1:
                pos_seen += 1
                pos_weights += weights[j]
            else:
                neg_seen += 1
                neg_weights += weights[j]
        classifiers[i] = (best_threshold, best_polarity)
    return classifiers

@njit('int32(int32[:], uint16[:], int32, int32)')
def as_partition(a: np.ndarray, indices: np.ndarray, l: int, h: int) -> int:
    i = l - 1
    j = l
    for j in range(l, h + 1):
        if a[indices[j]] < a[indices[h]]:
            i += 1
            indices[i], indices[j] = indices[j], indices[i]
    i += 1
    indices[i], indices[j] = indices[j], indices[i]
    return i

@njit('void(int32[:], uint16[:], int32, int32)')
def argsort_bounded(a: np.ndarray, indices: np.ndarray, l: int, h: int):
    total = h - l + 1
    stack = np.empty((total,), dtype = np.int32)
    stack[0] = l
    stack[1] = h
    top = 1
    low = l
    high = h
    while top >= 0:
        high = stack[top]
        top -= 1
        low = stack[top]
        top -= 1
        if low >= high:
            break
        p = as_partition(a, indices, low, high)
        if p - 1 > low:
            top += 1
            stack[top] = low
            top += 1
            stack[top] = p - 1
        if p + 1 < high:
            top += 1
            stack[top] = p + 1
            top += 1
            stack[top] = high

@njit('uint16[:, :](int32[:, :])')
def argsort(X_feat: np.ndarray) -> np.ndarray:
    indices = np.empty_like(X_feat, dtype = np.uint16)
    indices[:, :] = np.arange(indices.shape[1])
    for i in tqdm_iter(range(X_feat.shape[0]), "argsort"):
        argsort_bounded(X_feat[i], indices[i], 0, X_feat[i].shape[0] - 1)
    return indices
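The triple loop in set_integral_image builds an exclusive two-dimensional prefix sum: X_ii[i, y, x] holds the sum of X[i, :y, :x], with a zero first row and first column. A quick NumPy cross-check of that property (a hypothetical snippet, not part of the commit):

import numpy as np
from ViolaJonesCPU import set_integral_image

X = np.random.randint(0, 256, size = (2, 19, 19)).astype(np.uint8)
ref = np.zeros(X.shape, dtype = np.uint32)
ref[:, 1:, 1:] = X.astype(np.uint32).cumsum(axis = 1).cumsum(axis = 2)[:, :-1, :-1]
assert (set_integral_image(X) == ref).all()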

372
python/ViolaJonesGPU.py Normal file

@@ -0,0 +1,372 @@
from numba import float64, uint32, cuda, int32, uint16
from config import COMPILE_WITH_C
import numpy as np

NB_THREADS = 1024
NB_THREADS_2D = (32, 32)
NB_THREADS_3D = (16, 16, 4)
M = int(np.log2(NB_THREADS_2D[1]))

if COMPILE_WITH_C:
    from numba import njit
else:
    from decorators import njit

@njit('uint32[:, :, :](uint32[:, :, :])')
def __scanCPU_3d__(X: np.ndarray) -> np.ndarray:
    """Prefix Sum (scan) of a given dataset.

    Args:
        X (np.ndarray): Dataset of images to apply sum.

    Returns:
        np.ndarray: Scanned dataset of images.
    """
    for x in range(X.shape[0]):
        for y in range(X.shape[1]):
            cum = 0
            for z in range(X.shape[2]):
                cum += X[x, y, z]
                X[x, y, z] = cum - X[x, y, z]
    return X

@cuda.jit('void(uint16, uint16, uint32[:, :, :], uint32[:, :, :])')
def __kernel_scan_3d__(n: int, j: int, d_inter: np.ndarray, d_a: np.ndarray) -> None:
    """GPU kernel used to do a parallel prefix sum (scan).

    Args:
        n (int): Number of valid columns in the dataset.
        j (int): Number of valid rows in the dataset.
        d_inter (np.ndarray): Output buffer for the partial sum of each block.
        d_a (np.ndarray): Dataset of images to scan in place.
    """
    x_coor, y_coor = cuda.grid(2)
    sA = cuda.shared.array(NB_THREADS_2D, uint32)
    sA[cuda.threadIdx.x, cuda.threadIdx.y] = d_a[cuda.blockIdx.z, y_coor, x_coor] if x_coor < n and y_coor < j else 0
    cuda.syncthreads()
    k = cuda.threadIdx.x
    for d in range(M):
        k *= 2
        i1 = k + 2**d - 1
        i2 = k + 2**(d + 1) - 1
        if i2 >= cuda.blockDim.x:
            break
        sA[i2, cuda.threadIdx.y] += sA[i1, cuda.threadIdx.y]
        cuda.syncthreads()
    if cuda.threadIdx.x == 0:
        d_inter[cuda.blockIdx.z, y_coor, cuda.blockIdx.x] = sA[cuda.blockDim.x - 1, cuda.threadIdx.y]
        sA[cuda.blockDim.x - 1, cuda.threadIdx.y] = 0
    cuda.syncthreads()
    k = 2**(M + 1) * cuda.threadIdx.x
    for d in range(M - 1, -1, -1):
        k //= 2
        i1 = k + 2**d - 1
        i2 = k + 2**(d + 1) - 1
        if i2 >= cuda.blockDim.x:
            continue
        t = sA[i1, cuda.threadIdx.y]
        sA[i1, cuda.threadIdx.y] = sA[i2, cuda.threadIdx.y]
        sA[i2, cuda.threadIdx.y] += t
        cuda.syncthreads()
    if x_coor < n and y_coor < j:
        d_a[cuda.blockIdx.z, y_coor, x_coor] = sA[cuda.threadIdx.x, cuda.threadIdx.y]

@cuda.jit('void(uint32[:, :, :], uint32[:, :, :], uint16, uint16)')
def __add_3d__(d_X: np.ndarray, d_s: np.ndarray, n: int, m: int) -> None:
    """GPU kernel for parallel sum.

    Args:
        d_X (np.ndarray): Dataset of images.
        d_s (np.ndarray): Temporary sums to add.
        n (int): Number of width blocks.
        m (int): Height of a block.
    """
    x_coor, y_coor = cuda.grid(2)
    if x_coor < n and y_coor < m:
        d_X[cuda.blockIdx.z, y_coor, x_coor] += d_s[cuda.blockIdx.z, y_coor, cuda.blockIdx.x]

def __scanGPU_3d__(X: np.ndarray) -> np.ndarray:
    """Parallel Prefix Sum (scan) of a given dataset.

    Read more: https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda

    Args:
        X (np.ndarray): Dataset of images.

    Returns:
        np.ndarray: Scanned dataset of images.
    """
    k, height, n = X.shape
    n_block_x, n_block_y = np.ceil(np.divide(X.shape[1:], NB_THREADS_2D)).astype(np.uint64)
    d_X = cuda.to_device(X)
    d_inter = cuda.to_device(np.empty((k, height, n_block_x), dtype = np.uint32))
    __kernel_scan_3d__[(n_block_x, n_block_y, k), NB_THREADS_2D](n, height, d_inter, d_X)
    cuda.synchronize()
    inter = d_inter.copy_to_host()
    if n_block_x >= NB_THREADS_2D[0]:
        sums = __scanGPU_3d__(inter)
        d_s = cuda.to_device(sums)
        __add_3d__[(n_block_x, n_block_y, k), NB_THREADS_2D](d_X, d_s, n, height)
        cuda.synchronize()
        X_scan = d_X.copy_to_host()
    else:
        sums = __scanCPU_3d__(inter)
        X_scan = d_X.copy_to_host()
        for p in range(k):
            for h in range(height):
                for i in range(1, n_block_x):
                    for j in range(NB_THREADS_2D[1]):
                        idx = i * NB_THREADS_2D[1] + j
                        if idx < n:
                            X_scan[p, h, idx] += sums[p, h, i]
    return X_scan

@cuda.jit('void(uint32[:, :, :], uint32[:, :, :])')
def __transpose_kernel__(d_X: np.ndarray, d_Xt: np.ndarray) -> None:
    """GPU kernel of the function __transpose_3d__.

    Args:
        d_X (np.ndarray): Dataset of images.
        d_Xt (np.ndarray): Transposed dataset of images.
    """
    temp = cuda.shared.array(NB_THREADS_2D, dtype = uint32)
    x, y = cuda.grid(2)
    if x < d_X.shape[1] and y < d_X.shape[2]:
        temp[cuda.threadIdx.y, cuda.threadIdx.x] = d_X[cuda.blockIdx.z, x, y]
    cuda.syncthreads()
    x = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.x
    y = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.y
    if x < d_X.shape[2] and y < d_X.shape[1]:
        d_Xt[cuda.blockIdx.z, x, y] = temp[cuda.threadIdx.x, cuda.threadIdx.y]

def __transpose_3d__(X: np.ndarray) -> np.ndarray:
    """Transpose every image in the given dataset.

    Args:
        X (np.ndarray): Dataset of images.

    Returns:
        np.ndarray: Transposed dataset of images.
    """
    n_block_x, n_block_z = np.ceil(np.divide(X.shape[1:], NB_THREADS_2D)).astype(np.uint64)
    d_X = cuda.to_device(X)
    d_Xt = cuda.to_device(np.empty((X.shape[0], X.shape[2], X.shape[1]), dtype = X.dtype))
    __transpose_kernel__[(n_block_x, n_block_z, X.shape[0]), NB_THREADS_2D](d_X, d_Xt)
    return d_Xt.copy_to_host()

def set_integral_image(X: np.ndarray) -> np.ndarray:
    """Transform the input images into integral images (GPU version).

    Args:
        X (np.ndarray): Dataset of images.

    Returns:
        np.ndarray: Dataset of integral images.
    """
    X = X.astype(np.uint32)
    X = __scanGPU_3d__(X)
    X = __transpose_3d__(X)
    X = __scanGPU_3d__(X)
    return __transpose_3d__(X)

@cuda.jit('void(int32[:, :], uint8[:], int32[:, :], uint16[:, :], float64[:], float64, float64)')
def __train_weak_clf_kernel__(d_classifiers: np.ndarray, d_y: np.ndarray, d_X_feat: np.ndarray, d_X_feat_argsort: np.ndarray,
                              d_weights: np.ndarray, total_pos: float, total_neg: float) -> None:
    """GPU kernel of the function train_weak_clf.

    Args:
        d_classifiers (np.ndarray): Weak classifiers to train.
        d_y (np.ndarray): Labels of the features.
        d_X_feat (np.ndarray): Feature images dataset.
        d_X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
        d_weights (np.ndarray): Weights of the features.
        total_pos (float): Total weight of the positive samples.
        total_neg (float): Total weight of the negative samples.
    """
    i = cuda.blockIdx.x * cuda.blockDim.x * cuda.blockDim.y * cuda.blockDim.z
    i += cuda.threadIdx.x * cuda.blockDim.y * cuda.blockDim.z
    i += cuda.threadIdx.y * cuda.blockDim.z
    i += cuda.threadIdx.z
    if i >= d_classifiers.shape[0]:
        return
    pos_seen, neg_seen = 0, 0
    pos_weights, neg_weights = 0.0, 0.0
    min_error, best_threshold, best_polarity = float64(np.inf), 0, 0
    for j in d_X_feat_argsort[i]:
        error = min(neg_weights + total_pos - pos_weights, pos_weights + total_neg - neg_weights)
        if error < min_error:
            min_error = error
            best_threshold = d_X_feat[i, j]
            best_polarity = 1 if pos_seen > neg_seen else -1
        if d_y[j] == 1:
            pos_seen += 1
            pos_weights += d_weights[j]
        else:
            neg_seen += 1
            neg_weights += d_weights[j]
    d_classifiers[i] = (best_threshold, best_polarity)

#@njit('int32[:, :](int32[:, :], uint16[:, :], uint8[:], float64[:])')
def train_weak_clf(X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray, weights: np.ndarray) -> np.ndarray:
    """Train the weak classifiers on a given dataset (GPU version).

    Args:
        X_feat (np.ndarray): Feature images dataset.
        X_feat_argsort (np.ndarray): Sorted indexes of the integrated features.
        y (np.ndarray): Labels of the features.
        weights (np.ndarray): Weights of the features.

    Returns:
        np.ndarray: Trained weak classifiers.
    """
    total_pos, total_neg = weights[y == 1].sum(), weights[y == 0].sum()
    d_classifiers = cuda.to_device(np.empty((X_feat.shape[0], 2), dtype = np.int32))
    d_X_feat = cuda.to_device(X_feat)
    d_X_feat_argsort = cuda.to_device(X_feat_argsort)
    d_weights = cuda.to_device(weights)
    d_y = cuda.to_device(y)
    n_blocks = np.ceil(X_feat.shape[0] / np.prod(NB_THREADS_3D)).astype(np.uint16)
    __train_weak_clf_kernel__[n_blocks, NB_THREADS_3D](d_classifiers, d_y, d_X_feat, d_X_feat_argsort, d_weights, total_pos, total_neg)
    return d_classifiers.copy_to_host()

@cuda.jit('uint32(uint32[:, :], int16, int16, int16, int16)', device = True)
def __compute_feature__(ii: np.ndarray, x: int, y: int, w: int, h: int) -> int:
    """Compute a feature on an integral image at a specific coordinate (GPU version).

    Args:
        ii (np.ndarray): Integral image.
        x (int): X coordinate.
        y (int): Y coordinate.
        w (int): Width of the feature.
        h (int): Height of the feature.

    Returns:
        int: Computed feature.
    """
    return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]

@cuda.jit('void(int32[:, :], uint8[:, :, :, :], uint32[:, :, :])')
def __apply_feature_kernel__(X_feat: np.ndarray, feats: np.ndarray, X_ii: np.ndarray) -> None:
    """GPU kernel of the function apply_features.

    Args:
        X_feat (np.ndarray): Feature images dataset.
        feats (np.ndarray): Features to apply.
        X_ii (np.ndarray): Integral image dataset.
    """
    x, y = cuda.grid(2)
    if x >= feats.shape[0] or y >= X_ii.shape[0]:
        return
    p_x, p_y, p_w, p_h = feats[x, 0, 0]
    p1_x, p1_y, p1_w, p1_h = feats[x, 0, 1]
    n_x, n_y, n_w, n_h = feats[x, 1, 0]
    n1_x, n1_y, n1_w, n1_h = feats[x, 1, 1]
    sP = __compute_feature__(X_ii[y], p_x, p_y, p_w, p_h) + \
         __compute_feature__(X_ii[y], p1_x, p1_y, p1_w, p1_h)
    sN = __compute_feature__(X_ii[y], n_x, n_y, n_w, n_h) + \
         __compute_feature__(X_ii[y], n1_x, n1_y, n1_w, n1_h)
    X_feat[x, y] = sP - sN

#@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])')
def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
    """Apply the features on an integral image dataset (GPU version).

    Args:
        feats (np.ndarray): Features to apply.
        X_ii (np.ndarray): Integral image dataset.

    Returns:
        np.ndarray: Applied features.
    """
    d_X_feat = cuda.to_device(np.empty((feats.shape[0], X_ii.shape[0]), dtype = np.int32))
    d_feats = cuda.to_device(feats)
    d_X_ii = cuda.to_device(X_ii)
    n_x_blocks, n_y_blocks = np.ceil(np.divide(d_X_feat.shape, NB_THREADS_2D)).astype(np.uint16)
    __apply_feature_kernel__[(n_x_blocks, n_y_blocks), NB_THREADS_2D](d_X_feat, d_feats, d_X_ii)
    cuda.synchronize()
    return d_X_feat.copy_to_host()

@cuda.jit('int32(int32[:], uint16[:], int32, int32)', device = True)
def as_partition(a: np.ndarray, indices: np.ndarray, l: int, h: int) -> int:
    i = l - 1
    j = l
    for j in range(l, h + 1):
        if a[indices[j]] < a[indices[h]]:
            i += 1
            indices[i], indices[j] = indices[j], indices[i]
    i += 1
    indices[i], indices[j] = indices[j], indices[i]
    return i

@cuda.jit('void(int32[:], uint16[:], int32, int32)', device = True)
def argsort_bounded(a: np.ndarray, indices: np.ndarray, l: int, h: int) -> None:
    #total = h - l + 1
    stack = cuda.local.array(6977, int32)
    stack[0] = l
    stack[1] = h
    top = 1
    low = l
    high = h
    while top >= 0:
        high = stack[top]
        top -= 1
        low = stack[top]
        top -= 1
        if low >= high:
            break
        p = as_partition(a, indices, low, high)
        if p - 1 > low:
            top += 1
            stack[top] = low
            top += 1
            stack[top] = p - 1
        if p + 1 < high:
            top += 1
            stack[top] = p + 1
            top += 1
            stack[top] = high

@cuda.jit('void(int32[:, :], uint16[:, :])')
def argsort_flatter(X_feat: np.ndarray, indices: np.ndarray) -> None:
    i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
    if i < X_feat.shape[0]:
        for j in range(indices.shape[1]):
            indices[i, j] = j
        argsort_bounded(X_feat[i], indices[i], 0, X_feat.shape[1] - 1)

def argsort(X_feat: np.ndarray) -> np.ndarray:
    indices = np.empty_like(X_feat, dtype = np.uint16)
    n_blocks = int(np.ceil(np.divide(X_feat.shape[0], NB_THREADS)))
    d_X_feat = cuda.to_device(X_feat)
    d_indices = cuda.to_device(indices)
    argsort_flatter[n_blocks, NB_THREADS](d_X_feat, d_indices)
    cuda.synchronize()
    return d_indices.copy_to_host()
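The GPU pipeline builds the integral image from two exclusive row scans with a transpose in between (scan, transpose, scan, transpose). __scanCPU_3d__ is the serial reference for one such scan; in NumPy terms it computes the following (a hypothetical cross-check, not part of the commit; importing ViolaJonesGPU compiles the CUDA kernels, so a working CUDA setup is assumed):

import numpy as np
from ViolaJonesGPU import __scanCPU_3d__

X = np.random.randint(0, 100, size = (3, 4, 5)).astype(np.uint32)
ref = np.cumsum(X, axis = 2) - X  # exclusive prefix sum along the last axis
assert (__scanCPU_3d__(np.copy(X)) == ref).all()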

28
python/activate.sh Executable file

@@ -0,0 +1,28 @@
#!/bin/sh
# Exit if any command doesn't exit with code 0
set -e

EXEC_DIR=$1
test -z "$EXEC_DIR" && EXEC_DIR=..
VENV_PATH=$EXEC_DIR/python/venv

activate(){
	if [ ! -d "$VENV_PATH" ]; then
		echo 'Creating python virtual environment'
		python -m venv "$VENV_PATH"
		echo 'Activating virtual environment'
		activate
		echo 'Updating base pip packages'
		python -m pip install -U setuptools pip
		echo 'Installing requirements'
		pip install -r "$EXEC_DIR"/python/requirements.txt
	elif [ -f "$VENV_PATH"/Scripts/activate ]; then . "$VENV_PATH"/Scripts/activate
	elif [ -f "$VENV_PATH"/bin/activate ]; then . "$VENV_PATH"/bin/activate
	else
		echo 'Python virtual environment not detected'
		exit 1
	fi
}
activate

136
python/common.py Normal file

@@ -0,0 +1,136 @@
from toolbox import picke_multi_loader, format_time_ns, unit_test_argsort_2d
from typing import List, Tuple
from time import perf_counter_ns
import numpy as np

def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e-8) -> None:
    """Test whether every device's results are equal to one another.

    Since ViolaJones is a deterministic algorithm, the results should be the same regardless
    of the device (up to floating point fluctuations); this function checks that assertion.

    Args:
        TS (List[int]): Number of trained weak classifiers.
        labels (List[str], optional): List of the trained device names. Defaults to ["CPU", "GPU"].
        tol (float, optional): Float difference tolerance. Defaults to 1e-8.
    """
    if len(labels) < 2:
        return print("Not enough devices to test")
    fnc_s = perf_counter_ns()
    n_total = 0
    n_success = 0
    print(f"\n| {'Unit testing':<37} | {'Test state':<10} | {'Time spent (ns)':<17} | {'Formatted time spent':<29} |")
    print(f"|{'-'*39}|{'-'*12}|{'-'*19}|{'-'*31}|")

    for filename in ["X_train_feat", "X_test_feat", "X_train_ii", "X_test_ii"]:
        print(f"{filename}...", end = "\r")
        bs = picke_multi_loader([f"{filename}_{label}" for label in labels], "./out")
        for i, (b1, l1) in enumerate(zip(bs, labels)):
            if b1 is None:
                #print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
                continue
            for j, (b2, l2) in enumerate(zip(bs, labels)):
                if i >= j:
                    continue
                if b2 is None:
                    #print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
                    continue
                n_total += 1
                s = perf_counter_ns()
                state = np.abs(b1 - b2).mean() < tol
                e = perf_counter_ns() - s
                if state:
                    print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
                    n_success += 1
                else:
                    print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")

    for filename, featname in zip(["X_train_feat_argsort", "X_test_feat_argsort"], ["X_train_feat", "X_test_feat"]):
        print(f"Loading {filename}...", end = "\r")
        feat = None
        bs = []
        for label in labels:
            if feat is None:
                feat_tmp = picke_multi_loader([f"{featname}_{label}"], "./out")[0]
                if feat_tmp is not None:
                    feat = feat_tmp
            bs.append(picke_multi_loader([f"{filename}_{label}"], "./out")[0])
        for i, (b1, l1) in enumerate(zip(bs, labels)):
            if b1 is None:
                #print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
                continue
            if feat is not None:
                n_total += 1
                s = perf_counter_ns()
                state = unit_test_argsort_2d(feat, b1)
                e = perf_counter_ns() - s
                if state:
                    print(f"| {filename:<22} - {l1:<4} argsort | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
                    n_success += 1
                else:
                    print(f"| {filename:<22} - {l1:<4} argsort | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
            for j, (b2, l2) in enumerate(zip(bs, labels)):
                if i >= j:
                    continue
                if b2 is None:
                    #print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
                    continue
                n_total += 1
                s = perf_counter_ns()
                state = np.abs(b1 - b2).mean() < tol
                e = perf_counter_ns() - s
                if state:
                    print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
                    n_success += 1
                else:
                    print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")

    for T in TS:
        for filename in ["alphas", "final_classifiers"]:
            print(f"{filename}_{T}...", end = "\r")
            bs = picke_multi_loader([f"{filename}_{T}_{label}" for label in labels])
            for i, (b1, l1) in enumerate(zip(bs, labels)):
                if b1 is None:
                    #print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
                    continue
                for j, (b2, l2) in enumerate(zip(bs, labels)):
                    if i >= j:
                        continue
                    if b2 is None:
                        #print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>17} | {'None':<29} |")
                        continue
                    n_total += 1
                    s = perf_counter_ns()
                    state = np.abs(b1 - b2).mean() < tol
                    e = perf_counter_ns() - s
                    if state:
                        print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>17,} | {format_time_ns(e):<29} |")
                        n_success += 1
                    else:
                        print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>17,} | {format_time_ns(e):<29} |")

    print(f"|{'-'*39}|{'-'*12}|{'-'*19}|{'-'*31}|")
    e = perf_counter_ns() - fnc_s
    print(f"| {'Unit testing summary':<37} | {str(n_success) + '/' + str(n_total):>10} | {e:>17,} | {format_time_ns(e):<29} |")

def load_datasets(data_dir: str = "../data") -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Load the datasets.

    Args:
        data_dir (str, optional): Path of the directory containing the data files. Defaults to "../data".

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: The X_train, y_train, X_test and y_test datasets.
    """
    bytes_to_int_list = lambda b: list(map(int, b.rstrip().split(" ")))

    def load(set_name: str) -> np.ndarray:
        with open(f"{data_dir}/{set_name}.bin", "r") as f:
            shape = bytes_to_int_list(f.readline())
            return np.asarray(bytes_to_int_list(f.readline()), dtype = np.uint8).reshape(shape)

    return load("X_train"), load("y_train"), load("X_test"), load("y_test")

15
python/config.py Normal file

@@ -0,0 +1,15 @@
# Save state to avoid recalculation on restart
SAVE_STATE = True
# Redo the state even if it's already saved
FORCE_REDO = False
# Use NJIT to greatly accelerate runtime
COMPILE_WITH_C = False
# Use GPU to greatly accelerate runtime (takes priority over NJIT)
GPU_BOOSTED = False
# Number of weak classifiers
# TS = [1]
# TS = [1, 5, 10]
# TS = [1, 5, 10, 25, 50]
# TS = [1, 5, 10, 25, 50, 100, 200]
# TS = [1, 5, 10, 25, 50, 100, 200, 300]
TS = [1, 5, 10, 25, 50, 100, 200, 300, 400, 500, 1000]
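The two boolean flags above select among four implementations of the same pipeline; projet.py derives a device label from them like so:

# GPU_BOOSTED = False, COMPILE_WITH_C = False -> 'PY'   (pure Python)
# GPU_BOOSTED = False, COMPILE_WITH_C = True  -> 'CPU'  (Numba njit)
# GPU_BOOSTED = True,  COMPILE_WITH_C = False -> 'PGPU' (CUDA kernels, pure-Python host code)
# GPU_BOOSTED = True,  COMPILE_WITH_C = True  -> 'GPU'  (CUDA kernels, njit host code)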

57
python/convert_dataset.py Normal file

@@ -0,0 +1,57 @@
from io import BufferedReader
from tqdm import tqdm
from functools import partial
from sys import argv
import numpy as np
from os import path, listdir

# Makes the "leave" argument default to False
tqdm = partial(tqdm, leave = False)

def read_pgm(pgm_file: BufferedReader) -> np.ndarray:
    """Read the data of a PGM file.

    Args:
        pgm_file (BufferedReader): PGM file.

    Returns:
        np.ndarray: PGM data.
    """
    assert (f := pgm_file.readline()) == b'P5\n', f"Incorrect file format: {f}"
    (width, height) = [int(i) for i in pgm_file.readline().split()]
    assert width > 0 and height > 0, f"Incorrect dimensions: {width}x{height}"
    assert (depth := int(pgm_file.readline())) < 256, f"Incorrect depth: {depth}"
    buff = np.empty(height * width, dtype = np.uint8)
    for i in range(buff.shape[0]):
        buff[i] = ord(pgm_file.read(1))
    return buff.reshape((height, width))

def __main__(data_path: str) -> None:
    """Read the data of every PGM file and output it in data files.

    Args:
        data_path (str): Path of the PGM files.
    """
    for set_name in tqdm(["train", "test"], desc = "set name"):
        X, y = [], []
        for y_i, label in enumerate(tqdm(["non-face", "face"], desc = "label")):
            for filename in tqdm(listdir(f"{data_path}/{set_name}/{label}"), desc = "Reading pgm file"):
                with open(f"{data_path}/{set_name}/{label}/{filename}", "rb") as face:
                    X.append(read_pgm(face))
                    y.append(y_i)
        X, y = np.asarray(X), np.asarray(y)
        # idx = np.random.permutation(y.shape[0])
        # X, y = X[idx], y[idx]
        for org, s in tqdm(zip("Xy", [X, y]), desc = f"Writing {set_name}"):
            with open(f"{data_path}/{org}_{set_name}.bin", "w") as out:
                out.write(f'{str(s.shape)[1:-1].replace(",", "")}\n')
                raw = s.ravel()
                for s_i in tqdm(raw[:-1], desc = f"Writing {org}"):
                    out.write(f"{s_i} ")
                out.write(str(raw[-1]))

if __name__ == "__main__":
    __main__(argv[1]) if len(argv) == 2 else print(f"Usage: python {__file__[__file__.rfind(path.sep) + 1:]} ./data_location")
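The writer above emits a plain-text format: one line with the space-separated shape, then one line with every flattened value. A minimal round-trip sketch of that format (hypothetical, not part of the commit; load_datasets() in common.py is the actual reader):

import numpy as np

s = np.arange(6, dtype = np.uint8).reshape(2, 3)
with open("/tmp/X_demo.bin", "w") as out:
    out.write(f'{str(s.shape)[1:-1].replace(",", "")}\n')
    out.write(" ".join(map(str, s.ravel())))
with open("/tmp/X_demo.bin", "r") as f:
    shape = [int(i) for i in f.readline().split()]
    t = np.asarray([int(i) for i in f.readline().split()], dtype = np.uint8).reshape(shape)
assert (s == t).all()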

13
python/decorators.py Normal file

@@ -0,0 +1,13 @@
from typing import Callable, Iterable, Union, Any
from tqdm import tqdm

def njit(f: Union[Callable, str] = None, *args, **kwargs) -> Callable:
    def decorator(func: Callable) -> Any:
        return func
    if callable(f):
        return f
    return decorator

def tqdm_iter(iter: Iterable, desc: str):
    return tqdm(iter, leave = False, desc = desc)
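These stubs keep the call sites identical whether or not Numba is enabled: njit here accepts the same arguments as numba.njit but returns the decorated function unchanged. A quick demonstration (hypothetical snippet, not part of the commit):

from decorators import njit

@njit('float64(float64)')
def square(x: float) -> float:
    return x * x

assert square(3.0) == 9.0  # no compilation happened; square is the original function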

226
python/projet.py Normal file

@@ -0,0 +1,226 @@
#!/usr/bin/env python
# Author: @saundersp

from ViolaJones import train_viola_jones, classify_viola_jones
from toolbox import state_saver, picke_multi_loader, format_time_ns, benchmark_function, toolbox_unit_test, unit_test_argsort_2d
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.feature_selection import SelectPercentile, f_classif
from common import load_datasets, unit_test
from ViolaJones import build_features, get_best_anova_features
from typing import Tuple
from time import perf_counter_ns
from os import makedirs
import numpy as np
#np.seterr(all = 'raise')
from config import FORCE_REDO, COMPILE_WITH_C, GPU_BOOSTED, TS, SAVE_STATE

if GPU_BOOSTED:
    from ViolaJonesGPU import apply_features, set_integral_image, argsort
    label = 'GPU' if COMPILE_WITH_C else 'PGPU'
    # The parallel prefix sum doesn't use the whole GPU, so numba outputs some annoying warnings; this disables them
    from numba import config
    config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
else:
    from ViolaJonesCPU import apply_features, set_integral_image, argsort
    label = 'CPU' if COMPILE_WITH_C else 'PY'

# FIXME Debug code
# IDX_INSPECT = 0
# IDX_INSPECT = 2
IDX_INSPECT = 4548
IDX_INSPECT_OFFSET = 100

def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Train the weak classifiers.

    Args:
        X_train (np.ndarray): Training images.
        X_test (np.ndarray): Testing images.
        y_train (np.ndarray): Training labels.

    Returns:
        Tuple[np.ndarray, np.ndarray]: Training and testing features.
    """
    feats = state_saver("Building features", "feats", lambda: build_features(X_train.shape[1], X_train.shape[2]), FORCE_REDO, SAVE_STATE)

    # FIXME Debug code
    # print("feats")
    # print(feats.shape)
    # print(feats[IDX_INSPECT].ravel())
    # return 0, 0

    X_train_ii = state_saver(f"Converting training set to integral images ({label})", f"X_train_ii_{label}",
                             lambda: set_integral_image(X_train), FORCE_REDO, SAVE_STATE)
    X_test_ii = state_saver(f"Converting testing set to integral images ({label})", f"X_test_ii_{label}",
                            lambda: set_integral_image(X_test), FORCE_REDO, SAVE_STATE)

    # FIXME Debug code
    # print("X_train_ii")
    # print(X_train_ii.shape)
    # print(X_train_ii[IDX_INSPECT])
    # print("X_test_ii")
    # print(X_test_ii.shape)
    # print(X_test_ii[IDX_INSPECT])
    # return 0, 0

    X_train_feat = state_saver(f"Applying features to training set ({label})", f"X_train_feat_{label}",
                               lambda: apply_features(feats, X_train_ii), FORCE_REDO, SAVE_STATE)
    X_test_feat = state_saver(f"Applying features to testing set ({label})", f"X_test_feat_{label}",
                              lambda: apply_features(feats, X_test_ii), FORCE_REDO, SAVE_STATE)
    del X_train_ii, X_test_ii, feats

    # FIXME Debug code
    # print("X_train_feat")
    # print(X_train_feat.shape)
    # print(X_train_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
    # print("X_test_feat")
    # print(X_test_feat.shape)
    # print(X_test_feat[IDX_INSPECT, : IDX_INSPECT_OFFSET])
    # return 0, 0

    #indices = state_saver("Selecting best features training set", "indices", force_redo = True, save_state = SAVE_STATE,
    #    fnc = lambda: SelectPercentile(f_classif, percentile = 10).fit(X_train_feat.T, y_train).get_support(indices = True))
    #indices = state_saver("Selecting best features training set", "indices", force_redo = FORCE_REDO, save_state = SAVE_STATE,
    #    fnc = lambda: get_best_anova_features(X_train_feat, y_train))
    #indices = benchmark_function("Selecting best features (manual)", lambda: get_best_anova_features(X_train_feat, y_train))

    # FIXME Debug code
    # print("indices")
    # print(indices.shape)
    # print(indices[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
    # assert indices.shape[0] == indices_new.shape[0], f"Indices length not equal : {indices.shape} != {indices_new.shape}"
    # assert (eq := indices == indices_new).all(), f"Indices not equal : {eq.sum() / indices.shape[0]}"
    # return 0, 0

    # X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]
    #return 0, 0

    X_train_feat_argsort = state_saver(f"Precalculating training set argsort ({label})", f"X_train_feat_argsort_{label}",
                                       lambda: argsort(X_train_feat), FORCE_REDO, SAVE_STATE)

    # FIXME Debug code
    # print("X_train_feat_argsort")
    # print(X_train_feat_argsort.shape)
    # print(X_train_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
    # benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_train_feat, X_train_feat_argsort))
    # return 0, 0

    # X_test_feat_argsort = state_saver(f"Precalculating testing set argsort ({label})", f"X_test_feat_argsort_{label}",
    #    lambda: argsort(X_test_feat), True, False)

    # FIXME Debug code
    # print("X_test_feat_argsort")
    # print(X_test_feat_argsort.shape)
    # print(X_test_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET])
    # benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_test_feat, X_test_feat_argsort))
    # return 0, 0
    # del X_test_feat_argsort

    print(f"\n| {'Training':<49} | {'Time spent (ns)':<17} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*19}|{'-'*31}|")
    for T in TS:
        # alphas, final_classifiers = state_saver(f"ViolaJones T = {T:<3} ({label})", [f"alphas_{T}_{label}", f"final_classifiers_{T}_{label}"],
        state_saver(f"ViolaJones T = {T:<4} ({label})", [f"alphas_{T}_{label}", f"final_classifiers_{T}_{label}"],
                    lambda: train_viola_jones(T, X_train_feat, X_train_feat_argsort, y_train), FORCE_REDO, SAVE_STATE, "./models")
        # FIXME Debug code
        # print("alphas")
        # print(alphas)
        # print("final_classifiers")
        # print(final_classifiers)

    return X_train_feat, X_test_feat

def bench_accuracy(label: str, X_train_feat: np.ndarray, X_test_feat: np.ndarray, y_train: np.ndarray, y_test: np.ndarray) -> None:
    """Benchmark the trained classifiers on the training and testing sets.

    Args:
        label (str): Label of the benchmarked implementation.
        X_train_feat (np.ndarray): Training features.
        X_test_feat (np.ndarray): Testing features.
        y_train (np.ndarray): Training labels.
        y_test (np.ndarray): Testing labels.
    """
    print(f"\n| {'Testing':<26} | Time spent (ns) (E) | {'Formatted time spent (E)':<29}", end = " | ")
    print(f"Time spent (ns) (T) | {'Formatted time spent (T)':<29} |")
    print(f"|{'-'*28}|{'-'*21}|{'-'*31}|{'-'*21}|{'-'*31}|")
    perfs = []
    for T in TS:
        (alphas, final_classifiers) = picke_multi_loader([f"alphas_{T}_{label}", f"final_classifiers_{T}_{label}"])

        s = perf_counter_ns()
        y_pred_train = classify_viola_jones(alphas, final_classifiers, X_train_feat)
        t_pred_train = perf_counter_ns() - s
        e_acc = accuracy_score(y_train, y_pred_train)
        e_f1 = f1_score(y_train, y_pred_train)
        (_, e_FP), (e_FN, _) = confusion_matrix(y_train, y_pred_train)

        s = perf_counter_ns()
        y_pred_test = classify_viola_jones(alphas, final_classifiers, X_test_feat)
        t_pred_test = perf_counter_ns() - s
        t_acc = accuracy_score(y_test, y_pred_test)
        t_f1 = f1_score(y_test, y_pred_test)
        (_, t_FP), (t_FN, _) = confusion_matrix(y_test, y_pred_test)
        perfs.append((e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP))

        print(f"| {'ViolaJones T = ' + str(T):<19} {'(' + label + ')':<6}", end = " | ")
        print(f"{t_pred_train:>19,} | {format_time_ns(t_pred_train):<29}", end = " | ")
        print(f"{t_pred_test:>19,} | {format_time_ns(t_pred_test):<29} |")

    print(f"\n| {'Evaluating':<19} | ACC (E) | F1 (E) | FN (E) | FP (E) | ACC (T) | F1 (T) | FN (T) | FP (T) |")
    print(f"|{'-'*21}|{'-'*9}|{'-'*8}|{'-'*8}|{'-'*8}|{'-'*9}|{'-'*8}|{'-'*8}|{'-'*8}|")
    for T, (e_acc, e_f1, e_FN, e_FP, t_acc, t_f1, t_FN, t_FP) in zip(TS, perfs):
        print(f"| {'ViolaJones T = ' + str(T):<19} | {e_acc:>7.2%} | {e_f1:>6.2f} | {e_FN:>6,} | {e_FP:>6,}", end = " | ")
        print(f"{t_acc:>7.2%} | {t_f1:>6.2f} | {t_FN:>6,} | {t_FP:>6,} |")

def _main_() -> None:
    # Create the state saver folders if they don't exist already
    if SAVE_STATE:
        for folder_name in ["models", "out"]:
            makedirs(folder_name, exist_ok = True)

    print(f"| {'Preprocessing':<49} | {'Time spent (ns)':<17} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*19}|{'-'*31}|")
    X_train, y_train, X_test, y_test = state_saver("Loading sets", ["X_train", "y_train", "X_test", "y_test"],
                                                   load_datasets, FORCE_REDO, SAVE_STATE)

    # FIXME Debug option (image width * log_10(length) + extra characters)
    # np.set_printoptions(linewidth = 19 * 6 + 3)

    # FIXME Debug code
    # print("X_train")
    # print(X_train.shape)
    # print(X_train[IDX_INSPECT])
    # print("X_test")
    # print(X_test.shape)
    # print(X_test[IDX_INSPECT])
    # print("y_train")
    # print(y_train.shape)
    # print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
    # print("y_test")
    # print(y_test.shape)
    # print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET])
    # return

    X_train_feat, X_test_feat = bench_train(X_train, X_test, y_train)

    # FIXME Debug code
    # return
    # X_train_feat, X_test_feat = picke_multi_loader([f"X_train_feat_{label}", f"X_test_feat_{label}"], "./out")
    # indices = picke_multi_loader(["indices"], "./out")[0]
    # X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices]

    bench_accuracy(label, X_train_feat, X_test_feat, y_train, y_test)

if __name__ == "__main__":
    #toolbox_unit_test()
    _main_()
    # Only execute the unit test after having trained the specified labels
    unit_test(TS, ["GPU", "CPU", "PY", "PGPU"])

5
python/requirements.txt Normal file

@@ -0,0 +1,5 @@
numba
scikit-learn
tqdm
pudb
nvitop

189
python/test.py Normal file

@@ -0,0 +1,189 @@
import numpy as np
from numba import cuda, config, njit
config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
#import matplotlib.pyplot as plt
from tqdm import tqdm
from time import perf_counter_ns
from toolbox import format_time_ns
from pickle import load, dump
from sys import argv

def get(a):
    with open(f"{a}.pkl", 'rb') as f:
        return load(f)

def save(a, name) -> None:
    with open(name, 'wb') as f:
        dump(a, f)

def diff(folder, a, label1, label2):
    af, bf = get(f"{folder}/{a}_{label1}"), get(f"{folder}/{a}_{label2}")
    #print(af)
    #print(bf)
    print((af - bf).mean())

if __name__ == "__main__":
    if len(argv) == 5:
        diff(argv[1], argv[4], argv[2], argv[3])

def py_mean(a, b):
    s = 0.0
    for a_i, b_i in zip(a, b):
        s += a_i * b_i
    return s / a.shape[0]

def np_mean(a, b):
    return np.mean(a * b)

@njit('float64(float64[:], float64[:])', fastmath = True, nogil = True)
def nb_mean(a, b):
    return np.mean(a * b)

@njit('float64(float64[:], float64[:])', fastmath = True, nogil = True)
def nb_mean_loop(a, b):
    s = 0.0
    for a_i, b_i in zip(a, b):
        s += a_i * b_i
    return s / a.shape[0]

@cuda.jit('void(float64[:], float64[:], float64[:])', fastmath = True)
def cuda_mean_kernel(r, a, b):
    s = 0.0
    for a_i, b_i in zip(a, b):
        s += a_i * b_i
    r[0] = s / a.shape[0]

def cuda_mean(a, b):
    r = cuda.to_device(np.empty(1, dtype = np.float64))
    d_a = cuda.to_device(a)
    d_b = cuda.to_device(b)
    cuda_mean_kernel[1, 1](r, d_a, d_b)
    return r.copy_to_host()[0]

def test_and_compare(labels, fncs, a, b):
    m = []
    for fnc in tqdm(fncs, leave = False, desc = "Calculating..."):
        s = perf_counter_ns()
        m.append([fnc(a, b), perf_counter_ns() - s])
    print("Results:")
    [print(f"\t{label:<10} {m_i:<20} {format_time_ns(time_i)}") for ((m_i, time_i), label) in zip(m, labels)]
    print("Comparison:")
    for i, (m_i, label_i) in enumerate(zip(m, labels)):
        for j, (m_j, label_j) in enumerate(zip(m, labels)):
            if i >= j:
                continue
            print(f"\t{label_i:<10} vs {label_j:<10} - {abs(m_i[0] - m_j[0])}")

if __name__ == "__main__":
    np.set_printoptions(linewidth = 10000, threshold = 1000)

    N = int(2**20)
    labels = ["Python", "Numpy", "Numba", "Numba loop", "CUDA"]
    fncs = [py_mean, np_mean, nb_mean, nb_mean_loop, cuda_mean]

    print(f"RANDOM for N={N}")
    total_size = (2 * 8 * N)
    print(f"Size = {total_size} B")
    print(f"Size = {total_size // 1024} kB")
    print(f"Size = {total_size // 1024 // 1024} MB")
    print(f"Size = {total_size // 1024 // 1024 // 1024} GB")
    a, b = np.random.rand(N).astype(np.float64), np.random.rand(N).astype(np.float64)
    test_and_compare(labels, fncs, a, b)
    del a, b

    print(f"\nDETERMINISTIC for N={N}")
    total_size = (2 * 8 * N) + (8 * N)
    print(f"Size = {total_size} B")
    print(f"Size = {total_size // 1024} kB")
    print(f"Size = {total_size // 1024 // 1024} MB")
    print(f"Size = {total_size // 1024 // 1024 // 1024} GB")
    mask = np.arange(N, dtype = np.uint64)
    a = np.ones(N, dtype = np.float64)
    a[mask < N//2] = 0.1
    del mask
    b = np.ones(N, dtype = np.float64)
    test_and_compare(labels, fncs, a, b)
    del a, b

#from ViolaJonesGPU import argsort as argsort_GPU
#from ViolaJonesCPU import argsort as argsort_CPU
#from toolbox import unit_test_argsort_2d, benchmark_function

#labels = ["Numpy", "Numba", "CUDA"]
#a = np.random.randint(2**12, size = (2**20, 2**8), dtype = np.int32)
#m = [benchmark_function(f"Argsort {label}", lambda: f(np.copy(a))) for (label, f) in zip(labels, [
#    lambda a: np.argsort(a).astype(np.uint16), argsort_CPU, argsort_GPU
#])]
#for i, (m_i, label_i) in enumerate(zip(m, labels)):
#    #for j, (m_j, label_j) in enumerate(zip(m, labels)):
#    #    if i >= j:
#    #        continue
#    #    print(f"\t{label_i:<10} vs {label_j:<10} - {(m_i == m_j).mean()}")
#    benchmark_function(f"Unit test {label_i}", lambda: unit_test_argsort_2d(a, m_i))

#for i in tqdm(range(X.shape[0]), leave = False, desc = "Extract image"):
#    x = X[i]
#    y = Y[i]
#    fig = plt.figure()
#    plt.imshow(x, cmap = 'gray')
#    plt.savefig(f"imgs/{y}/{i}.png")
#    plt.close(fig)

#def extract_FD(Xy):
#    X_c, Y_c = [], []
#    for x, y in Xy:
#        X_c.append(x)
#        Y_c.append(y)
#    X_c = np.asarray(X_c)
#    Y_c = np.asarray(Y_c)
#    return X_c, Y_c

#X_train, y_train = get('out/X_train'), get('out/y_train')
#X_test, y_test = get('out/X_test'), get('out/y_test')
#X_train, y_train = extract_FD(get('/home/_aspil0w/git/FaceDetection/training'))
#X_test, y_test = extract_FD(get('/home/_aspil0w/git/FaceDetection/test'))
#save(X_train, 'out/X_train'), save(y_train, 'out/y_train')
#save(X_test, 'out/X_test'), save(y_test, 'out/y_test')

#print(X_train.shape, X_train_org.shape, X_train.shape == X_train_org.shape)
#print((X_train == X_train_org).mean())
#print(y_train.shape, y_train_org.shape, y_train.shape == y_train_org.shape)
#print((y_train == y_train_org).mean())
#print(X_test.shape, X_test_org.shape, X_test.shape == X_test_org.shape)
#print((X_test == X_test_org).mean())
#print(y_test.shape, y_test_org.shape, y_test.shape == y_test_org.shape)
#print((y_test == y_test_org).mean())

#@njit('uint16[:](uint8[:, :, :], uint8[:, :, :])')
#def arg_find(X, X_org):
#    arg = np.empty(X.shape[0], dtype = np.uint16)
#    for i, x in enumerate(X_org):
#        found = False
#        for j, x_org in enumerate(X):
#            if np.all(x == x_org):
#                arg[i] = j
#                found = True
#                break
#        assert found, "Image not found"
#    return arg

#print("Arg find results train")
#arg_train = arg_find(X_train, X_train_org)
#print((X_train[arg_train] == X_train_org).mean())
#print((y_train[arg_train] == y_train_org).mean())
#print("Arg find results test")
#arg_test = arg_find(X_test, X_test_org)
#print((X_test[arg_test] == X_test_org).mean())
#print((y_test[arg_test] == y_test_org).mean())

#for i in tqdm(range(X_c.shape[0]), leave = False, desc = "Extract image"):
#    x = X_c[i]
#    y = Y_c[i]
#    fig = plt.figure()
#    plt.imshow(x, cmap = 'gray')
#    plt.savefig(f"imgs2/{y}/{i}.png")
#    plt.close(fig)

153
python/toolbox.py Normal file

@@ -0,0 +1,153 @@
from typing import Any, Callable, List, Union
from time import perf_counter_ns
from numba import njit
import numpy as np
import pickle
import os

formats = ["ns", "µs", "ms", "s", "m", "h", "j", "w", "M", "y"]
nb = np.array([1, 1000, 1000, 1000, 60, 60, 24, 7, 4, 12], dtype = np.uint16)

def format_time_ns(time: int) -> str:
    """Format the time in nanoseconds in human readable format.

    Args:
        time (int): Time in nanoseconds.

    Returns:
        str: The formatted human readable string.
    """
    assert time >= 0, "Incorrect time stamp"
    if time == 0:
        return "0ns"
    prod = nb.prod(dtype = np.uint64)
    s = ""
    for i in range(nb.shape[0])[::-1]:
        if time >= prod:
            res = int(time // prod)
            time = time % prod
            s += f"{res}{formats[i]} "
        prod = prod // nb[i]
    assert time == 0, "Leftover in formatting time!"
    return s.rstrip()

def toolbox_unit_test() -> None:
    # FIXME Move unit test to different file
    assert "0ns" == format_time_ns(0)
    assert "1ns" == format_time_ns(1)
    assert "1µs" == format_time_ns(int(1e3))
    assert "1ms" == format_time_ns(int(1e6))
    assert "1s" == format_time_ns(int(1e9))
    assert "1m" == format_time_ns(int(6e10))
    assert "1h" == format_time_ns(int(36e11))
    assert "1j" == format_time_ns(int(864e11))
    assert "1w" == format_time_ns(int(6048e11))
    assert "1M" == format_time_ns(int(24192e11))
    assert "1y" == format_time_ns(int(290304e11))
    # UINT64_MAX == 2^64 == 18446744073709551616
    assert "635y 5M 3j 23h 34m 33s 709ms 551µs 616ns" == format_time_ns(2**64)

def picke_multi_loader(filenames: List[str], save_dir: str = "./models") -> List[Any]:
    """Load multiple pickle data files.

    Args:
        filenames (List[str]): List of all the filenames to load.
        save_dir (str, optional): Path of the files to load. Defaults to "./models".

    Returns:
        List[Any]: List of loaded pickle data files.
    """
    b = []
    for f in filenames:
        filepath = f"{save_dir}/{f}.pkl"
        if os.path.exists(filepath):
            with open(filepath, "rb") as filebyte:
                b.append(pickle.load(filebyte))
        else:
            b.append(None)
    return b

def benchmark_function(step_name: str, fnc: Callable) -> Any:
    """Benchmark a function and display the result on stdout.

    Args:
        step_name (str): Name of the function to call.
        fnc (Callable): Function to call.

    Returns:
        Any: Result of the function.
    """
    print(f"{step_name}...", end = "\r")
    s = perf_counter_ns()
    b = fnc()
    e = perf_counter_ns() - s
    print(f"| {step_name:<49} | {e:>17,} | {format_time_ns(e):<29} |")
    return b

def state_saver(step_name: str, filename: Union[str, List[str]], fnc: Callable, force_redo: bool = False, save_state: bool = True, save_dir: str = "./out") -> Any:
    """Either execute a function then save the result, or load the already existing result.

    Args:
        step_name (str): Name of the function to call.
        filename (Union[str, List[str]]): Name or list of names of the filenames where the result(s) are saved.
        fnc (Callable): Function to call.
        force_redo (bool, optional): Recall the function even if the result(s) is already saved. Defaults to False.
        save_state (bool, optional): Whether to save the result(s) to disk. Defaults to True.
        save_dir (str, optional): Path of the directory to save the result(s). Defaults to "./out".

    Returns:
        Any: The result(s) of the called function.
    """
    if isinstance(filename, str):
        if not os.path.exists(f"{save_dir}/{filename}.pkl") or force_redo:
            b = benchmark_function(step_name, fnc)
            if save_state:
                print(f"Saving results of {step_name}", end = '\r')
                with open(f"{save_dir}/{filename}.pkl", 'wb') as f:
                    pickle.dump(b, f)
                print(' ' * 100, end = '\r')
            return b
        else:
            print(f"Loading results of {step_name}", end = '\r')
            with open(f"{save_dir}/{filename}.pkl", "rb") as f:
                res = pickle.load(f)
            print(f"| {step_name:<49} | {'None':>17} | {'loaded saved state':<29} |")
            return res
    elif isinstance(filename, list):
        missing = False
        for fn in filename:
            if not os.path.exists(f"{save_dir}/{fn}.pkl"):
                missing = True
                break
        if missing or force_redo:
            b = benchmark_function(step_name, fnc)
            if save_state:
                print(f"Saving results of {step_name}", end = '\r')
                for bi, fnI in zip(b, filename):
                    with open(f"{save_dir}/{fnI}.pkl", 'wb') as f:
                        pickle.dump(bi, f)
                print(' ' * 100, end = '\r')
            return b
        print(f"| {step_name:<49} | {'None':>17} | {'loaded saved state':<29} |")
        b = []
        print(f"Loading results of {step_name}", end = '\r')
        for fn in filename:
            with open(f"{save_dir}/{fn}.pkl", "rb") as f:
                b.append(pickle.load(f))
        print(' ' * 100, end = '\r')
        return b
    else:
        assert False, f"Incompatible filename type = {type(filename)}"

@njit('boolean(int32[:, :], uint16[:, :])')
def unit_test_argsort_2d(arr: np.ndarray, indices: np.ndarray) -> bool:
    # Each row contributes shape[1] - 1 ordered pairs; starting n at shape[0]
    # makes a fully sorted input reach exactly total == shape[0] * shape[1]
    n = indices.shape[0]
    total = indices.shape[0] * indices.shape[1]
    for i, sub_indices in enumerate(indices):
        for j in range(sub_indices.shape[0] - 1):
            if arr[i, sub_indices[j]] <= arr[i, sub_indices[j + 1]]:
                n += 1
    if n != total:
        print(n, total, n / (total))
    return n == total
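Example outputs of format_time_ns, matching the assertions in toolbox_unit_test above (importing toolbox compiles unit_test_argsort_2d, so Numba must be installed):

from toolbox import format_time_ns

print(format_time_ns(int(36e11)))  # 1h
print(format_time_ns(2**64))       # 635y 5M 3j 23h 34m 33s 709ms 551µs 616ns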