python : improved documentation

This commit is contained in:
saundersp
2024-04-28 22:35:42 +02:00
parent c71b04f00d
commit 4a42747837
5 changed files with 78 additions and 77 deletions

View File

@ -32,8 +32,8 @@ def __kernel_scan_3d__(n: int, j: int, d_inter: np.ndarray, d_a: np.ndarray) ->
Args:
n (int): Number of width blocks
j (int): Temporary sum index
d_inter (np.ndarray): Temporary sums in device to add
d_a (np.ndarray): Dataset of images in device to apply sum
d_inter (np.ndarray): Temporary sums on device to add
d_a (np.ndarray): Dataset of images on device to apply sum
"""
x_coor, y_coor = cuda.grid(2)
@ -76,8 +76,8 @@ def __add_3d__(d_X: np.ndarray, d_s: np.ndarray, n: int, m: int) -> None:
"""GPU kernel for parallel sum.
Args:
d_X (np.ndarray): Dataset of images in device
d_s (np.ndarray): Temporary sums in device to add
d_X (np.ndarray): Dataset of images on device
d_s (np.ndarray): Temporary sums on device to add
n (int): Number of width blocks
m (int): Height of a block
"""
@ -131,7 +131,7 @@ def __transpose_kernel__(d_X: np.ndarray, d_Xt: np.ndarray) -> None:
"""GPU kernel of the function __transpose_3d__.
Args:
d_X (np.ndarray): Dataset of images in device
d_X (np.ndarray): Dataset of images on device
d_Xt (np.ndarray): Transposed dataset of images
width (int): Width of each image in the dataset
height (int): Height of each image in the dataset
@ -184,11 +184,11 @@ def __train_weak_clf_kernel__(d_classifiers: np.ndarray, d_y: np.ndarray, d_X_fe
"""GPU kernel of the function train_weak_clf.
Args:
d_classifiers (np.ndarray): Weak classifiers to train
d_y (np.ndarray): Labels of the features
d_X_feat (np.ndarray): Feature images dataset
d_X_feat_argsort (np.ndarray): Sorted indexes of the integrated features
d_weights (np.ndarray): Weights of the features
d_classifiers (np.ndarray): Weak classifiers on device to train
d_y (np.ndarray): Labels of the features on device
d_X_feat (np.ndarray): Feature images dataset on device
d_X_feat_argsort (np.ndarray): Sorted indexes of the integrated features on device
d_weights (np.ndarray): Weights of the features on device
total_pos (float): Total of positive labels in the dataset
total_neg (float): Total of negative labels in the dataset
"""
@ -259,29 +259,29 @@ def __compute_feature__(ii: np.ndarray, x: int, y: int, w: int, h: int) -> int:
return ii[y + h, x + w] + ii[y, x] - ii[y + h, x] - ii[y, x + w]
@cuda.jit('void(int32[:, :], uint8[:, :, :, :], uint32[:, :, :])')
def __apply_feature_kernel__(X_feat: np.ndarray, feats: np.ndarray, X_ii: np.ndarray) -> None:
def __apply_feature_kernel__(d_X_feat: np.ndarray, d_feats: np.ndarray, d_X_ii: np.ndarray) -> None:
"""GPU kernel of the function apply_features.
Args:
X_feat (np.ndarray): Feature images dataset on device
feats (np.ndarray): Features on device to apply
X_ii (np.ndarray): Integrated image dataset on device
d_X_feat (np.ndarray): Feature images dataset on device
d_feats (np.ndarray): Features on device to apply
d_X_ii (np.ndarray): Integrated image dataset on device
n (int): Number of features
m (int): Number of images of the dataset
"""
x, y = cuda.grid(2)
if x >= feats.shape[0] or y >= X_ii.shape[0]:
if x >= d_feats.shape[0] or y >= d_X_ii.shape[0]:
return
p_x, p_y, p_w, p_h = feats[x, 0, 0]
p1_x, p1_y, p1_w, p1_h = feats[x, 0, 1]
n_x, n_y, n_w, n_h = feats[x, 1, 0]
n1_x, n1_y, n1_w, n1_h = feats[x, 1, 1]
sP = __compute_feature__(X_ii[y], p_x, p_y, p_w, p_h) + \
__compute_feature__(X_ii[y], p1_x, p1_y, p1_w, p1_h)
sN = __compute_feature__(X_ii[y], n_x, n_y, n_w, n_h) + \
__compute_feature__(X_ii[y], n1_x, n1_y, n1_w, n1_h)
X_feat[x, y] = sP - sN
p_x, p_y, p_w, p_h = d_feats[x, 0, 0]
p1_x, p1_y, p1_w, p1_h = d_feats[x, 0, 1]
n_x, n_y, n_w, n_h = d_feats[x, 1, 0]
n1_x, n1_y, n1_w, n1_h = d_feats[x, 1, 1]
sP = __compute_feature__(d_X_ii[y], p_x, p_y, p_w, p_h) + \
__compute_feature__(d_X_ii[y], p1_x, p1_y, p1_w, p1_h)
sN = __compute_feature__(d_X_ii[y], n_x, n_y, n_w, n_h) + \
__compute_feature__(d_X_ii[y], n1_x, n1_y, n1_w, n1_h)
d_X_feat[x, y] = sP - sN
#@njit('int32[:, :](uint8[:, :, :, :], uint32[:, :, :])')
def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
@ -303,7 +303,7 @@ def apply_features(feats: np.ndarray, X_ii: np.ndarray) -> np.ndarray:
return d_X_feat.copy_to_host()
@cuda.jit('int32(int32[:], uint16[:], int32, int32)', device = True)
def _as_partition_(d_a: np.ndarray, d_indices: np.ndarray, l: int, h: int) -> int:
def _as_partition_(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int) -> int:
"""Partition of the argsort algorithm.
Args:
@ -315,10 +315,10 @@ def _as_partition_(d_a: np.ndarray, d_indices: np.ndarray, l: int, h: int) -> in
Returns:
int: Last index sorted
"""
i = l - 1
j = l
for j in range(l, h + 1):
if d_a[d_indices[j]] < d_a[d_indices[h]]:
i = low - 1
j = low
for j in range(low, high + 1):
if d_a[d_indices[j]] < d_a[d_indices[high]]:
i += 1
d_indices[i], d_indices[j] = d_indices[j], d_indices[i]
@ -368,11 +368,11 @@ def argsort_bounded(d_a: np.ndarray, d_indices: np.ndarray, low: int, high: int)
@cuda.jit('void(int32[:, :], uint16[:, :])')
def argsort_flatter(d_a: np.ndarray, d_indices: np.ndarray) -> None:
# TODO Finish doxygen
"""Cuda kernel where argsort is applied to every columns of a given 2D array.
"""Cuda kernel where argsort is applied to every column of a given 2D array.
Args:
d_a (np.ndarray): Array in device to sort
d_indices (np.ndarray): Array of indices on device to write to
d_a (np.ndarray): 2D Array on device to sort
d_indices (np.ndarray): 2D Array of indices on device to write to
"""
i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
if i < d_a.shape[0]:
@ -380,19 +380,19 @@ def argsort_flatter(d_a: np.ndarray, d_indices: np.ndarray) -> None:
d_indices[i, j] = j
argsort_bounded(d_a[i], d_indices[i], 0, d_a.shape[1] - 1)
def argsort(a: np.ndarray) -> np.ndarray:
"""Perform an indirect sort of a given array
def argsort_2d(a: np.ndarray) -> np.ndarray:
"""Perform an indirect sort on each column of a given 2D array
Args:
a (np.ndarray): Array to sort
a (np.ndarray): 2D Array to sort
Returns:
np.ndarray: Array of indices that sort the array
np.ndarray: 2D Array of indices that sort the array
"""
indices = np.empty_like(a, dtype = np.uint16)
n_blocks = int(np.ceil(np.divide(a.shape[0], NB_THREADS)))
d_X_feat = cuda.to_device(a)
d_a = cuda.to_device(a)
d_indices = cuda.to_device(indices)
argsort_flatter[n_blocks, NB_THREADS](d_X_feat, d_indices)
argsort_flatter[n_blocks, NB_THREADS](d_a, d_indices)
cuda.synchronize()
return d_indices.copy_to_host()