"""Viola-Jones training and classification routines (feature building, boosting loop, inference)."""
from typing import Tuple, Iterable
from tqdm import tqdm
import numpy as np
import config

# The weak-classifier trainer comes from either the GPU or the CPU backend,
# selected once at import time by the project configuration.
if config.GPU_BOOSTED:
    from ViolaJonesGPU import train_weak_clf
else:
    from ViolaJonesCPU import train_weak_clf

if config.COMPILE_WITH_C:
    from numba import njit

    # Pass-through replacement for the progress-bar wrapper: returns the
    # iterable untouched and ignores the description string.
    @njit
    def tqdm_iter(iter: Iterable, _: str):
        return iter
else:
    from decorators import njit, tqdm_iter


@njit('uint8[:, :, :, :](uint16, uint16)')
def build_features(width: int, height: int) -> np.ndarray:
    """Enumerate the rectangle features for an image of the given size.

    Each feature is a pair of rectangle lists; each list holds two
    ``(x, y, w, h)`` rectangles, with ``(0, 0, 0, 0)`` used as padding when a
    list only needs one rectangle.

    Args:
        width (int): Width of the image
        height (int): Height of the image

    Returns:
        np.ndarray: The features, as a uint8 array built from the nested
        ``(2 lists) x (2 rectangles) x (4 values)`` entries
    """
    feats = []
    empty = (0, 0, 0, 0)
    for w in range(1, width + 1):
        for h in range(1, height + 1):
            for i in range(width - w):
                for j in range(height - h):
                    # 2 rectangle features
                    immediate = (i, j, w, h)
                    right = (i + w, j, w, h)
                    if i + 2 * w < width:  # Horizontally adjacent
                        feats.append(([right, empty], [immediate, empty]))

                    bottom = (i, j + h, w, h)
                    if j + 2 * h < height:  # Vertically adjacent
                        feats.append(([immediate, empty], [bottom, empty]))

                    # 3 rectangle features
                    right_2 = (i + 2 * w, j, w, h)
                    if i + 3 * w < width:  # Horizontally adjacent
                        feats.append(([right, empty], [right_2, immediate]))

                    bottom_2 = (i, j + 2 * h, w, h)
                    if j + 3 * h < height:  # Vertically adjacent
                        feats.append(([bottom, empty], [bottom_2, immediate]))

                    # 4 rectangle features
                    bottom_right = (i + w, j + h, w, h)
                    if i + 2 * w < width and j + 2 * h < height:
                        feats.append(([right, bottom], [immediate, bottom_right]))

    return np.asarray(feats, dtype=np.uint8)


@njit('float64[:](uint8[:])')
def init_weights(y_train: np.ndarray) -> np.ndarray:
    """Initialize the per-sample boosting weights from the training labels.

    Every negative sample (label 0) receives ``1 / (2 * n_negatives)`` and
    every positive sample (label 1) receives ``1 / (2 * n_positives)``, so each
    class carries half of the total weight and the weights sum to 1.

    Args:
        y_train (np.ndarray): Training labels (0 or 1)

    Returns:
        np.ndarray: The initialized weights, one per training sample
    """
    weights = np.empty_like(y_train, dtype=np.float64)
    n_pos = y_train.sum()  # labels are 0/1, so the sum is the positive count
    n_neg = y_train.shape[0] - n_pos
    # Each class is normalised by its OWN population (previously the two
    # denominators were swapped, which over-weighted the majority class).
    weights[y_train == 0] = 1.0 / (2 * n_neg)
    weights[y_train == 1] = 1.0 / (2 * n_pos)
    return weights


@njit('int8[:](int32[:], int32, int32)')
def classify_weak_clf(x_feat_i: np.ndarray, threshold: int, polarity: int) -> np.ndarray:
    """Apply one weak classifier (threshold + polarity) to one feature row.

    Args:
        x_feat_i (np.ndarray): Values of a single feature for every sample
        threshold (int): Trained threshold
        polarity (int): Trained polarity

    Returns:
        np.ndarray: int8 array holding 1 where
        ``polarity * value < polarity * threshold`` and 0 elsewhere
    """
    res = np.zeros_like(x_feat_i, dtype=np.int8)
    res[polarity * x_feat_i < polarity * threshold] = 1
    return res


@njit('Tuple((int32, float64, float64[:]))(int32[:, :], float64[:], int32[:, :], uint8[:])')
def select_best(classifiers: np.ndarray, weights: np.ndarray, X_feat: np.ndarray, y: np.ndarray) -> Tuple[int, float, np.ndarray]:
    """Select the weak classifier with the lowest weighted error.

    Args:
        classifiers (np.ndarray): One (threshold, polarity) row per feature
        weights (np.ndarray): Current weight of each training sample
        X_feat (np.ndarray): Feature values, indexed as X_feat[feature][sample]
        y (np.ndarray): Sample labels

    Returns:
        Tuple[int, float, np.ndarray]: Index of the best classifier, its
        weighted error, and its per-sample error vector (0.0 where the
        prediction matches the label, 1.0 where it does not)
    """
    best_clf, best_error = 0, np.inf
    best_accuracy = np.empty(X_feat.shape[1], dtype=np.float64)
    for j, (threshold, polarity) in enumerate(tqdm_iter(classifiers, "Selecting best classifiers")):
        # |prediction - label| is 0 for a hit and 1 for a miss.
        accuracy = np.abs(classify_weak_clf(X_feat[j], threshold, polarity) - y).astype(np.float64)
        # NOTE(review): textbook AdaBoost uses the weighted SUM here; np.mean
        # divides it by the sample count. That does not change which
        # classifier wins, but it does scale the returned error — confirm
        # this is intended before relying on the value downstream.
        error = np.mean(weights * accuracy)
        if error < best_error:
            best_clf, best_error, best_accuracy = j, error, accuracy
    return best_clf, best_error, best_accuracy


# Numba compilation is currently disabled for this function; the intended signature was:
#@njit('Tuple((float64[:], int32[:, :]))(uint16, int32[:, :], uint16[:, :], uint8[:])')
def train_viola_jones(T: int, X_feat: np.ndarray, X_feat_argsort: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Run T boosting rounds and collect the selected weak classifiers.

    Args:
        T (int): Number of weak classifiers (boosting rounds)
        X_feat (np.ndarray): Feature values, indexed as X_feat[feature][sample]
        X_feat_argsort (np.ndarray): Sorted indexes of the feature values
        y (np.ndarray): Sample labels

    Returns:
        Tuple[np.ndarray, np.ndarray]: The alpha of each round, and a (T, 3)
        int32 array whose rows are (feature index, threshold, polarity)
    """
    weights = init_weights(y)
    alphas = np.empty(T, dtype=np.float64)
    final_classifier = np.empty((T, 3), dtype=np.int32)

    #for t in tqdm_iter(range(T), "Training ViolaJones"):
    for t in tqdm(range(T), desc="Training ViolaJones", leave=False):
        # Re-normalise so the weights sum to 1 at the start of every round.
        weights /= weights.sum()
        classifiers = train_weak_clf(X_feat, X_feat_argsort, y, weights)
        clf, error, accuracy = select_best(classifiers, weights, X_feat, y)
        beta = error / (1.0 - error)
        # `accuracy` is 0 for correctly classified samples, so only those are
        # scaled by beta; misclassified samples keep their weight.
        weights *= beta ** (1.0 - accuracy)
        alphas[t] = np.log(1.0 / beta)
        final_classifier[t] = (clf, classifiers[clf][0], classifiers[clf][1])
    return alphas, final_classifier


@njit('uint8[:](float64[:], int32[:, :], int32[:, :])')
def classify_viola_jones(alphas: np.ndarray, classifiers: np.ndarray, X_feat: np.ndarray) -> np.ndarray:
    """Classify samples with the alpha-weighted vote of the trained classifiers.

    Args:
        alphas (np.ndarray): Trained alphas, one per weak classifier
        classifiers (np.ndarray): Trained classifiers, one
            (feature index, threshold, polarity) row per alpha
        X_feat (np.ndarray): Feature values, indexed as X_feat[feature][sample]

    Returns:
        np.ndarray: uint8 predictions, 1 where the weighted vote reaches half
        of the sum of the alphas and 0 elsewhere
    """
    total = np.zeros(X_feat.shape[1], dtype=np.float64)

    for i, alpha in enumerate(tqdm_iter(alphas, "Classifying ViolaJones")):
        (j, threshold, polarity) = classifiers[i]
        total += alpha * classify_weak_clf(X_feat[j], threshold, polarity)

    y_pred = np.zeros(X_feat.shape[1], dtype=np.uint8)
    y_pred[total >= 0.5 * np.sum(alphas)] = 1
    return y_pred

#@njit
#def get_best_anova_features(X: np.ndarray, y: np.ndarray) -> np.ndarray:
#    #SelectPercentile(f_classif, percentile = 10).fit(X, y).get_support(indices = True)
#    classes = [X.T[y == 0].astype(np.float64), X.T[y == 1].astype(np.float64)]
#    n_samples_per_class = np.asarray([classes[0].shape[0], classes[1].shape[0]])
#    n_samples = classes[0].shape[0] + classes[1].shape[0]
#    ss_all_data = (classes[0] ** 2).sum(axis = 0) + (classes[1] ** 2).sum(axis = 0)
#    sums_classes = [np.asarray(classes[0].sum(axis = 0)), np.asarray(classes[1].sum(axis = 0))]
#    sq_of_sums_all_data = (sums_classes[0] + sums_classes[1]) ** 2
#    sq_of_sums_args = [sums_classes[0] ** 2, sums_classes[1] ** 2]
#    ss_tot = ss_all_data - sq_of_sums_all_data / n_samples
#
#    sqd_sum_bw_n = sq_of_sums_args[0] / n_samples_per_class[0] + \
#        sq_of_sums_args[1] / n_samples_per_class[1] - sq_of_sums_all_data / n_samples
#    ss_wn = ss_tot - sqd_sum_bw_n
#    df_wn = n_samples - 2
#    msw = ss_wn / df_wn
#    f_values = sqd_sum_bw_n / msw
#    return np.sort(np.argsort(f_values)[::-1][: int(np.ceil(X.shape[0] / 10.0))])