From 5371c6f201d63196ebfb56fc5a42dc0c91eca432 Mon Sep 17 00:00:00 2001 From: saundersp Date: Tue, 25 Jul 2023 12:41:03 +0200 Subject: [PATCH] python : clearer main algorithm progression && revamp final test display --- python/common.py | 115 +++++++++++++++++++++-------------------------- python/projet.py | 109 ++++++++++++++++++++++---------------------- 2 files changed, 107 insertions(+), 117 deletions(-) diff --git a/python/common.py b/python/common.py index a03ccdd..bf25ba9 100644 --- a/python/common.py +++ b/python/common.py @@ -2,9 +2,9 @@ from toolbox import picke_multi_loader, format_time_ns, unit_test_argsort_2d from typing import List, Tuple from time import perf_counter_ns import numpy as np -from config import OUT_DIR, DATA_DIR +from config import OUT_DIR, DATA_DIR, __DEBUG -def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e-8) -> None: +def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU", "PY", "PGPU"], tol: float = 1e-8) -> None: """Test if the each result is equals to other devices. Given ViolaJones is a deterministic algorithm, the results no matter the device should be the same @@ -12,83 +12,78 @@ def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e Args: TS (List[int]): Number of trained weak classifiers. - labels (List[str], optional): List of the trained device names. Defaults to ["CPU", "GPU"]. + labels (List[str], optional): List of the trained device names. Defaults to ["CPU", "GPU", "PY", "PGPU"] (see config.py for more info). tol (float, optional): Float difference tolerance. Defaults to 1e-8. """ if len(labels) < 2: return print("Not enough devices to test") - fnc_s = perf_counter_ns() - n_total= 0 - n_success = 0 print(f"\n| {'Unit testing':<37} | {'Test state':<10} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |") print(f"|{'-'*39}|{'-'*12}|{'-'*20}|{'-'*31}|") - for filename in ["X_train_feat", "X_test_feat", "X_train_ii", "X_test_ii"]: - print(f"{filename}...", end = "\r") - bs = picke_multi_loader([f"{filename}_{label}" for label in labels], OUT_DIR) + fnc_s = perf_counter_ns() + n_total = 0 + n_success = 0 - for i, (b1, l1) in enumerate(zip(bs, labels)): - if b1 is None: - #print(f"| {filename:<22} - {l1:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") - continue - for j, (b2, l2) in enumerate(zip(bs, labels)): - if i >= j: - continue - if b2 is None: - #print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") - continue - n_total += 1 - s = perf_counter_ns() - state = np.abs(b1 - b2).mean() < tol - e = perf_counter_ns() - s - if state: - print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |") - n_success += 1 - else: - print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |") + def test_fnc(title, fnc): + nonlocal n_total, n_success + n_total += 1 + s = perf_counter_ns() + state = fnc() + e = perf_counter_ns() - s + if state: + print(f"| {title:<37} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |") + n_success += 1 + else: + print(f"| {title:<37} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |") - for filename, featname in zip(["X_train_feat_argsort", "X_test_feat_argsort"], ["X_train_feat", "X_test_feat"]): - print(f"Loading {filename}...", end = "\r") + for set_name in ["train", "test"]: + for filename in ["ii", "feat"]: + title = f"X_{set_name}_{filename}" + print(f"{filename}...", end = "\r") + bs = 
picke_multi_loader([f"{title}_{label}" for label in labels], OUT_DIR) + + for i, (b1, l1) in enumerate(zip(bs, labels)): + if b1 is None: + if __DEBUG: + print(f"| {title:<22} - {l1:<12} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") + continue + for j, (b2, l2) in enumerate(zip(bs, labels)): + if i >= j: + continue + if b2 is None: + if __DEBUG: + print(f"| {title:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") + continue + test_fnc(f"{title:<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol) + + title = f"X_{set_name}_feat_argsort" + print(f"Loading {title}...", end = "\r") feat = None bs = [] for label in labels: if feat is None: - feat_tmp = picke_multi_loader([f"{featname}_{label}"], OUT_DIR)[0] + feat_tmp = picke_multi_loader([f"X_{set_name}_feat_{label}"], OUT_DIR)[0] if feat_tmp is not None: feat = feat_tmp - bs.append(picke_multi_loader([f"{filename}_{label}"], OUT_DIR)[0]) + bs.append(picke_multi_loader([f"{title}_{label}"], OUT_DIR)[0]) for i, (b1, l1) in enumerate(zip(bs, labels)): if b1 is None: - #print(f"| {filename:<22} - {l1:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") + if __DEBUG: + print(f"| {title:<22} - {l1:<12} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") continue if feat is not None: - n_total += 1 - s = perf_counter_ns() - state = unit_test_argsort_2d(feat, b1) - e = perf_counter_ns() - s - if state: - print(f"| {filename:<22} - {l1:<4} argsort | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |") - n_success += 1 - else: - print(f"| {filename:<22} - {l1:<4} argsort | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |") + test_fnc(f"{title:<22} - {l1:<4} argsort", lambda: unit_test_argsort_2d(feat, b1)) for j, (b2, l2) in enumerate(zip(bs, labels)): if i >= j: continue if b2 is None: - #print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") + if __DEBUG: + print(f"| {title:<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") continue - n_total += 1 - s = perf_counter_ns() - state = np.abs(b1 - b2).mean() < tol - e = perf_counter_ns() - s - if state: - print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |") - n_success += 1 - else: - print(f"| {filename:<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |") + test_fnc(f"{title:<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol) for T in TS: for filename in ["alphas", "final_classifiers"]: @@ -97,23 +92,17 @@ def unit_test(TS: List[int], labels: List[str] = ["CPU", "GPU"], tol: float = 1e for i, (b1, l1) in enumerate(zip(bs, labels)): if b1 is None: - #print(f"| {filename + '_' + str(T):<22} - {l1:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") + if __DEBUG: + print(f"| {filename + '_' + str(T):<22} - {l1:<12} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") continue for j, (b2, l2) in enumerate(zip(bs, labels)): if i >= j: continue if b2 is None: - #print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") + if __DEBUG: + print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Skipped':>10} | {'None':>18} | {'None':<29} |") continue - n_total += 1 - s = perf_counter_ns() - state = np.abs(b1 - b2).mean() < tol - e = perf_counter_ns() - s - if state: - print(f"| {filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Passed':>10} | {e:>18,} | {format_time_ns(e):<29} |") - n_success += 1 - else: - print(f"| {filename 
+ '_' + str(T):<22} - {l1:<4} vs {l2:<4} | {'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |") + test_fnc(f"{filename + '_' + str(T):<22} - {l1:<4} vs {l2:<4}", lambda: np.abs(b1 - b2).mean() < tol) print(f"|{'-'*39}|{'-'*12}|{'-'*20}|{'-'*31}|") e = perf_counter_ns() - fnc_s diff --git a/python/projet.py b/python/projet.py index 84c0723..daabe35 100644 --- a/python/projet.py +++ b/python/projet.py @@ -26,17 +26,36 @@ else: from ViolaJonesCPU import apply_features, set_integral_image, argsort label = 'CPU' if COMPILE_WITH_C else 'PY' -def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: - """Train the weak classifiers. - - Args: - X_train (np.ndarray): Training images. - X_test (np.ndarray): Testing Images. - y_train (np.ndarray): Training labels. +def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Load the dataset, calculate features and integral images, apply features to images and calculate argsort of the featured images. Returns: - Tuple[np.ndarray, np.ndarray]: Training and testing features. + Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test """ + # Creating state saver folders if they don't exist already + if SAVE_STATE: + for folder_name in ["models", "out"]: + makedirs(folder_name, exist_ok = True) + + print(f"| {'Preprocessing':<49} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*20}|{'-'*31}|") + + X_train, y_train, X_test, y_test = state_saver("Loading sets", ["X_train", "y_train", "X_test", "y_test"], + load_datasets, FORCE_REDO, SAVE_STATE) + + if __DEBUG: + print("X_train") + print(X_train.shape) + print(X_train[IDX_INSPECT]) + print("X_test") + print(X_test.shape) + print(X_test[IDX_INSPECT]) + print("y_train") + print(y_train.shape) + print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET]) + print("y_test") + print(y_test.shape) + print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET]) + feats = state_saver("Building features", "feats", lambda: build_features(X_train.shape[1], X_train.shape[2]), FORCE_REDO, SAVE_STATE) if __DEBUG: @@ -77,13 +96,12 @@ def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) -> # fnc = lambda: get_best_anova_features(X_train_feat, y_train)) #indices = benchmark_function("Selecting best features (manual)", lambda: get_best_anova_features(X_train_feat, y_train)) - # FIXME Debug code - # print("indices") - # print(indices.shape) - # print(indices[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET]) - # assert indices.shape[0] == indices_new.shape[0], f"Indices length not equal : {indices.shape} != {indices_new.shape}" - # assert (eq := indices == indices_new).all(), f"Indices not equal : {eq.sum() / indices.shape[0]}" - # return 0, 0 + #if __DEBUG: + # print("indices") + # print(indices.shape) + # print(indices[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET]) + # assert indices.shape[0] == indices_new.shape[0], f"Indices length not equal : {indices.shape} != {indices_new.shape}" + # assert (eq := indices == indices_new).all(), f"Indices not equal : {eq.sum() / indices.shape[0]}" # X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices] @@ -104,8 +122,17 @@ def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) -> print(X_test_feat_argsort.shape) print(X_test_feat_argsort[IDX_INSPECT, : IDX_INSPECT_OFFSET]) benchmark_function("Arg unit test", lambda: unit_test_argsort_2d(X_test_feat, 
X_test_feat_argsort)) - del X_test_feat_argsort + return X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test + +def train(X_train_feat: np.ndarray, X_train_feat_argsort: np.ndarray, y_train: np.ndarray) -> None: + """Train the weak classifiers. + + Args: + X_train (np.ndarray): Training images. + X_test (np.ndarray): Testing Images. + y_train (np.ndarray): Training labels. + """ print(f"\n| {'Training':<49} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*20}|{'-'*31}|") for T in TS: @@ -117,15 +144,13 @@ def bench_train(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray) -> print("final_classifiers") print(final_classifiers) - return X_train_feat, X_test_feat - -def bench_accuracy(label, X_train_feat: np.ndarray, X_test_feat: np.ndarray, y_train: np.ndarray, y_test: np.ndarray) -> None: +def testing_and_evaluating(X_train_feat: np.ndarray, y_train: np.ndarray, X_test_feat: np.ndarray, y_test: np.ndarray) -> None: """Benchmark the trained classifiers on the training and testing sets. Args: X_train_feat (np.ndarray): Training features. - X_test_feat (np.ndarray): Testing features. y_train (np.ndarray): Training labels. + X_test_feat (np.ndarray): Testing features. y_test (np.ndarray): Testing labels. """ print(f"\n| {'Testing':<26} | Time spent (ns) (E) | {'Formatted time spent (E)':<29}", end = " | ") @@ -162,45 +187,21 @@ def bench_accuracy(label, X_train_feat: np.ndarray, X_test_feat: np.ndarray, y_t print(f"| {'ViolaJones T = ' + str(T):<19} | {e_acc:>7.2%} | {e_f1:>6.2f} | {e_FN:>6,} | {e_FP:>6,}", end = " | ") print(f"{t_acc:>7.2%} | {t_f1:>6.2f} | {t_FN:>6,} | {t_FP:>6,} |") -def _main_() -> None: +def main() -> None: + print(f"| {'Unit testing':<49} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |") + print(f"|{'-'*51}|{'-'*20}|{'-'*31}|") + benchmark_function("Testing format_time_ns", format_time_ns_test) + print() - # Creating state saver folders if they don't exist already - if SAVE_STATE: - for folder_name in ["models", "out"]: - makedirs(folder_name, exist_ok = True) - - print(f"| {'Preprocessing':<49} | {'Time spent (ns)':<18} | {'Formatted time spent':<29} |\n|{'-'*51}|{'-'*20}|{'-'*31}|") - - X_train, y_train, X_test, y_test = state_saver("Loading sets", ["X_train", "y_train", "X_test", "y_test"], - load_datasets, FORCE_REDO, SAVE_STATE) - - if __DEBUG: - print("X_train") - print(X_train.shape) - print(X_train[IDX_INSPECT]) - print("X_test") - print(X_test.shape) - print(X_test[IDX_INSPECT]) - print("y_train") - print(y_train.shape) - print(y_train[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET]) - print("y_test") - print(y_test.shape) - print(y_test[IDX_INSPECT: IDX_INSPECT + IDX_INSPECT_OFFSET]) - - X_train_feat, X_test_feat = bench_train(X_train, X_test, y_train) + X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test = preprocessing() + train(X_train_feat, X_train_feat_argsort, y_train) # X_train_feat, X_test_feat = picke_multi_loader([f"X_train_feat_{label}", f"X_test_feat_{label}"], OUT_DIR) # indices = picke_multi_loader(["indices"], OUT_DIR)[0] # X_train_feat, X_test_feat = X_train_feat[indices], X_test_feat[indices] - bench_accuracy(label, X_train_feat, X_test_feat, y_train, y_test) + testing_and_evaluating(X_train_feat, y_train, X_test_feat, y_test) + unit_test(TS) if __name__ == "__main__": - _main_() - if __DEBUG: - toolbox_unit_test() - - # Only execute unit test after having trained the specified labels - unit_test(TS, ["GPU", "CPU", "PY", "PGPU"]) - pass + main()
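
Below is a minimal, self-contained sketch of the restructured entry point this patch introduces in python/projet.py: main() now reads as a linear pipeline (preprocessing -> train -> testing_and_evaluating -> unit_test). The stage bodies are stand-in stubs, the TS value is a placeholder, and the format_time_ns self-test at the top of the real main() is omitted; only the call order, the signatures, and the data handed between stages mirror the patch.

from typing import List, Tuple
import numpy as np

def preprocessing() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    # Stub: in the patch this loads the sets, builds and applies the features,
    # computes the integral images and the argsort of the featured images.
    X_train_feat = np.random.rand(4, 8)
    X_train_feat_argsort = np.argsort(X_train_feat, axis = 1)
    y_train = np.zeros(8)
    X_test_feat = np.random.rand(4, 8)
    y_test = np.zeros(8)
    return X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test

def train(X_train_feat: np.ndarray, X_train_feat_argsort: np.ndarray, y_train: np.ndarray) -> None:
    # Stub: trains the weak classifiers for every T in TS and saves them.
    pass

def testing_and_evaluating(X_train_feat: np.ndarray, y_train: np.ndarray,
                           X_test_feat: np.ndarray, y_test: np.ndarray) -> None:
    # Stub: evaluates the saved classifiers on the training and testing sets.
    pass

def unit_test(TS: List[int]) -> None:
    # Stub: cross-device comparison of the saved results (see common.py).
    pass

TS = [1, 5, 10]  # placeholder; the real values come from config.py

def main() -> None:
    X_train_feat, X_train_feat_argsort, y_train, X_test_feat, y_test = preprocessing()
    train(X_train_feat, X_train_feat_argsort, y_train)
    testing_and_evaluating(X_train_feat, y_train, X_test_feat, y_test)
    unit_test(TS)

if __name__ == "__main__":
    main()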
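
And here is a self-contained sketch of the timing/pass-fail helper that the revamped unit_test() in python/common.py uses to build its table rows. format_time_ns() below is a simplified stand-in for the project's formatter and the compared arrays are synthetic; the nonlocal counters and the row layout follow the patch.

from time import perf_counter_ns
from typing import Callable
import numpy as np

def format_time_ns(ns: int) -> str:
    # Simplified stand-in for the project's formatter.
    return f"{ns / 1e6:.3f} ms"

def run_checks(tol: float = 1e-8) -> None:
    n_total = 0
    n_success = 0

    def test_fnc(title: str, fnc: Callable[[], bool]) -> None:
        # The helper closes over the counters of the enclosing function;
        # `nonlocal` lets it update them instead of creating shadowing locals.
        nonlocal n_total, n_success
        n_total += 1
        s = perf_counter_ns()
        state = fnc()
        e = perf_counter_ns() - s
        if state:
            n_success += 1
        print(f"| {title:<37} | {'Passed' if state else 'Failed':>10} | {e:>18,} | {format_time_ns(e):<29} |")

    a = np.arange(10, dtype = np.float64)
    b = a + 1e-9
    # Each lambda is created and called within the same statement, so it
    # captures exactly the arrays meant for that comparison.
    test_fnc("a vs b", lambda: np.abs(a - b).mean() < tol)
    test_fnc("a vs a + 1", lambda: np.abs(a - (a + 1)).mean() < tol)
    print(f"Passed {n_success}/{n_total} checks")

if __name__ == "__main__":
    run_checks()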