#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

#include "data.hpp"
#include "toolbox.hpp"

// NOTE(review): in this copy of the file np::Array and np::empty appear without
// template arguments. Their spelling is kept exactly as found because the element
// types cannot be determined from this file alone; confirm against data.hpp /
// toolbox.hpp before building.

/**
 * @brief Computes an exclusive 2-D prefix-sum table (integral image) for every image in `set`.
 *
 * `set` is addressed as consecutive images of shape[1] rows by shape[2] columns
 * (element = offset + y * shape[2] + x). In each output image, entry (y, x) holds
 * the sum of the input values in rows [0, y) and columns [0, x); row 0 and
 * column 0 are therefore always zero.
 *
 * @param set Input images.
 * @return Array created with the same shape as `set`, holding the per-image sums.
 */
np::Array set_integral_image_cpu(const np::Array& set) noexcept {
	np::Array X_ii = np::empty(set.shape);

	const size_t height = set.shape[1];
	const size_t width = set.shape[2];
	const size_t image_size = height * width;
	const size_t length = np::prod(set.shape);

	// Heap-backed scratch buffer instead of a runtime-sized stack array: such arrays
	// are not standard C++ and can overflow the stack for large images.
	// Zero-initialised once: only row 0 and column 0 rely on the zeros and the loops
	// below never write them; every other cell is overwritten for each image before
	// it is read. An allocation failure terminates the program (function is noexcept).
	std::vector<uint32_t> ii(image_size, 0);

	for (size_t offset = 0; offset < length; offset += image_size) {
		for (size_t y = 1; y < height; ++y) {
			const size_t row = y * width;
			const size_t prev_row = row - width;
			size_t row_sum = 0; // running sum of input row y - 1, columns [0, x)
			for (size_t x = 1; x < width; ++x) {
				row_sum += set[offset + prev_row + x - 1];
				ii[row + x] = row_sum + ii[prev_row + x];
			}
		}

		// Copy the finished table into this image's slot of the output.
		for (size_t k = 0; k < image_size; ++k)
			X_ii[offset + k] = ii[k];
	}
	return X_ii;
}

/**
 * @brief Four-corner lookup on one integral image.
 *
 * Returns X_ii[bottom-right] + X_ii[top-left] - X_ii[bottom-left] - X_ii[top-right]
 * for the rectangle whose top-left corner is column `x`, row `y`, spanning `w`
 * columns and `h` rows. With the exclusive tables produced by
 * set_integral_image_cpu this is the sum of the covered input values.
 *
 * NOTE(review): the result is narrowed to int16_t; if the table entries can exceed
 * the int16_t range the value is truncated. Left unchanged because callers may
 * depend on it.
 * NOTE(review): identifiers containing a double underscore are reserved in C++;
 * the name is kept so existing references keep working.
 *
 * @param X_ii Integral images; assumed 3-D (image, row, column) as produced by
 *             set_integral_image_cpu.
 * @param j    Offset of the first element of the image to read.
 * @param x    Left column of the rectangle.
 * @param y    Top row of the rectangle.
 * @param w    Rectangle width in columns.
 * @param h    Rectangle height in rows.
 */
constexpr static inline int16_t __compute_feature__(const np::Array& X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept {
	// Row stride is the image width (shape[2]), matching the layout written by
	// set_integral_image_cpu. The previous shape[1] only worked for square images.
	const size_t stride = X_ii.shape[2];
	const size_t top_left = y * stride + x;
	const size_t bottom_left = top_left + h * stride;
	return X_ii[j + bottom_left + w] + X_ii[j + top_left] - X_ii[j + bottom_left] - X_ii[j + top_left + w];
}

/**
 * @brief Evaluates every feature on every integral image.
 *
 * Each feature is read as 16 consecutive values of `feats`: four (x, y, w, h)
 * rectangles, the first two added and the last two subtracted. Results are
 * written feature-major: all images for feature 0, then all images for feature 1,
 * and so on.
 *
 * @param feats Feature descriptors.
 * @param X_ii  Integral images.
 * @return Array created with shape { feats.shape[0], X_ii.shape[0] }.
 */
np::Array apply_features_cpu(const np::Array& feats, const np::Array& X_ii) noexcept {
	np::Array X_feat = np::empty({ feats.shape[0], X_ii.shape[0] });

	const size_t feats_length = np::prod(feats.shape), X_ii_length = np::prod(X_ii.shape);
	// presumably np::prod(shape, 1) is the element count of one entry along axis 0 — verify in toolbox.hpp
	const size_t feats_step = np::prod(feats.shape, 1), X_ii_step = np::prod(X_ii.shape, 1);

	size_t feat_idx = 0;
	for (size_t i = 0; i < feats_length; i += feats_step) {
		for (size_t j = 0; j < X_ii_length; j += X_ii_step) {
			const int16_t p1 = __compute_feature__(X_ii, j, feats[i + 0], feats[i + 1], feats[i + 2], feats[i + 3]);
			const int16_t p2 = __compute_feature__(X_ii, j, feats[i + 4], feats[i + 5], feats[i + 6], feats[i + 7]);
			const int16_t n1 = __compute_feature__(X_ii, j, feats[i + 8], feats[i + 9], feats[i + 10], feats[i + 11]);
			const int16_t n2 = __compute_feature__(X_ii, j, feats[i + 12], feats[i + 13], feats[i + 14], feats[i + 15]);
			// int16_t operands are promoted to int, so neither sum nor the difference
			// can overflow. NOTE(review): the original wrapped each sum in a
			// static_cast whose target type is not visible in this copy; the result
			// is identical for any target of at least 32 bits.
			X_feat[feat_idx++] = (p1 + p2) - (n1 + n2);
		}
	}
	return X_feat;
}

/**
 * @brief Picks, for every feature, the threshold and polarity with the lowest weighted error.
 *
 * For each feature row the samples are visited in the order given by
 * `X_feat_argsort`. Before each sample is counted, the error of splitting at that
 * sample is min(neg_seen_weight + total_pos - pos_seen_weight,
 *               pos_seen_weight + total_neg - neg_seen_weight);
 * the first strictly smaller error wins.
 *
 * @param X_feat         Feature values, one row per feature.
 * @param X_feat_argsort Per-row sample indices; rows are addressed with the same
 *                       stride as X_feat (assumed to have the same shape).
 * @param y              Labels; a value equal to 1 counts as positive, anything else as negative.
 * @param weights        Per-sample weights.
 * @return Array created with shape { X_feat.shape[0], 2 }: threshold, then polarity
 *         (1.0 if more positives than negatives were seen before the split, else -1.0).
 */
np::Array train_weak_clf_cpu(const np::Array& X_feat, const np::Array& X_feat_argsort, const np::Array& y, const np::Array& weights) noexcept {
	float64_t total_pos = 0.0, total_neg = 0.0;
	for (size_t i = 0; i < y.shape[0]; ++i) {
		if (y[i] == 1)
			total_pos += weights[i];
		else
			total_neg += weights[i];
	}

	np::Array classifiers = np::empty({ X_feat.shape[0], 2});
	const size_t row_stride = X_feat.shape[1];

	for (size_t i = 0; i < X_feat.shape[0]; ++i) {
		const size_t row = i * row_stride;
		size_t pos_seen = 0, neg_seen = 0;
		float64_t pos_weights = 0.0, neg_weights = 0.0;
		float64_t min_error = np::inf, best_threshold = 0.0, best_polarity = 0.0;

		for (size_t j = 0; j < X_feat_argsort.shape[1]; ++j) {
			// Index of the j-th sample in this feature's sort order, looked up once.
			const size_t sample = X_feat_argsort[row + j];

			const float64_t error = std::min(neg_weights + total_pos - pos_weights, pos_weights + total_neg - neg_weights);
			if (error < min_error) {
				min_error = error;
				best_threshold = X_feat[row + sample];
				best_polarity = pos_seen > neg_seen ? 1.0 : -1.0;
			}

			// The current sample is accounted for only after the split was evaluated.
			if (y[sample] == 1) {
				++pos_seen;
				pos_weights += weights[sample];
			} else {
				++neg_seen;
				neg_weights += weights[sample];
			}
		}

		classifiers[i * 2] = best_threshold;
		classifiers[i * 2 + 1] = best_polarity;
	}
	return classifiers;
}

/**
 * @brief Produces, for every row of `X_feat`, an index row reordered by `argsort`.
 *
 * Each row of the result is first filled with 0 .. shape[1] - 1 and then handed,
 * together with the matching row of values, to `argsort` (declared elsewhere) with
 * the bounds 0 and shape[1] - 1 (presumably inclusive — confirm in toolbox.hpp).
 *
 * @param X_feat Values to sort, shape[1] elements per row.
 * @return Array created with the same shape as `X_feat`, holding the indices.
 */
np::Array argsort_2d_cpu(const np::Array& X_feat) noexcept {
	// Not const: the array is filled below, and a const local cannot be moved on return.
	np::Array indices = np::empty(X_feat.shape);

	const size_t row_length = X_feat.shape[1];
	const size_t length = np::prod(X_feat.shape);
	for (size_t i = 0; i < length; i += row_length) {
		for (size_t j = 0; j < row_length; ++j)
			indices[i + j] = j;
		argsort(&X_feat[i], &indices[i], 0, row_length - 1);
	}
	return indices;
}