94 lines
3.9 KiB
C++
94 lines
3.9 KiB
C++
#include "data.hpp"
|
|
#include "toolbox.hpp"
|
|
|
|
/**
 * @brief Compute a zero-bordered integral image for every image of a set.
 *
 * The input is read as consecutive images of set.shape[1] rows by
 * set.shape[2] columns. For each image, output element (y, x) holds the sum
 * of the input pixels located in rows [0, y) and columns [0, x); row 0 and
 * column 0 of the output are therefore always 0, and the last input row and
 * column never contribute to any output element.
 *
 * The sums are accumulated directly in the returned array instead of in a
 * variable-length stack array (a non-standard extension whose size depends on
 * the image dimensions).
 *
 * @param set Images to process, indexed as (image, row, column)
 * @return Array with the same shape as set containing the integral images
 */
np::Array<uint32_t> set_integral_image_cpu(const np::Array<uint8_t>& set) noexcept {
    // NOTE(review): np::empty presumably does not zero-fill, so the zero
    // border is written explicitly below.
    np::Array<uint32_t> X_ii = np::empty<uint32_t>(set.shape);

    const size_t height = set.shape[1];
    const size_t width = set.shape[2];
    const size_t image_size = height * width;
    const size_t length = np::prod(set.shape);

    for (size_t offset = 0; offset < length; offset += image_size) {
        // First output row is all zeros.
        for (size_t x = 0; x < width; ++x)
            X_ii[offset + x] = 0;

        for (size_t y = 1; y < height; ++y) {
            const size_t row = offset + y * width;
            const size_t prev_row = row - width;

            // First output column is zero on every row.
            X_ii[row] = 0;

            // Running sum of the previous input row, added to the integral
            // value directly above the element being written.
            uint32_t row_sum = 0;
            for (size_t x = 0; x + 1 < width; ++x) {
                row_sum += set[prev_row + x];
                X_ii[row + x + 1] = row_sum + X_ii[prev_row + x + 1];
            }
        }
    }

    return X_ii;
}
|
|
|
|
/**
 * @brief Evaluate one rectangle on an integral image using four corner lookups.
 *
 * Computes ii(y + h, x + w) + ii(y, x) - ii(y + h, x) - ii(y, x + w), where
 * ii is the image that starts at flat offset j inside X_ii.
 *
 * @param X_ii Integral images stored contiguously, indexed as (image, row, column)
 * @param j Flat offset of the first element of the image to read
 * @param x Column of the rectangle's left edge
 * @param y Row of the rectangle's top edge
 * @param w Rectangle width, in columns
 * @param h Rectangle height, in rows
 * @return The corner combination narrowed to int16_t
 */
constexpr static inline int16_t __compute_feature__(const np::Array<uint32_t>& X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept {
    // The distance between two consecutive rows is the image width, i.e. the
    // last dimension, matching the `y * shape[2] + x` indexing used by
    // set_integral_image_cpu. Using shape[1] (the height) is only equivalent
    // for square images.
    const size_t row_stride = X_ii.shape[2];

    const size_t _y = y * row_stride + x;
    const size_t _yh = _y + h * row_stride;

    // NOTE(review): the uint32_t result is narrowed to int16_t on return, so
    // rectangle sums outside the int16_t range are not preserved - confirm the
    // window sizes in use keep the sums small enough.
    return X_ii[j + _yh + w] + X_ii[j + _y] - X_ii[j + _yh] - X_ii[j + _y + w];
}
|
|
|
|
/**
 * @brief Evaluate every feature on every integral image.
 *
 * Each feature is read as 16 consecutive values of feats, interpreted as four
 * (x, y, w, h) rectangles. The first two rectangles are added together and
 * the last two are subtracted from that sum.
 *
 * @param feats Feature descriptions, one feature per entry of the first dimension
 * @param X_ii Integral images, one image per entry of the first dimension
 * @return Array of shape (feats.shape[0], X_ii.shape[0]) filled feature by
 *         feature, each feature being evaluated on all images in order
 */
np::Array<int32_t> apply_features_cpu(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
    np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });

    // Total element counts, and the step between two consecutive features /
    // images (np::prod(shape, 1) presumably skips the first dimension).
    const size_t feats_end = np::prod(feats.shape);
    const size_t images_end = np::prod(X_ii.shape);
    const size_t feat_stride = np::prod(feats.shape, 1);
    const size_t image_stride = np::prod(X_ii.shape, 1);

    size_t out = 0;
    for (size_t f = 0; f < feats_end; f += feat_stride) {
        for (size_t img = 0; img < images_end; img += image_stride) {
            // Two rectangles counted positively...
            const int16_t pos_a = __compute_feature__(X_ii, img, feats[f + 0], feats[f + 1], feats[f + 2], feats[f + 3]);
            const int16_t pos_b = __compute_feature__(X_ii, img, feats[f + 4], feats[f + 5], feats[f + 6], feats[f + 7]);
            // ...and two counted negatively.
            const int16_t neg_a = __compute_feature__(X_ii, img, feats[f + 8], feats[f + 9], feats[f + 10], feats[f + 11]);
            const int16_t neg_b = __compute_feature__(X_ii, img, feats[f + 12], feats[f + 13], feats[f + 14], feats[f + 15]);

            const int32_t positive = static_cast<int32_t>(pos_a + pos_b);
            const int32_t negative = static_cast<int32_t>(neg_a + neg_b);
            X_feat[out] = positive - negative;
            ++out;
        }
    }

    return X_feat;
}
|
|
|
|
/**
 * @brief Pick, for every feature, the threshold and polarity with the lowest weighted error.
 *
 * For each feature the samples are visited in the order given by
 * X_feat_argsort. Before each sample is accounted for, the error of placing
 * the threshold at that sample is taken as the smaller of
 * (negative weight seen + positive weight not yet seen) and
 * (positive weight seen + negative weight not yet seen).
 *
 * @param X_feat Feature values, one row of X_feat.shape[1] samples per feature
 * @param X_feat_argsort Per-feature sample indices giving the visiting order
 * @param y Labels; a value of 1 is counted as positive, anything else as negative
 * @param weights Per-sample weights
 * @return Array of shape (X_feat.shape[0], 2) holding (threshold, polarity) per feature
 */
np::Array<float64_t> train_weak_clf_cpu(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y, const np::Array<float64_t>& weights) noexcept {
    // Total weight carried by each class.
    float64_t total_pos = 0.0, total_neg = 0.0;
    for (size_t k = 0; k < y.shape[0]; ++k) {
        if (y[k] == static_cast<uint8_t>(1))
            total_pos += weights[k];
        else
            total_neg += weights[k];
    }

    np::Array<float64_t> classifiers = np::empty<float64_t>({ X_feat.shape[0], 2});

    for (size_t feature = 0; feature < X_feat.shape[0]; ++feature) {
        // Flat offset of this feature's row in both X_feat and X_feat_argsort.
        const size_t row = feature * X_feat.shape[1];

        size_t pos_seen = 0, neg_seen = 0;
        float64_t pos_weights = 0.0, neg_weights = 0.0;
        float64_t min_error = np::inf, best_threshold = 0.0, best_polarity = 0.0;

        for (size_t rank = 0; rank < X_feat_argsort.shape[1]; ++rank) {
            const size_t sample = X_feat_argsort[row + rank];

            const float64_t error_if_positive_above = neg_weights + total_pos - pos_weights;
            const float64_t error_if_negative_above = pos_weights + total_neg - neg_weights;
            const float64_t error = std::min(error_if_positive_above, error_if_negative_above);

            if (error < min_error) {
                min_error = error;
                best_threshold = X_feat[row + sample];
                if (pos_seen > neg_seen)
                    best_polarity = 1.0;
                else
                    best_polarity = -1.0;
            }

            // Account for the current sample only after it was evaluated as a threshold.
            if (y[sample] == static_cast<uint8_t>(1)) {
                ++pos_seen;
                pos_weights += weights[sample];
            } else {
                ++neg_seen;
                neg_weights += weights[sample];
            }
        }

        classifiers[feature * 2] = best_threshold;
        classifiers[feature * 2 + 1] = best_polarity;
    }

    return classifiers;
}
|
|
|
|
/**
 * @brief Compute, for every row of X_feat, the indices produced by argsort on that row.
 *
 * Each row of the result is first filled with 0, 1, ..., X_feat.shape[1] - 1
 * and then handed to argsort together with the matching row of X_feat and the
 * inclusive bounds [0, X_feat.shape[1] - 1].
 *
 * @param X_feat Values to sort, one row of X_feat.shape[1] elements per feature
 * @return Array with the same shape as X_feat holding the per-row indices
 */
np::Array<uint16_t> argsort_2d_cpu(const np::Array<int32_t>& X_feat) noexcept {
    // Deliberately not const: the buffer is filled below and returned by
    // value, and a const local cannot be moved from on return.
    np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);

    const size_t row_length = X_feat.shape[1];
    const size_t length = np::prod(X_feat.shape);

    // When row_length is 0, length is 0 as well, so the body (and the
    // row_length - 1 below) is never reached.
    for (size_t row = 0; row < length; row += row_length) {
        // Indices are stored as uint16_t, so a row can address at most 65536 samples.
        for (size_t j = 0; j < row_length; ++j)
            indices[row + j] = static_cast<uint16_t>(j);

        argsort(&X_feat[row], &indices[row], 0, row_length - 1);
    }

    return indices;
}
|
|
|