ViolaJones/cpp/ViolaJonesCPU.cpp
2024-04-28 22:11:33 +02:00

172 lines
5.9 KiB
C++

#include "data.hpp"
#include "config.hpp"
#if GPU_BOOSTED == false
/**
* @brief Transform the input images in integrated images (CPU version).
*
* @param X Dataset of images
* @return Dataset of integrated images
*/
np::Array<uint32_t> set_integral_image(const np::Array<uint8_t>& set) noexcept {
np::Array<uint32_t> X_ii = np::empty<uint32_t>(set.shape);
size_t i, y, x, s;
uint32_t ii[set.shape[1] * set.shape[2]];
const size_t length = np::prod(set.shape);
for (size_t offset = 0; offset < length; offset += set.shape[1] * set.shape[2]) {
for (i = 0; i < set.shape[1] * set.shape[2]; ++i)
ii[i] = 0;
for (y = 1; y < set.shape[1]; ++y) {
s = 0;
for (x = 0; x < set.shape[2] - 1; ++x) {
s += set[offset + (y - 1) * set.shape[2] + x];
ii[y * set.shape[2] + x + 1] = s + ii[(y - 1) * set.shape[2] + x + 1];
}
}
for (y = 0; y < set.shape[1]; ++y)
for (x = 0; x < set.shape[2]; ++x)
X_ii[offset + y * set.shape[2] + x] = ii[y * set.shape[2] + x];
}
return X_ii;
}
constexpr static inline int16_t __compute_feature__(const np::Array<uint32_t>& X_ii, const size_t& j, const int16_t& x, const int16_t& y, const int16_t& w, const int16_t& h) noexcept {
const size_t _y = y * X_ii.shape[1] + x;
const size_t _yh = _y + h * X_ii.shape[1];
return X_ii[j + _yh + w] + X_ii[j + _y] - X_ii[j + _yh] - X_ii[j + _y + w];
}
/**
* @brief Apply the features on a integrated image dataset (CPU version).
*
* @param feats Features to apply
* @param X_ii Integrated image dataset
* @return Applied features
*/
np::Array<int32_t> apply_features(const np::Array<uint8_t>& feats, const np::Array<uint32_t>& X_ii) noexcept {
np::Array<int32_t> X_feat = np::empty<int32_t>({ feats.shape[0], X_ii.shape[0] });
size_t j, feat_idx = 0;
int16_t p1, p2, n1, n2;
const size_t feats_length = np::prod(feats.shape), X_ii_length = np::prod(X_ii.shape);
const size_t feats_step = np::prod(feats.shape, 1), X_ii_step = np::prod(X_ii.shape, 1);
for (size_t i = 0; i < feats_length; i += feats_step){
for (j = 0; j < X_ii_length; j += X_ii_step) {
p1 = __compute_feature__(X_ii, j, feats[i + 0], feats[i + 1], feats[i + 2], feats[i + 3]);
p2 = __compute_feature__(X_ii, j, feats[i + 4], feats[i + 5], feats[i + 6], feats[i + 7]);
n1 = __compute_feature__(X_ii, j, feats[i + 8], feats[i + 9], feats[i + 10], feats[i + 11]);
n2 = __compute_feature__(X_ii, j, feats[i + 12], feats[i + 13], feats[i + 14], feats[i + 15]);
X_feat[feat_idx++] = static_cast<int32_t>(p1 + p2) - static_cast<int32_t>(n1 + n2);
}
}
return X_feat;
}
np::Array<float64_t> train_weak_clf(const np::Array<int32_t>& X_feat, const np::Array<uint16_t>& X_feat_argsort, const np::Array<uint8_t>& y, const np::Array<float64_t>& weights) noexcept {
float64_t total_pos = 0.0, total_neg = 0.0;
for(size_t i = 0; i < y.shape[0]; ++i)
(y[i] == static_cast<uint8_t>(1) ? total_pos : total_neg) += weights[i];
np::Array<float64_t> classifiers = np::empty<float64_t>({ X_feat.shape[0], 2});
for(size_t i = 0; i < X_feat.shape[0]; ++i){
size_t pos_seen = 0, neg_seen = 0;
float64_t pos_weights = 0.0, neg_weights = 0.0;
float64_t min_error = np::inf, best_threshold = 0.0, best_polarity = 0.0;
for(size_t j = 0; j < X_feat_argsort.shape[1]; ++j) {
const float64_t error = std::min(neg_weights + total_pos - pos_weights, pos_weights + total_neg - neg_weights);
if (error < min_error){
min_error = error;
best_threshold = X_feat[i * X_feat.shape[1] + X_feat_argsort[i * X_feat.shape[1] + j]];
best_polarity = pos_seen > neg_seen ? 1.0 : -1.0;
}
if(y[X_feat_argsort[i * X_feat.shape[1] + j]] == static_cast<uint8_t>(1)){
++pos_seen;
pos_weights += weights[X_feat_argsort[i * X_feat.shape[1] + j]];
} else {
++neg_seen;
neg_weights += weights[X_feat_argsort[i * X_feat.shape[1] + j]];
}
}
classifiers[i * 2] = best_threshold; classifiers[i * 2 + 1] = best_polarity;
}
return classifiers;
}
/**
* @brief Perform an indirect sort of a given array within a given bound.
*
* @tparam T Inner type of the array
* @param a Array to sort
* @param indices Array of indices to write to
* @param low lower bound to sort
* @param high higher bound to sort
*/
template<typename T>
static void argsort(const T* const a, uint16_t* const indices, size_t low, size_t high) noexcept {
const size_t total = high - low + 1;
size_t* const stack = new size_t[total]{low, high};
//size_t stack[total];
//stack[0] = l;
//stack[1] = h;
size_t top = 1;
while (top <= total) {
high = stack[top--];
low = stack[top--];
if(low >= high)
break;
const size_t p = as_partition(a, indices, low, high);
if (p - 1 > low && p - 1 < total) {
stack[++top] = low;
stack[++top] = p - 1;
}
if (p + 1 < high) {
stack[++top] = p + 1;
stack[++top] = high;
}
}
delete[] stack;
}
/**
* @brief Apply argsort to every column of a given 2D array.
*
* @tparam T Inner type of the array
* @param a 2D Array to sort
* @return 2D Array of indices that sort the array
*/
template<typename T>
static np::Array<uint16_t> argsort_bounded(const np::Array<T>& a, const size_t& low, const size_t& high) noexcept {
np::Array<uint16_t> indices = np::empty(a.shape);
map(indices, [](const size_t& i, const uint16_t&) -> uint16_t { return i; });
argsort_bounded(a, indices, low, high);
return indices;
}
/**
* @brief Perform an indirect sort on each column of a given 2D array (CPU version).
*
* @param a 2D Array to sort
* @return 2D Array of indices that sort the array
*/
np::Array<uint16_t> argsort_2d(const np::Array<int32_t>& X_feat) noexcept {
const np::Array<uint16_t> indices = np::empty<uint16_t>(X_feat.shape);
const size_t length = np::prod(X_feat.shape);
for (size_t i = 0; i < length; i += X_feat.shape[1]) {
for(size_t j = 0; j < X_feat.shape[1]; ++j) indices[i + j] = j;
argsort(&X_feat[i], &indices[i], 0, X_feat.shape[1] - 1);
}
return indices;
}
#endif // GPU_BOOSTED == false