#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <type_traits>
/// @brief Computes the Jaro similarity of two strings.
///
/// The strings are compared element by element as `char` values. A character
/// of `source` matches an as-yet-unmatched equal character of `target` when
/// their positions differ by at most `max(|source|, |target|) / 2 - 1`
/// (0 when the longer string has fewer than two characters).
///
/// @tparam T Floating-point type of the result (defaults to `double`). The
///           function does not participate in overload resolution for
///           non-floating-point `T`.
/// @param source First string.
/// @param target Second string.
/// @return 1 when the strings are equal (including both empty); 0 when exactly
///         one of them is empty or when no characters match; otherwise
///         `(m/|source| + m/|target| + 1 - t/m/2) / 3`, where `m` is the
///         number of matched characters and `t` is the number of matched
///         positions whose characters differ when both matched sequences are
///         read in order.
template <typename T = double>
inline std::enable_if_t<std::is_floating_point_v<T>, T>
jaroSimilarity(const std::string& source,
               const std::string& target)
{
    if (source == target)
        return T{1};
    if (source.empty() || target.empty())
        return T{0};

    const std::size_t sl = source.length();
    const std::size_t tl = target.length();
    const std::size_t longest = std::max(sl, tl);
    // Guard the subtraction: for longest < 2 the unsigned "/ 2 - 1" would wrap.
    const std::size_t match_distance = longest < 2 ? 0 : longest / 2 - 1;

    // Value-initialised (all false) flags marking which characters are matched.
    auto source_matches = std::make_unique<bool[]>(sl);
    auto target_matches = std::make_unique<bool[]>(tl);

    std::size_t matches = 0;
    for (std::size_t i = 0; i < sl; ++i) {
        // Half-open window [start, end) of target positions close enough to i.
        const std::size_t start = i > match_distance ? i - match_distance : 0;
        const std::size_t end = std::min(i + match_distance + 1, tl);
        for (std::size_t k = start; k < end; ++k) {
            if (!target_matches[k] && source[i] == target[k]) {
                target_matches[k] = source_matches[i] = true;
                ++matches;
                break;  // each source character matches at most one target character
            }
        }
    }
    if (matches == 0) {
        return T{0};
    }

    // Walk both matched subsequences in order and count positions that differ.
    // Every flagged source character has exactly one flagged target character,
    // so the inner scan always finds a flagged entry before running off the end.
    std::size_t out_of_order = 0;
    for (std::size_t i = 0, k = 0; i < sl; ++i) {
        if (!source_matches[i])
            continue;
        while (!target_matches[k])
            ++k;
        if (source[i] != target[k])
            ++out_of_order;
        ++k;
    }

    const T m = static_cast<T>(matches);
    const T t = static_cast<T>(out_of_order);
    // t counts each transposition twice, hence the division by 2.
    return (m / static_cast<T>(sl) + m / static_cast<T>(tl) + T{1} - t / m / T{2}) / T{3};
}
/// @brief Tests whether two strings are similar under a Jaro-Winkler score.
///
/// The Jaro similarity of the two strings is computed first. If it does not
/// exceed `boost_treshold` the function returns false. Otherwise the score is
/// boosted by `scaling_factor * common_prefix * (1 - similarity)`, where
/// `common_prefix` is the length of the shared leading run of characters,
/// capped at `prefix`, and the boosted score is compared to `boost_treshold`.
///
/// Declared `inline` because it is defined in a header; without it, including
/// the header from more than one translation unit yields duplicate definitions.
///
/// @param source          First string.
/// @param target          Second string.
/// @param prefix          Maximum number of leading characters considered for
///                        the prefix boost.
/// @param boost_treshold  Score that must be exceeded, both for the boost to be
///                        applied and for the final result to be true.
/// @param scaling_factor  Weight applied per common prefix character.
/// @return true when the boosted score is greater than `boost_treshold`,
///         false otherwise.
inline bool jaroWinklerSimilarity(const std::string& source,
                                  const std::string& target,
                                  const std::size_t prefix = 2,
                                  const double boost_treshold = 0.7,
                                  const double scaling_factor = 0.1)
{
    const double similarity = jaroSimilarity<double>(source, target);
    if (similarity > boost_treshold) {
        // Never look past the end of either string or beyond the prefix cap.
        const std::size_t limit =
            std::min({ source.length(), target.length(), prefix });
        std::size_t common_prefix = 0;
        while (common_prefix < limit
               && source[common_prefix] == target[common_prefix]) {
            ++common_prefix;
        }
        // NOTE(review): with a non-negative scaling_factor and a similarity in
        // [0, 1] the boost can only raise the score, so this comparison looks
        // always true inside this branch - confirm whether returning the
        // boosted score itself was the original intent.
        const double boosted =
            similarity + scaling_factor * common_prefix * (1 - similarity);
        return boosted > boost_treshold;
    }
    return false;
}