#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <regex>
// ------------------------------------------------------------
// Extract all alphabetic words from a string (lowercased)
// ------------------------------------------------------------
std::vector<std::string> extract_words(const std::string& text) {
std::vector<std::string> words;
std::regex word_re("[A-Za-z]+");
auto begin = std::sregex_iterator(text.begin(), text.end(), word_re);
auto end = std::sregex_iterator();
for (auto it = begin; it != end; ++it) {
std::string w = it->str();
std::transform(w.begin(), w.end(), w.begin(), ::tolower);
words.push_back(std::move(w));
}
return words;
}
// ------------------------------------------------------------
// Group words by *all* prefixes they contain
// ------------------------------------------------------------
std::unordered_map<std::string, std::vector<std::string>>
group_by_all_prefixes(const std::vector<std::string>& words)
{
std::unordered_map<std::string, std::vector<std::string>> groups;
for (const auto& w : words) {
for (size_t i = 1; i <= w.size(); ++i) {
groups[w.substr(0, i)].push_back(w);
}
}
return groups;
}
// ------------------------------------------------------------
// Filter prefixes that appear in at least `min_count` words
// ------------------------------------------------------------
std::vector<std::pair<std::string, std::vector<std::string>>>
filter_prefix_groups(
const std::unordered_map<std::string, std::vector<std::string>>& groups,
size_t min_count = 2)
{
std::vector<std::pair<std::string, std::vector<std::string>>> result;
for (const auto& [prefix, ws] : groups) {
if (ws.size() >= min_count)
result.emplace_back(prefix, ws);
}
return result;
}
// ------------------------------------------------------------
// Find the longest prefix among the filtered groups
// ------------------------------------------------------------
const std::pair<std::string, std::vector<std::string>>*
find_longest_prefix(
const std::vector<std::pair<std::string, std::vector<std::string>>>& groups)
{
if (groups.empty())
return nullptr;
return &*std::max_element(
groups.begin(), groups.end(),
[](const auto& a, const auto& b) {
return a.first.size() < b.first.size();
}
);
}
// ------------------------------------------------------------
// Main
// ------------------------------------------------------------
int main() {
std::string s =
"The Lowly inhabitants of the lowland were surprised to see "
"the lower branches of the trees.";
auto words = extract_words(s);
auto groups = group_by_all_prefixes(words);
auto filtered = filter_prefix_groups(groups, 2);
auto longest = find_longest_prefix(filtered);
if (longest) {
const auto& prefix = longest->first;
const auto& ws = longest->second;
std::cout << "Longest shared prefix:\n";
std::cout << prefix
<< " | prefix_len=" << prefix.size()
<< " | group_count=" << ws.size()
<< " | [";
for (size_t i = 0; i < ws.size(); ++i) {
std::cout << ws[i];
if (i + 1 < ws.size()) std::cout << ", ";
}
std::cout << "]\n";
}
}
/*
run:
Longest shared prefix:
lowl | prefix_len=4 | group_count=2 | [lowly, lowland]
*/