#include <algorithm> // transform
#include <cctype>
#include <iostream>
#include <iterator>
#include <set>
#include <string>
/*
Tokenize text into a set of unique, lower-cased words.
- A word is a maximal run of characters for which std::isalnum is true
- Every other character (punctuation, spaces, ...) ends the current word
- Each kept character is lower-cased with std::tolower
- The result is a std::set, so duplicates collapse and iteration is sorted
*/
std::set<std::string> tokenize(const std::string &text) {
    std::set<std::string> words;
    std::string word;
    for (const char c : text) {
        // The <cctype> functions require an argument representable as
        // unsigned char (or EOF). A plain char holding a byte >= 0x80 may be
        // negative, which would be undefined behavior, so convert first.
        const unsigned char uc = static_cast<unsigned char>(c);
        if (std::isalnum(uc)) {
            // std::tolower returns int; narrow back to char explicitly.
            word += static_cast<char>(std::tolower(uc));
        } else if (!word.empty()) {
            // A separator closes the word collected so far.
            words.insert(word);
            word.clear();
        }
    }
    // Flush the last word when the text does not end with a separator.
    if (!word.empty()) {
        words.insert(word);
    }
    return words;
}
/*
Find keyword matches (set intersection).
-------------------------------------------------------------
Receives two sets of words and returns a new set containing only the
words that appear in BOTH sets. Neither input is modified. If either
input is empty, the result is empty.
*/
std::set<std::string> findMatches(const std::set<std::string> &words1,
                                  const std::set<std::string> &words2)
{
    std::set<std::string> matches;
    // Both inputs are std::set with the default ordering, so they iterate in
    // the same sorted order and std::set_intersection can merge them in one
    // pass. The end() hint suits the output arriving in ascending order.
    std::set_intersection(words1.begin(), words1.end(),
                          words2.begin(), words2.end(),
                          std::inserter(matches, matches.end()));
    return matches;
}
int main() {
    // The two passages whose vocabularies are compared.
    const std::string text1 =
        "Machine learning allows computers to learn from data. "
        "It is widely used in modern applications.";
    const std::string text2 =
        "Data science uses machine learning techniques. "
        "Applications rely on data-driven models.";

    // Reduce each passage to its set of lower-cased words, then keep the
    // words common to both.
    const auto words1 = tokenize(text1);
    const auto words2 = tokenize(text2);
    const auto matches = findMatches(words1, words2);

    // Writes every word of a set followed by a single space, in set order.
    const auto printAll = [](const std::set<std::string> &items) {
        for (auto it = items.begin(); it != items.end(); ++it) {
            std::cout << *it << " ";
        }
    };

    // Report both vocabularies and their overlap.
    std::cout << "Keywords in Text 1:\n";
    printAll(words1);
    std::cout << "\n\nKeywords in Text 2:\n";
    printAll(words2);
    std::cout << "\n\nMatched Keywords:\n";
    printAll(matches);
    std::cout << "\n";
}
/*
run:
Keywords in Text 1:
allows applications computers data from in is it learn learning machine modern to used widely
Keywords in Text 2:
applications data driven learning machine models on rely science techniques uses
Matched Keywords:
applications data learning machine
*/