import java.util.HashSet;
import java.util.ArrayList;
import java.util.Set;
import java.util.List;
/**
 * Finds the keywords that several pieces of text have in common.
 *
 * <p>Each text is reduced to a set of lower-cased words by {@link #tokenize(String)},
 * and the shared keywords are the intersection of those sets, computed by
 * {@link #findMatchesMultiple(List)}.
 */
public class KeywordMatching {

    /**
     * Splits text into a set of distinct, lower-cased words.
     *
     * <p>A word is a maximal run of letters and digits; every other character
     * (spaces, punctuation, hyphens, ...) acts as a separator and is discarded.
     * The text is scanned by Unicode code point rather than by UTF-16 {@code char},
     * so letters outside the Basic Multilingual Plane are kept instead of being
     * treated as separators.
     *
     * @param text the text to tokenize; {@code null} is treated like an empty string
     * @return a new mutable set of the words found, in no particular order
     */
    public static Set<String> tokenize(String text) {
        Set<String> words = new HashSet<>();
        if (text == null) {
            return words;
        }
        StringBuilder word = new StringBuilder();
        int index = 0;
        while (index < text.length()) {
            int codePoint = text.codePointAt(index);
            // A supplementary code point occupies two chars; step over both at once.
            index += Character.charCount(codePoint);
            if (Character.isLetterOrDigit(codePoint)) {
                word.appendCodePoint(Character.toLowerCase(codePoint));
            } else if (word.length() > 0) {
                // A separator ends the word collected so far.
                words.add(word.toString());
                word.setLength(0);
            }
        }
        // Flush the last word when the text does not end with a separator.
        if (word.length() > 0) {
            words.add(word.toString());
        }
        return words;
    }

    /**
     * Returns the words that are present in every one of the given sets,
     * i.e. the intersection of all sets. Works for any number of sets.
     *
     * <p>The input list and its sets are not modified.
     *
     * @param allSets the word sets to intersect; {@code null} is treated like an empty list
     * @return a new mutable set holding the common words, in no particular order;
     *         empty when {@code allSets} is {@code null} or empty
     */
    public static Set<String> findMatchesMultiple(List<Set<String>> allSets) {
        if (allSets == null || allSets.isEmpty()) {
            return new HashSet<>();
        }
        // Work on a copy of the first set so the caller's data stays untouched.
        Set<String> result = new HashSet<>(allSets.get(0));
        // Narrow the candidates with each remaining set; once nothing is left,
        // no later set can add anything back, so stop early.
        for (int i = 1; i < allSets.size() && !result.isEmpty(); i++) {
            result.retainAll(allSets.get(i));
        }
        return result;
    }

    /**
     * Demo: tokenizes three sample texts and prints the keywords shared by all of them.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Three text blocks to compare
        String text1 =
                "Machine learning allows computers to learn from data. " +
                "It is widely used in modern applications.";
        String text2 =
                "Data science uses machine learning techniques. " +
                "Applications rely on data-driven models.";
        String text3 =
                "Modern applications of machine learning include data analysis, " +
                "automation, and intelligent systems.";

        // Tokenize all texts and collect the word sets for multi-text comparison
        List<Set<String>> allSets = new ArrayList<>();
        allSets.add(tokenize(text1));
        allSets.add(tokenize(text2));
        allSets.add(tokenize(text3));

        // Find keyword matches across ALL texts
        Set<String> matches = findMatchesMultiple(allSets);

        // Output results (set iteration order is unspecified)
        System.out.println("Matched Keywords Across ALL Texts:");
        for (String w : matches) {
            System.out.print(w + " ");
        }
    }
}
/*
Sample run (the matches come from a HashSet, so the word order may differ):
Matched Keywords Across ALL Texts:
data machine learning applications
*/