import java.util.HashSet;
import java.util.Set;
/**
 * Small demonstration of keyword matching between two texts.
 *
 * <p>Each text is reduced to a set of lower-cased words, and the keywords the
 * two texts have in common are obtained as the intersection of those sets.
 */
public class KeywordMatching {

    /**
     * Tokenizes text into a set of distinct, lower-cased words.
     *
     * <p>A word is a maximal run of letters and/or digits; every other
     * character (punctuation, whitespace, symbols) acts as a separator and is
     * dropped. The text is scanned by Unicode code point rather than by UTF-16
     * {@code char}, so letters outside the Basic Multilingual Plane (encoded
     * as surrogate pairs) are kept as part of a word instead of being treated
     * as separators.
     *
     * @param text the text to tokenize
     * @return a new mutable set holding each distinct word once; empty when
     *         the text contains no letters or digits
     * @throws NullPointerException if {@code text} is {@code null}
     */
    Set<String> tokenize(String text) {
        Set<String> words = new HashSet<>();
        StringBuilder word = new StringBuilder();
        for (int i = 0; i < text.length(); ) {
            int codePoint = text.codePointAt(i);
            // A supplementary code point occupies two chars; step over both.
            i += Character.charCount(codePoint);
            if (Character.isLetterOrDigit(codePoint)) {
                word.appendCodePoint(Character.toLowerCase(codePoint));
            } else if (word.length() > 0) {
                // Separator reached: close the word collected so far.
                words.add(word.toString());
                word.setLength(0);
            }
        }
        // The text may end in the middle of a word; do not lose it.
        if (word.length() > 0) {
            words.add(word.toString());
        }
        return words;
    }

    /**
     * Finds the keyword matches between two word sets (set intersection).
     *
     * @param words1 the first set of words
     * @param words2 the second set of words
     * @return a new set containing only the words that appear in both
     *         {@code words1} and {@code words2}; neither argument is modified
     * @throws NullPointerException if either set is {@code null}
     */
    Set<String> findMatches(Set<String> words1, Set<String> words2) {
        // Work on a copy so the caller's set is left untouched.
        Set<String> matches = new HashSet<>(words1);
        matches.retainAll(words2);
        return matches;
    }

    /** Prints every word followed by a single space, without a line break. */
    private static void printWords(Set<String> words) {
        for (String w : words) {
            System.out.print(w + " ");
        }
    }

    /**
     * Tokenizes two sample texts and prints the keywords of each text
     * followed by the keywords they share.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        KeywordMatching km = new KeywordMatching();

        // Two text blocks to compare
        String text1 =
                "Machine learning allows computers to learn from data. " +
                "It is widely used in modern applications.";
        String text2 =
                "Data science uses machine learning techniques. " +
                "Applications rely on data-driven models.";

        // Tokenize both texts
        Set<String> words1 = km.tokenize(text1);
        Set<String> words2 = km.tokenize(text2);

        // Find keyword matches (set intersection)
        Set<String> matches = km.findMatches(words1, words2);

        // Output results
        System.out.println("Keywords in Text 1:");
        printWords(words1);
        System.out.println("\n\nKeywords in Text 2:");
        printWords(words2);
        System.out.println("\n\nMatched Keywords:");
        printWords(matches);
    }
}
/*
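Sample run (the order of the words on each line may differ, because
HashSet does not guarantee any iteration order):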
Keywords in Text 1:
allows data learn in widely learning is it used modern machine from to computers applications
Keywords in Text 2:
models data rely machine techniques science driven uses learning applications on
Matched Keywords:
data machine learning applications
*/