import scala.collection.mutable
/**
 * Splits `text` into its distinct, lower-cased words.
 *
 * A word is a maximal run of letter-or-digit characters. Every other
 * character (punctuation, whitespace, symbols) only acts as a separator
 * and never appears in the result.
 *
 * @param text the text to tokenize
 * @return the set of distinct words found in `text`, each in lower case
 */
def tokenize(text: String): Set[String] = {
  // Lower-case the word characters and blank out everything else, so that a
  // plain split on ' ' isolates the words.
  val normalized = text.map(ch => if (ch.isLetterOrDigit) ch.toLower else ' ')
  val collected = mutable.Set.empty[String]
  // Runs of separators produce empty fragments; those are not words.
  for (fragment <- normalized.split(' ') if fragment.nonEmpty) {
    collected += fragment
  }
  collected.toSet
}
/**
 * Finds the keywords shared by every text.
 *
 * Works for any number of word sets (not only three): the result is the
 * intersection of ALL sets in `allSets`.
 *
 * @param allSets one word set per text
 * @return the words contained in every set; the empty set when `allSets`
 *         is empty, and the single set itself when only one is given
 */
def findMatchesMultiple(allSets: Vector[Set[String]]): Set[String] = {
  // reduceOption yields None for an empty vector, which replaces the explicit
  // early `return`; intersect replaces the hand-written membership loop.
  allSets
    .reduceOption((common, next) => common.intersect(next))
    .getOrElse(Set.empty[String])
}
/**
 * Demo entry point: tokenizes three sample texts and prints the keywords
 * that occur in all of them.
 */
@main def main(): Unit = {
  // The three text blocks to compare.
  val documents = Vector(
    "Machine learning allows computers to learn from data. " +
      "It is widely used in modern applications.",
    "Data science uses machine learning techniques. " +
      "Applications rely on data-driven models.",
    "Modern applications of machine learning include data analysis, " +
      "automation, and intelligent systems."
  )

  // One word set per text, in the same order as the texts.
  val allSets = documents.map(text => tokenize(text))

  // Keywords present in every text.
  val matches = findMatchesMultiple(allSets)

  // Report: a heading line, then the words separated (and followed) by a space.
  println("Matched Keywords Across ALL Texts:")
  val line = matches.iterator.map(w => s"$w ").mkString
  print(line)
}
/*
Sample run (the result is an unordered Set, so the word order may vary):
Matched Keywords Across ALL Texts:
data machine learning applications
*/