import Foundation
/// Splits `text` into a set of unique, lowercased words.
///
/// A word is a maximal run of characters for which `isLetter` or `isNumber`
/// is true. Every other character (spaces, punctuation, hyphens, ...) acts
/// as a separator and is discarded.
///
/// - Parameter text: The text to tokenize.
/// - Returns: The distinct lowercased words found in `text`; empty when
///   `text` contains no letters or numbers.
func tokenize(_ text: String) -> Set<String> {
    // `split` omits empty subsequences by default, so runs of separators
    // never produce empty words.
    let pieces = text.split(whereSeparator: { character in
        !(character.isLetter || character.isNumber)
    })

    // Lowercase character by character and glue the results back together.
    let loweredWords = pieces.map { piece in
        piece.map { $0.lowercased() }.joined()
    }
    return Set(loweredWords)
}
/// Returns the words common to every set in `allSets`.
///
/// Works for any number of sets: a single set is returned unchanged, and an
/// empty array yields an empty set.
///
/// - Parameter allSets: The word sets to intersect.
/// - Returns: The intersection of all sets in `allSets`.
func findMatchesMultiple(_ allSets: [Set<String>]) -> Set<String> {
    // Seed with the first set; with no sets at all there is nothing in common.
    guard var common = allSets.first else { return [] }

    for words in allSets.dropFirst() {
        // Once nothing is shared, later sets cannot add anything back.
        if common.isEmpty { break }
        common.formIntersection(words)
    }
    return common
}
// -------------------------------------------------------------
// Three text blocks to compare
// -------------------------------------------------------------
let text1 =
    "Machine learning allows computers to learn from data. " +
    "It is widely used in modern applications."
let text2 =
    "Data science uses machine learning techniques. " +
    "Applications rely on data-driven models."
let text3 =
    "Modern applications of machine learning include data analysis, " +
    "automation, and intelligent systems."

// -------------------------------------------------------------
// Tokenize all texts
// -------------------------------------------------------------
let words1 = tokenize(text1)
let words2 = tokenize(text2)
let words3 = tokenize(text3)

// Collect the word sets into an array for multi-text comparison
let allSets = [words1, words2, words3]

// -------------------------------------------------------------
// Find keyword matches across ALL texts
// -------------------------------------------------------------
let matches = findMatchesMultiple(allSets)

// -------------------------------------------------------------
// Output results
// -------------------------------------------------------------
print("Matched Keywords Across ALL Texts:")
// `Set` iteration order is unspecified and can differ between runs, so the
// matches are sorted to keep the output reproducible. Joining the words also
// avoids a trailing space and ends the line with a newline.
print(matches.sorted().joined(separator: " "))

/*
run:
Matched Keywords Across ALL Texts:
applications data learning machine
*/