How to find matching keywords across multiple text blocks in Swift

1 Answer

0 votes
import Foundation

/**
    Break a text into its set of distinct, lowercased words.

    A word is a maximal run of characters that are letters or numbers
    (`Character.isLetter` / `Character.isNumber`). Every other character
    (spaces, punctuation, hyphens, ...) acts as a separator, so
    "data-driven" yields the two words "data" and "driven".

    - Parameter text: The text to split into words.
    - Returns: The unique lowercased words; empty when `text` contains
      no letters or numbers.
*/
func tokenize(_ text: String) -> Set<String> {
    // Anything that is neither a letter nor a number ends the current word.
    // Empty pieces between consecutive separators are omitted by default.
    let pieces = text.split(whereSeparator: { !($0.isLetter || $0.isNumber) })

    // Lowercase each piece so matching is case-insensitive; the Set
    // collapses repeated words.
    return Set(pieces.map { $0.lowercased() })
}

/**
    Find the keywords shared by every text.

    Works for any number of sets: zero sets give an empty result, a single
    set is returned unchanged, and two or more sets give their intersection.

    - Parameter allSets: One set of words per text.
    - Returns: The words present in ALL of the given sets.
*/
func findMatchesMultiple(_ allSets: [Set<String>]) -> Set<String> {
    // Start with the first set; with no sets there is nothing to match.
    guard var result = allSets.first else { return [] }

    // Narrow the running result with each remaining set.
    for other in allSets.dropFirst() {
        result.formIntersection(other)

        // Once nothing is shared, further intersections cannot add words back.
        if result.isEmpty { break }
    }

    return result
}

// -------------------------------------------------------------
// Three text blocks to compare
// -------------------------------------------------------------
let text1 =
    "Machine learning allows computers to learn from data. " +
    "It is widely used in modern applications."

let text2 =
    "Data science uses machine learning techniques. " +
    "Applications rely on data-driven models."

let text3 =
    "Modern applications of machine learning include data analysis, " +
    "automation, and intelligent systems."

// -------------------------------------------------------------
// Tokenize all texts
// -------------------------------------------------------------
let words1 = tokenize(text1)
let words2 = tokenize(text2)
let words3 = tokenize(text3)

// Put them into an array for multi-text comparison
let allSets = [words1, words2, words3]

// -------------------------------------------------------------
// Find keyword matches across ALL texts
// -------------------------------------------------------------
let matches = findMatchesMultiple(allSets)

// -------------------------------------------------------------
// Output results
// -------------------------------------------------------------
print("Matched Keywords Across ALL Texts:")
// A Set has no defined iteration order and Swift seeds hashing per process,
// so print the words sorted to get the same output on every run.
for w in matches.sorted() {
    print("\(w) ", terminator: "")
}



/*
run:

Matched Keywords Across ALL Texts:
applications data learning machine 

*/

 



answered 4 hours ago by avibootz

Related questions

...