How to get the N top words of a string by occurrences in Swift

1 Answer

0 votes
import Foundation

/// Returns `s` with every occurrence of `word` removed, ignoring case.
///
/// The text is lowercased and split on every character that is not in
/// `CharacterSet.alphanumerics`; the remaining tokens are joined with single
/// spaces. As a consequence the result is always lowercased and punctuation is
/// replaced by spaces. Runs of adjacent separators produce empty tokens, which
/// are kept and therefore show up as consecutive spaces in the result.
///
/// - Parameters:
///   - s: The text to process.
///   - word: The word to remove. Compared case-insensitively against whole tokens.
/// - Returns: The lowercased, space-joined text without `word`.
func removeWord(_ s: String, _ word: String) -> String {
    // Tokens are lowercased below, so the target must be lowercased as well;
    // otherwise a word such as "The" could never match anything.
    let target = word.lowercased()

    return s.lowercased()
        .components(separatedBy: CharacterSet.alphanumerics.inverted)
        .filter { $0 != target }
        .joined(separator: " ")
}

/// Returns the `n` most frequent words of `s`, mapped to their occurrence counts.
///
/// The text is lowercased and split on every character that is not in
/// `CharacterSet.alphanumerics`, so "high-level" is counted as "high" and
/// "level". A fixed set of common English stop words is ignored. Words are
/// ranked by descending count, with ties broken alphabetically, and the first
/// `n` of that ranking are kept.
///
/// - Parameters:
///   - s: The text to analyse.
///   - n: The maximum number of words to return. Zero or a negative value
///     yields an empty dictionary.
/// - Returns: A dictionary with at most `n` entries. A dictionary has no
///   defined order, so callers that need the ranking must sort the result.
func getTopNWords(_ s: String, _ n: Int) -> [String: Int] {
    // Exclude stop words (commonly used words)
    let stopWords: Set<String> = ["is", "a", "to", "as", "for", "of", "by",
                                  "on", "in", "and", "the", "alongside", "also"]

    // Tokenize once and drop empty tokens and stop words in a single pass,
    // instead of rebuilding the whole string once per stop word.
    let words = s.lowercased()
        .components(separatedBy: CharacterSet.alphanumerics.inverted)
        .filter { !$0.isEmpty && !stopWords.contains($0) }

    // Count the occurrences of each word
    let wordCount = words.reduce(into: [String: Int]()) { counts, word in
        counts[word, default: 0] += 1
    }

    // Most frequent first; alphabetical order makes ties deterministic
    let sortedWords = wordCount.sorted { lhs, rhs in
        if lhs.value == rhs.value { return lhs.key < rhs.key }
        return lhs.value > rhs.value
    }

    // Clamp to zero: a negative length would trap instead of returning nothing
    let topWords = sortedWords.prefix(max(0, n))

    return Dictionary(uniqueKeysWithValues: topWords.map { ($0.key, $0.value) })
}

// Demo: print the four most frequent non-stop words of a sample text.
let s = "Swift is a high-level general-purpose, multi-paradigm, compiled " +
        "programming language created by Chris Lattner in 2010 for Apple Inc. " +
        "and maintained by the open-source community. Swift compiles to machine code " +
        "and uses an LLVM-based compiler. Swift was first released in June 2014 " +
        "and the Swift toolchain has shipped in Xcode since Xcode version 6, " +
        "released in September 2014."

let n = 4
let topNWords = getTopNWords(s, n)

// A dictionary has no defined iteration order, so sort explicitly:
// highest count first, ties broken alphabetically.
let rankedWords = topNWords.sorted { lhs, rhs in
    if lhs.value == rhs.value { return lhs.key < rhs.key }
    return lhs.value > rhs.value
}

for entry in rankedWords {
    print(entry.key)
}



/*
run:

swift
2014
released
xcode

*/

 



answered Feb 3 by avibootz
edited Feb 3 by avibootz
...