How to get the top N words of a string by number of occurrences in Kotlin

1 Answer

0 votes
import java.util.regex.Pattern

/**
 * Removes every occurrence of [word] from [s], ignoring case.
 *
 * The input is lowercased and split into `\w+` tokens, so the result is always
 * lowercase, has punctuation stripped, and uses a single space between the
 * remaining words.
 *
 * @param s text to filter.
 * @param word word to drop; compared case-insensitively against whole tokens.
 * @return the remaining tokens joined by single spaces (empty if none remain).
 */
fun removeWord(s: String, word: String): String {
    // Tokens are lowercased below, so normalise the target the same way;
    // otherwise a mixed-case `word` could never match any token.
    val target = word.lowercase()
    return Regex("\\w+")
        .findAll(s.lowercase())
        .map { it.value }
        .filter { it != target }
        .joinToString(" ")
}

/**
 * Returns the [n] most frequent words in [s], ignoring a fixed list of stop words.
 *
 * Words are the `\w+` matches found in the lowercased input. Words with equal
 * counts are ordered alphabetically, and the returned map iterates from the
 * most frequent word to the least frequent one.
 *
 * @param s text to analyse.
 * @param n maximum number of entries to return; must not be negative.
 * @return word-to-count map holding at most [n] entries in ranked order.
 * @throws IllegalArgumentException if [n] is negative.
 */
fun getTopNWords(s: String, n: Int): Map<String, Int> {
    // Stop words (commonly used words) excluded from the ranking.
    val stopWords = setOf("is", "a", "to", "as", "can", "that", "on", "and", "the", "but")

    // Tokenise once and drop stop words in the same pass, instead of
    // re-tokenising and re-joining the whole text for every stop word.
    val wordCount = Regex("\\w+")
        .findAll(s.lowercase())
        .map { it.value }
        .filterNot { it in stopWords }
        .groupingBy { it }
        .eachCount()

    // Highest count first; alphabetical order breaks ties so the result is deterministic.
    return wordCount.entries
        .sortedWith(compareByDescending<Map.Entry<String, Int>> { it.value }.thenBy { it.key })
        .take(n)
        .associate { it.toPair() }
}

/**
 * Demo entry point: passes a sample paragraph and n = 4 to [getTopNWords] and
 * prints each key of the returned map on its own line.
 */
fun main() {
    val topCount = 4
    val sampleText = "Kotlin is a cross-platform, statically typed, general-purpose " +
        "high-level programming language. Kotlin is designed to " +
        "interoperate fully with Java, and the JVM version of Kotlin's " +
        "standard library depends on the Java Class Library, " +
        "but type inference allows its syntax to be more concise. " +
        "Kotlin mainly targets the JVM, but also compiles to JavaScript " +
        "or native iOS apps"

    getTopNWords(sampleText, topCount).keys.forEach { println(it) }
}

  
   
/*
run:

kotlin
java
jvm
library
 
*/

 



answered Feb 3 by avibootz
...