package main
import (
"fmt"
"regexp"
"sort"
"strings"
)
// removeWordTokenRE matches one run of word characters (`\w+`). It is compiled
// once at package scope instead of on every removeWord call.
var removeWordTokenRE = regexp.MustCompile(`\w+`)

// removeWord lower-cases s, splits it into `\w+` tokens, drops every token
// equal to word, and returns the remaining tokens joined by single spaces.
//
// The comparison is case-insensitive: word is lower-cased before matching,
// because the tokens it is compared against are always lower-case. Without
// that, a mixed-case word such as "IS" could never match anything.
//
// Punctuation and original spacing are not preserved, since only the matched
// tokens are kept. An input with no remaining tokens yields "".
func removeWord(s, word string) string {
	target := strings.ToLower(word)
	tokens := removeWordTokenRE.FindAllString(strings.ToLower(s), -1)

	kept := make([]string, 0, len(tokens))
	for _, tok := range tokens {
		if tok != target {
			kept = append(kept, tok)
		}
	}
	return strings.Join(kept, " ")
}
// topNTokenRE matches one run of word characters (`\w+`). It is compiled once
// at package scope rather than on every getTopNWords call.
var topNTokenRE = regexp.MustCompile(`\w+`)

// isStopWord reports whether w, which must already be lower-case, is one of
// the commonly used words that getTopNWords excludes from counting.
func isStopWord(w string) bool {
	switch w {
	case "is", "a", "to", "as", "for", "of", "at", "it",
		"by", "on", "and", "the", "alongside", "also":
		return true
	}
	return false
}

// getTopNWords returns the n most frequent words of s together with their
// occurrence counts.
//
// s is lower-cased and split into `\w+` tokens; stop words (see isStopWord)
// are skipped. Words are ranked by count, highest first, and ties are broken
// alphabetically so the selection is deterministic. If fewer than n distinct
// words remain, all of them are returned; n <= 0 yields an empty map. The
// result is never nil.
//
// The result is a map, so the ranking order itself is not preserved. Callers
// that need ranked output must sort the entries themselves.
func getTopNWords(s string, n int) map[string]int {
	// Tokenize once and filter stop words in the same pass, instead of
	// re-tokenizing and re-joining the whole text once per stop word.
	wordCount := make(map[string]int)
	for _, word := range topNTokenRE.FindAllString(strings.ToLower(s), -1) {
		if !isStopWord(word) {
			wordCount[word]++
		}
	}

	type kv struct {
		Key   string
		Value int
	}

	// Rank the words: count descending, then key ascending for ties.
	ranked := make([]kv, 0, len(wordCount))
	for k, v := range wordCount {
		ranked = append(ranked, kv{k, v})
	}
	sort.Slice(ranked, func(i, j int) bool {
		if ranked[i].Value != ranked[j].Value {
			return ranked[i].Value > ranked[j].Value
		}
		return ranked[i].Key < ranked[j].Key
	})

	topNWords := make(map[string]int)
	for i := 0; i < n && i < len(ranked); i++ {
		topNWords[ranked[i].Key] = ranked[i].Value
	}
	return topNWords
}
// main prints the five most frequent non-stop words of a sample text, one per
// line, most frequent first and alphabetically among equal counts.
func main() {
	s := "Go is a statically typed, compiled high-level " +
		"general purpose programming language. It is known for " +
		"the simplicity of its syntax and the efficiency of development " +
		"that it enables by the inclusion of a large standard library " +
		"supplying many needs for common projects. Go was designed at " +
		"Google in 2009 by Robert Griesemer, Rob Pike, and Ken Thompson."
	n := 5
	topNWords := getTopNWords(s, n)

	// Map iteration order is randomized, so ranging over topNWords directly
	// would print the words in a different order on each run. Sort the keys
	// by count (descending), then alphabetically, for stable ranked output.
	keys := make([]string, 0, len(topNWords))
	for key := range topNWords {
		keys = append(keys, key)
	}
	sort.Slice(keys, func(i, j int) bool {
		ci, cj := topNWords[keys[i]], topNWords[keys[j]]
		if ci != cj {
			return ci > cj
		}
		return keys[i] < keys[j]
	})

	for _, key := range keys {
		fmt.Println(key)
	}
}
/*
run:
go
2009
common
compiled
designed
*/