import scala.collection.mutable
import scala.util.matching.Regex
/**
 * Removes every whole-token occurrence of `word` from `s`.
 *
 * The text is lower-cased and split into `\w+` tokens, so the result is a
 * lower-case string with punctuation dropped and tokens separated by a
 * single space. Matching is case-insensitive: `word` is lower-cased before
 * it is compared against the (already lower-cased) tokens.
 *
 * @param s    text to process
 * @param word word to remove; compared against whole tokens only
 * @return the remaining tokens joined by single spaces ("" if none remain)
 */
def removeWord(s: String, word: String): String = {
  // Tokens are lower-cased below, so the target must be lower-cased as well;
  // otherwise a word such as "The" could never match anything.
  val target = word.toLowerCase
  "\\w+".r
    .findAllIn(s.toLowerCase)
    .filter(_ != target)
    .mkString(" ")
}
/**
 * Returns the `n` most frequent words of `s`, ignoring a fixed set of stop words.
 *
 * The text is lower-cased and split into `\w+` tokens. Words are ranked by
 * descending count; ties are broken alphabetically. The returned map iterates
 * in ranking order (most frequent first) for any `n`.
 *
 * @param s text to analyse
 * @param n maximum number of words to return; values <= 0 yield an empty map
 * @return word -> occurrence count for the top `n` words, in ranking order
 */
def getTopNWords(s: String, n: Int): Map[String, Int] = {
  import scala.collection.immutable.ListMap

  // Exclude stop words (commonly used words)
  val stopWords: Set[String] = Set("is", "a", "to", "as", "for", "of",
    "on", "and", "the", "alongside", "also", "are", "be")

  // Tokenise once and drop stop words in the same pass, then count occurrences.
  val counts: Map[String, Int] = "\\w+".r
    .findAllIn(s.toLowerCase)
    .filterNot(stopWords)
    .foldLeft(Map.empty[String, Int]) { (acc, word) =>
      acc.updated(word, acc.getOrElse(word, 0) + 1)
    }

  // Highest count first; alphabetical order among equal counts.
  val ranked: Seq[(String, Int)] = counts.toSeq
    .sortBy { case (word, count) => (-count, word) }
    .take(n)

  // ListMap keeps insertion order, so the ranking survives for more than
  // four entries, where a plain `.toMap` would lose it.
  ListMap.empty[String, Int] ++ ranked
}
/**
 * Demo entry point: runs `getTopNWords` on a sample sentence and prints each
 * key of the returned map on its own line.
 */
object Main extends App {
  // Sample text to analyse.
  val s = "Scala is a strong statically typed high-level general-purpose " +
    "programming language that supports both object-oriented programming " +
    "and functional programming. Designed to be concise, many of Scala " +
    "design decisions are intended to address criticisms of Java."

  // Number of words requested.
  val n = 4

  val topNWords = getTopNWords(s, n)

  for (word <- topNWords.keys) println(word)
}
/*
run:
programming
scala
address
both
*/