How to get the N top words of a string by occurrences in VB.NET

1 Answer

0 votes
Imports System
Imports System.Linq
Imports System.Collections.Generic

''' <summary>
''' Finds the most frequent words in a piece of text, ignoring a small fixed
''' list of English stop words.
''' </summary>
Public Class TopNWords
    ''' <summary>
    ''' Characters that separate words. "." is a separator, so "VB.NET" is
    ''' counted as the two words "vb" and "net", and "6.0" as "6" and "0".
    ''' </summary>
    Private Shared ReadOnly Separators As Char() = {" "c, ","c, "."c, ";"c, ":"c, "!"c, "?"c}

    ''' <summary>
    ''' Lower-case words that are never counted.
    ''' </summary>
    Private Shared ReadOnly StopWords As New HashSet(Of String)(StringComparer.Ordinal) From {
        "is", "a", "to", "as", "can", "that", "on", "and", "the", "of"
    }

    ''' <summary>
    ''' Lower-cases the text, splits it on <see cref="Separators"/> and drops
    ''' empty entries and stop words, all in a single pass.
    ''' </summary>
    ''' <param name="str">Text to split; Nothing or "" yields no words.</param>
    ''' <returns>The remaining words, in their original order.</returns>
    Private Shared Function Tokenize(ByVal str As String) As IEnumerable(Of String)
        If String.IsNullOrEmpty(str) Then
            Return Enumerable.Empty(Of String)()
        End If

        ' ToLowerInvariant keeps the result independent of the current culture
        ' (under Turkish cultures ToLower turns "IS" into "ıs", which would
        ' slip past the stop-word list).
        Dim words As String() = str.ToLowerInvariant().Split(Separators, StringSplitOptions.RemoveEmptyEntries)

        Return words.Where(Function(word) Not StopWords.Contains(word))
    End Function

    ''' <summary>
    ''' Returns the <paramref name="n"/> most frequent words as an ordered list:
    ''' highest count first, ties broken by ordinal comparison of the word.
    ''' </summary>
    ''' <param name="str">Text to analyse; Nothing is treated as empty text.</param>
    ''' <param name="n">Maximum number of words to return; zero or less yields an empty list.</param>
    ''' <returns>Up to <paramref name="n"/> (word, count) pairs in rank order.</returns>
    Public Shared Function GetRankedWords(ByVal str As String, ByVal n As Integer) As List(Of KeyValuePair(Of String, Long))
        Dim counts = Tokenize(str).GroupBy(Function(word) word).Select(Function(g) New KeyValuePair(Of String, Long)(g.Key, g.LongCount()))

        ' The ordinal tie-break makes the order of equally frequent words the
        ' same on every machine, whatever its culture settings.
        Dim ranked = counts.OrderByDescending(Function(kvp) kvp.Value).ThenBy(Function(kvp) kvp.Key, StringComparer.Ordinal)

        Return ranked.Take(n).ToList()
    End Function

    ''' <summary>
    ''' Returns the <paramref name="n"/> most frequent words and their counts.
    ''' </summary>
    ''' <remarks>
    ''' Dictionary(Of TKey, TValue) does not guarantee any enumeration order.
    ''' Call <see cref="GetRankedWords"/> when the ranking itself is needed.
    ''' </remarks>
    ''' <param name="str">Text to analyse; Nothing is treated as empty text.</param>
    ''' <param name="n">Maximum number of words to return; zero or less yields an empty dictionary.</param>
    ''' <returns>A dictionary mapping each selected word to its number of occurrences.</returns>
    Public Shared Function GetTopNWords(ByVal str As String, ByVal n As Integer) As Dictionary(Of String, Long)
        Return GetRankedWords(str, n).ToDictionary(Function(kvp) kvp.Key, Function(kvp) kvp.Value)
    End Function

    ''' <summary>
    ''' Demo entry point: prints the five most frequent words of a sample text.
    ''' </summary>
    Public Shared Sub Main(ByVal args As String())
        Dim str As String = "VB.NET is a multi-paradigm, object-oriented programming language, " &
                            "implemented on .NET, Mono, and the .NET Framework. " &
                            "Microsoft launched VB.NET in 2002 as the successor to its original " &
                            "Visual Basic language, the last version of which was Visual Basic 6.0 " &
                            "Along with C# and F#, it is one of the three main languages targeting the .NET ecosystem."
        Dim n As Integer = 5

        ' Iterate the ordered list rather than the dictionary so the printed
        ' order is the ranking by construction.
        For Each kvp In GetRankedWords(str, n)
            Console.WriteLine(kvp.Key)
        Next
    End Sub
End Class
					

' run:
'
' net
' basic
' language
' vb
' visual
'

 



answered Feb 2 by avibootz
...