How to get the top N words of a string by number of occurrences in C#

1 Answer

0 votes
using System;
using System.Linq;
using System.Collections.Generic;

/// <summary>
/// Finds the most frequently occurring words in a piece of text, ignoring
/// letter case and a small fixed list of stop words.
/// </summary>
public class TopNWords
{
    // Characters treated as word boundaries. Hyphens are deliberately not
    // listed, so compound words such as "class-based" stay in one piece.
    private static readonly char[] Separators = { ' ', ',', '.', ';', ':', '!', '?' };

    // Commonly used words that are excluded from the count. Stored in lower
    // case because the input is lower-cased before the lookup.
    private static readonly HashSet<string> StopWords = new HashSet<string>(StringComparer.Ordinal) {
        "is", "a", "to", "as", "can", "that", "on", "and"
    };

    /// <summary>
    /// Counts the words of <paramref name="str"/> and returns the
    /// <paramref name="n"/> most frequent ones together with their counts.
    /// </summary>
    /// <param name="str">Text to analyse. Words are compared case-insensitively.</param>
    /// <param name="n">
    /// Maximum number of words to return. Fewer entries are returned when the
    /// text has fewer distinct words; zero or a negative value yields an empty result.
    /// </param>
    /// <returns>
    /// A dictionary mapping each selected lower-cased word to its number of
    /// occurrences. Entries are inserted from most to least frequent, with ties
    /// broken by ordinal word order.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public static Dictionary<string, long> GetTopNWords(string str, int n) {
        if (str == null) {
            throw new ArgumentNullException(nameof(str));
        }

        // Lower-case with the invariant culture so that the result (and the
        // stop-word match) does not depend on the machine's current culture.
        // Stop words are dropped in the same pass that splits the text.
        var wordCount = str.ToLowerInvariant()
                           .Split(Separators, StringSplitOptions.RemoveEmptyEntries)
                           .Where(word => !StopWords.Contains(word))
                           .GroupBy(word => word)
                           .ToDictionary(g => g.Key, g => (long)g.Count());

        // Rank by count, then by ordinal word order so that the selection among
        // equally frequent words is deterministic.
        // NOTE(review): callers that print the result in rank order rely on
        // Dictionary enumerating in insertion order, which is not a documented
        // guarantee of Dictionary<TKey, TValue>.
        return wordCount.OrderByDescending(kvp => kvp.Value)
                        .ThenBy(kvp => kvp.Key, StringComparer.Ordinal)
                        .Take(n)
                        .ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
    }

    /// <summary>
    /// Demo entry point: prints the five most frequent words of a sample text.
    /// </summary>
    public static void Main(string[] args)
    {
        string str = "C# is a general-purpose high-level programming language " +
                     "supporting multiple paradigms. C# encompasses static typing " +
                     "strong typing, lexically scoped, imperative, declarative, " +
                     "functional, generic, object-oriented class-based, and " +
                     "component-oriented programming disciplines.";
        int n = 5;

        var topNWords = GetTopNWords(str, n);

        foreach (var kvp in topNWords) {
            Console.WriteLine(kvp.Key);
        }
    }
}


/*
run:

c#
programming
typing
class-based
component-oriented

*/

 



answered Feb 2 by avibootz
...