How to get the top N words of a string by number of occurrences in TypeScript

1 Answer

0 votes
/**
 * Removes every occurrence of a word from a string, ignoring letter case.
 *
 * The input is lower-cased and split into `\w+` tokens, so the result is
 * normalized as a side effect: tokens are lower-case, separated by a single
 * space, and punctuation is dropped (e.g. "open-source" becomes "open source").
 *
 * @param s - Text to filter.
 * @param word - Word to remove; compared case-insensitively against each token.
 * @returns The remaining tokens joined by single spaces, or an empty string
 *          when no tokens remain.
 */
function removeWord(s: string, word: string): string {
    // Tokens are lower-cased below, so the target must be lower-cased as well;
    // otherwise a call such as removeWord(text, "The") could never match.
    const target = word.toLowerCase();

    // match() returns null when there is no token at all; fall back to an empty list.
    const tokens: string[] = s.toLowerCase().match(/\w+/g) ?? [];

    return tokens.filter(token => token !== target).join(' ');
}

/**
 * Returns the `n` most frequent words of a string together with their counts.
 *
 * The text is lower-cased and split into `\w+` tokens (so "client-side" yields
 * "client" and "side"); a fixed list of stop words is ignored. Words are ranked
 * by descending count, ties broken by `localeCompare` on the word.
 *
 * @param s - Text to analyse.
 * @param n - Maximum number of words to return; zero or a negative value
 *            yields an empty result.
 * @returns An object mapping each selected word to its count, with keys
 *          inserted in rank order. Note that JavaScript enumerates keys that
 *          look like array indices (e.g. "2020") before all other keys,
 *          regardless of insertion order.
 */
function getTopNWords(s: string, n: number): Record<string, number> {
    // Exclude stop words (commonly used words)
    const stopWords = new Set(["is", "a", "to", "as", "for", "of",
                               "on", "and", "the", "alongside", "also"]);

    // Split the string into words once; match() returns null when nothing matches.
    const words: string[] = s.toLowerCase().match(/\w+/g) ?? [];

    // Count in a Map rather than a plain object: with `{}` a word such as
    // "constructor" or "__proto__" would read an inherited Object.prototype
    // member instead of a missing count and corrupt the tally.
    const wordCount = new Map<string, number>();
    for (const word of words) {
        if (stopWords.has(word)) {
            continue;
        }
        wordCount.set(word, (wordCount.get(word) ?? 0) + 1);
    }

    // slice(0, negative) would count from the end of the list, so clamp at zero.
    const limit = Math.max(0, n);

    // Sort the words by their occurrences and keep the top N
    const ranked = Array.from(wordCount)
        .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
        .slice(0, limit);

    // fromEntries defines own properties, so even "__proto__" survives as a key.
    return Object.fromEntries(ranked);
}

// Demo input: a short paragraph in which several words repeat.
const s = "TypeScript is a free and open-source high-level programming language " +
          "developed by Microsoft that adds static typing with optional type " +
          "annotations to JavaScript. It is designed for the development of " +
          "large applications and transpiles to JavaScript. TypeScript " +
          "may be used to develop JavaScript applications for both " +
          "client-side and server-side";

// Number of top-ranked words to print.
const n = 4;

const topNWords = getTopNWords(s, n);

// Object.keys lists only the result's own keys; for...in would additionally
// visit any enumerable keys inherited through the prototype chain.
for (const key of Object.keys(topNWords)) {
    console.log(key);
}


 
/*
run:
 
"javascript" 
"applications" 
"side" 
"typescript" 
 
*/

 



answered Feb 2 by avibootz
...