How to get the top N words of a string by number of occurrences in Node.js

1 Answer

0 votes
/**
 * Remove every occurrence of a word from a string.
 *
 * The input is lowercased and split into runs of word characters (`\w+`),
 * so punctuation is discarded and the result is the remaining words joined
 * by single spaces.
 *
 * @param {string} s - Text to process.
 * @param {string} word - Word to remove; compared case-insensitively.
 * @returns {string} Lowercased, space-separated words of `s` without `word`.
 *   Returns an empty string when `s` contains no words.
 */
const removeWord = (s, word) => {
    // match() with the g flag returns null (not []) when nothing matches,
    // e.g. for an empty or punctuation-only string.
    const words = s.toLowerCase().match(/\w+/g) || [];

    // The tokens are lowercase, so the target must be lowercased as well
    // or a capitalised word could never match.
    const target = word.toLowerCase();

    return words.filter(w => w !== target).join(' ');
};
 
/**
 * Count word occurrences in a string and return the N most frequent words.
 *
 * The text is lowercased and split into runs of word characters (`\w+`).
 * A fixed list of stop words is ignored. Words are ranked by descending
 * count; ties are broken alphabetically via `localeCompare`.
 *
 * @param {string} s - Text to analyse.
 * @param {number} n - Maximum number of words to return.
 * @returns {Object<string, number>} Plain object mapping each of the top
 *   words to its count, with keys inserted in ranking order. Empty when `s`
 *   contains no countable words.
 *   NOTE: JavaScript enumerates integer-like keys (e.g. "2024") before
 *   other keys, so such words appear first when the result is iterated.
 */
const getTopNWords = (s, n) => {
    // Exclude stop words (commonly used words)
    const stopWords = new Set(["is", "a", "to", "as", "for", "of", "can",
                               "on", "and", "the", "alongside", "also"]);

    // Split the string into words. match() returns null when there is no
    // match at all, so fall back to an empty list instead of throwing.
    const words = (s.toLowerCase().match(/\w+/g) || [])
        .filter(word => !stopWords.has(word));

    // Count the occurrences of each word. A Map is used so that words such
    // as "constructor" or "__proto__" cannot collide with members inherited
    // from Object.prototype.
    const wordCount = new Map();
    for (const word of words) {
        wordCount.set(word, (wordCount.get(word) || 0) + 1);
    }

    // Sort the words by their occurrences and get the top N words
    const topEntries = [...wordCount.entries()]
        .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
        .slice(0, n);

    // fromEntries defines own properties, so even a "__proto__" key is kept.
    return Object.fromEntries(topEntries);
};
 
// Sample text. Every fragment except the last ends with a space so that
// sentences stay separated once the pieces are concatenated.
const s = "Node.js is a cross-platform, open-source JavaScript " +
          "runtime environment that can run on Windows, Linux, Unix, " +
          "macOS, and more. Node.js runs on the V8 JavaScript engine, " +
          "and executes JavaScript code outside a web browser. " +
          "Node.js lets developers use JavaScript to write command line " +
          "tools and for server-side scripting.";

// Number of top words to report
const n = 4;

const topNWords = getTopNWords(s, n);

// Print only the words (not their counts), one per line
for (const key of Object.keys(topNWords)) {
    console.log(key);
}


  
/*
run:
  
javascript
js
node
browser
  
*/

 



answered Feb 2 by avibootz
...