How to get the N top words of a string by occurrences in JavaScript

2 Answers

0 votes
/**
 * Returns the most frequent words of a string, most frequent first.
 *
 * A word is a run of word characters, optionally followed by an apostrophe
 * and one or two more word characters (e.g. "don't", "we're"). Words are
 * lowercased before counting and a small set of stop words is ignored.
 *
 * Words with equal counts keep the enumeration order of the tally object:
 * integer-like tokens (e.g. "99") first, then order of first appearance.
 *
 * @param {string} str - Text to analyse.
 * @param {number} n - Maximum number of words to return.
 * @returns {string[]} At most `n` lowercased words, ordered by descending
 *     occurrence count. Shorter than `n` when the text has fewer distinct words.
 */
function getTopNWords(str, n) {
    // Must be a RegExp literal: a quoted pattern is a plain string and has
    // no exec()/match semantics.
    const wordRegExp = /\w+(?:'\w{1,2})?/g;

    // Stop words (commonly used words) excluded from the ranking.
    const stopWords = new Set(["a", "and", "the", "of", "is", "for"]);

    // Prototype-less object so tokens such as "constructor" or "toString"
    // cannot collide with members inherited from Object.prototype.
    const counts = Object.create(null);

    // match() with a global regex yields every match, or null when none.
    for (const token of str.match(wordRegExp) || []) {
        const word = token.toLowerCase();
        if (stopWords.has(word)) {
            continue;
        }
        counts[word] = (counts[word] || 0) + 1;
    }

    // Sort by occurrences, highest first; the sort is stable, so ties keep
    // the enumeration order described above.
    const ranked = Object.entries(counts)
        .sort(function(a, b) { return b[1] - a[1]; });

    // Never read past the end when fewer than n distinct words exist.
    const limit = Math.min(n, ranked.length);
    const topWords = [];
    for (let i = 0; i < limit; i++) {
        topWords.push(ranked[i][0]);
    }

    return topWords;
}
 
// Sample text. Every fragment except the last ends with a separator so that
// adjacent fragments never fuse into a single token when concatenated.
const s = "JavaScript is a programming language "
        + "and core technology of the Web, alongside "
        + "HTML and CSS. 99% of websites use JavaScript "
        + "on the client side for webpage behavior. "
        + "Web browsers have a dedicated JavaScript engine "
        + "that executes the client code. These engines are "
        + "also utilized in some servers and a variety of apps. "
        + "The most popular runtime system for non-browser usage is Node.js.";

console.log(getTopNWords(s, 4));


/*
run:

[ 'javascript', 'web', 'client', '99' ]

*/

 



answered Feb 1 by avibootz
0 votes
/**
 * Removes every occurrence of a word from a string.
 *
 * The input is lowercased and split into runs of word characters; tokens
 * equal to `word` are dropped and the rest are joined with single spaces,
 * so punctuation and original casing are not preserved.
 *
 * @param {string} s - Text to filter.
 * @param {string} word - Token to remove; compared against lowercased tokens.
 * @returns {string} The remaining lowercased tokens separated by spaces, or
 *     an empty string when `s` contains no word characters.
 */
const removeWord = (s, word) => {
    // match() returns null (not an empty array) when nothing matches.
    const tokens = s.toLowerCase().match(/\w+/g) || [];

    return tokens.filter(token => token !== word).join(' ');
};

/**
 * Counts word occurrences in a string and returns the `n` most frequent.
 *
 * The text is lowercased and split into runs of word characters; a fixed
 * list of stop words is ignored. Words are ranked by descending count, with
 * ties broken alphabetically via localeCompare.
 *
 * Note: the result is a plain object, so enumerating it lists integer-like
 * keys (e.g. "99") before all other keys regardless of their rank.
 *
 * @param {string} s - Text to analyse.
 * @param {number} n - Maximum number of words to return.
 * @returns {Object<string, number>} Map of word to occurrence count holding
 *     at most `n` entries; empty when the text has no countable words.
 */
const getTopNWords = (s, n) => {
    // Exclude stop words (commonly used words)
    const stopWords = new Set(["is", "a", "to", "as", "for", "of",
                               "on", "and", "the", "alongside", "also"]);

    // Tokenise once; match() returns null when there are no words at all.
    const words = (s.toLowerCase().match(/\w+/g) || [])
        .filter(word => !stopWords.has(word));

    // Count the occurrences of each word. A Map keeps tokens such as
    // "constructor" from colliding with inherited object members.
    const wordCount = new Map();
    for (const word of words) {
        wordCount.set(word, (wordCount.get(word) || 0) + 1);
    }

    // Sort the words by their occurrences and get the top N words
    const ranked = [...wordCount]
        .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
        .slice(0, n);

    return Object.fromEntries(ranked);
};

// Sample text. Every fragment except the last ends with a separator so that
// adjacent fragments never fuse into a single token when concatenated.
const s = "JavaScript is a programming language "
        + "and core technology of the Web, alongside "
        + "HTML and CSS. 99% of websites use JavaScript "
        + "on the client side for webpage behavior. "
        + "Web browsers have a dedicated JavaScript engine "
        + "that executes the client code. These engines are "
        + "also utilized in some servers and a variety of apps. "
        + "The most popular runtime system for non-browser usage is Node.js.";

const n = 4;

const topNWords = getTopNWords(s, n);

// Keys print in object enumeration order, which lists integer-like keys
// such as "99" first; this is not necessarily the ranking order.
for (const key of Object.keys(topNWords)) {
    console.log(key);
}



/*
run:

99
javascript
client
web

*/

 



answered Feb 2 by avibootz
...