How to get the top N words of a string by number of occurrences in C++

1 Answer

0 votes
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

// A word together with the number of times it was seen.
//
// Kept as a plain aggregate so it can still be built with brace
// initialization, e.g. WordCount{"language", 3}.
struct WordCount {
    std::string word;  // the word exactly as it is reported
    int count = 0;     // number of occurrences; zero when default-constructed
};

bool compare(const WordCount& a, const WordCount& b) {
    return a.count > b.count;
}

int main() {
    std::string str = "C++ is a high-level, general-purpose programming language "
                      "created by Danish computer scientist Bjarne Stroustrup. "
                      "First released in 1985 as an extension of the C programming language, "
                      "it has since expanded significantly over time; as of 1997, C++ has "
                      "object-oriented, generic, and functional features, "
                      "It is usually implemented as a compiled language";
    int N = 4; // Number of top words to find
    
    std::unordered_map<std::string, int> wordMap;
    std::istringstream stream(str);
    std::string word;
    while (stream >> word) {
        wordMap[word]++;
    }
    
    std::vector<WordCount> wordCounts;
    for (const auto& pair : wordMap) {
        // Exclude stop words (commonly used words)
        if (pair.first == "is" || pair.first == "a" ||
            pair.first == "the" || pair.first == "It" ||
            pair.first == "in" || pair.first == "and" ||
            pair.first == "has" || pair.first == "of" ||
            pair.first == "as" || pair.first == "an") {
            continue;
        }
        wordCounts.push_back({pair.first, pair.second});
    }
    
    std::sort(wordCounts.begin(), wordCounts.end(), compare);
    
    for (int i = 0; i < N && i < wordCounts.size(); ++i) {
        std::cout << wordCounts[i].word << "\n";
    }
}


/*
run (sample output; ties between equally frequent words may be resolved differently):

language
programming
C++
Bjarne

*/

 



answered Feb 2, 2025 by avibootz

Related questions

1 answer 84 views
1 answer 130 views
1 answer 114 views
1 answer 128 views
1 answer 112 views
1 answer 120 views
...