How to create an inverted index of a string in C++

1 Answer

0 votes
// An inverted index of a string is building a data structure that 
// maps each word to positions where that word appears. 

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
#include <sstream>
#include <algorithm>

// ------------------------------------------------------------
// cleanWord
// Removes punctuation and converts to lowercase.
// This ensures consistent indexing.
// ------------------------------------------------------------
std::string cleanWord(const std::string& word) {
    std::string cleaned;

    for (char c : word) {
        if (std::isalnum(static_cast<unsigned char>(c))) {
            cleaned += std::tolower(c);
        }
    }

    return cleaned;
}

// ------------------------------------------------------------
// buildInvertedIndex
// Creates an inverted index mapping each word to the positions
// where it appears in the input string.
// ------------------------------------------------------------
std::unordered_map<std::string, std::vector<int>>
buildInvertedIndex(const std::string& text) {

    std::unordered_map<std::string, std::vector<int>> index;
    std::istringstream iss(text);
    std::string word;
    int position = 0;

    while (iss >> word) {
        std::string cleaned = cleanWord(word);

        if (!cleaned.empty()) {
            index[cleaned].push_back(position);
        }

        position++;
    }

    return index;
}

int main() {
    std::string text =
        "Imagine all the people living for today. "
        "Imagine there's no heaven. Imagine all the people.";

    // Build the inverted index
    auto index = buildInvertedIndex(text);

    // Print the inverted index
    std::cout << "Inverted Index:\n";
    for (const auto& entry : index) {
        std::cout << entry.first << ": ";

        for (int pos : entry.second) {
            std::cout << pos << " ";
        }

        std::cout << "\n";
    }
}


/*
run:

Inverted Index:
heaven: 10 
no: 9 
today: 6 
living: 4 
theres: 8 
people: 3 14 
the: 2 13 
all: 1 12 
for: 5 
imagine: 0 7 11 

*/

 



answered May 10 by avibootz
...