// An inverted index of a string maps each word to the positions
// where that word appears. This version also supports phrase search.
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
#include <sstream> // istringstream
// ------------------------------------------------------------
// cleanWord
// Removes punctuation and converts to lowercase.
// Ensures consistent indexing.
// ------------------------------------------------------------
std::string cleanWord(const std::string& word) {
std::string cleaned;
for (char c : word) {
if (std::isalnum(static_cast<unsigned char>(c))) {
cleaned += std::tolower(static_cast<unsigned char>(c));
}
}
return cleaned;
}
// ------------------------------------------------------------
// buildInvertedIndex
// Creates an inverted index mapping each word to the positions
// where it appears in the input string.
// ------------------------------------------------------------
std::unordered_map<std::string, std::vector<int>>
buildInvertedIndex(const std::string& text, std::vector<std::string>& orderedWords) {
std::unordered_map<std::string, std::vector<int>> index;
std::istringstream iss(text);
std::string word;
int position = 0;
while (iss >> word) {
std::string cleaned = cleanWord(word);
if (!cleaned.empty()) {
orderedWords.push_back(cleaned);
index[cleaned].push_back(position);
position++;
}
}
return index;
}
// ------------------------------------------------------------
// phraseSearch
// Searches for a multi-word phrase using the ordered word list.
// Returns all starting positions where the phrase appears.
// ------------------------------------------------------------
std::vector<int> phraseSearch(
const std::vector<std::string>& orderedWords,
const std::string& phrase
) {
std::vector<int> results;
// Split phrase into cleaned words
std::istringstream iss(phrase);
std::string word;
std::vector<std::string> phraseWords;
while (iss >> word) {
std::string cleaned = cleanWord(word);
if (!cleaned.empty()) {
phraseWords.push_back(cleaned);
}
}
if (phraseWords.empty()) return results;
// Scan orderedWords for matching sequences
for (size_t i = 0; i + phraseWords.size() <= orderedWords.size(); i++) {
bool match = true;
for (size_t j = 0; j < phraseWords.size(); j++) {
if (orderedWords[i + j] != phraseWords[j]) {
match = false;
break;
}
}
if (match) {
results.push_back(static_cast<int>(i));
}
}
return results;
}
int main() {
std::string text =
"Imagine all the people living for today. "
"Imagine there's no heaven. Imagine all the people living for today.";
// Store words in order for phrase search
std::vector<std::string> orderedWords;
// Build the inverted index
auto index = buildInvertedIndex(text, orderedWords);
// Print the inverted index
std::cout << "Inverted Index:\n";
for (const auto& entry : index) {
std::cout << entry.first << ": ";
for (int pos : entry.second) {
std::cout << pos << " ";
}
std::cout << "\n";
}
// Phrase search examples
std::string phrase = "living for today";
auto positions = phraseSearch(orderedWords, phrase);
std::cout << "\nPhrase Search: \"" << phrase << "\"\nPositions: ";
for (int pos : positions) {
std::cout << pos << " ";
}
std::cout << "\n";
}
/*
run:
Inverted Index:
heaven: 10
no: 9
today: 6 17
living: 4 15
theres: 8
people: 3 14
the: 2 13
all: 1 12
for: 5 16
imagine: 0 7 11
Phrase Search: "living for today"
Positions: 4 15
*/