// An inverted index of a string is building a data structure that
// maps each word to positions where that word appears.
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAX_WORDS 256
#define MAX_WORD_LEN 64
// ------------------------------------------------------------
// cleanWord
// Removes punctuation and converts to lowercase.
// This ensures consistent indexing.
// ------------------------------------------------------------
void cleanWord(const char *input, char *output) {
int j = 0;
for (int i = 0; input[i] != '\0'; i++) {
if (isalnum((unsigned char)input[i])) {
output[j++] = tolower((unsigned char)input[i]);
}
}
output[j] = '\0';
}
// ------------------------------------------------------------
// findWordIndex
// Returns index of word in dictionary, or -1 if not found.
// ------------------------------------------------------------
int findWordIndex(char words[][MAX_WORD_LEN], int wordCount, const char *word) {
for (int i = 0; i < wordCount; i++) {
if (strcmp(words[i], word) == 0) {
return i;
}
}
return -1;
}
// ------------------------------------------------------------
// buildInvertedIndex
// Builds an inverted index using simple arrays.
// words[] holds unique words.
// positions[][] holds lists of positions for each word.
// ------------------------------------------------------------
int buildInvertedIndex(
const char *text,
char words[][MAX_WORD_LEN],
int positions[][MAX_WORDS],
int counts[]
) {
char buffer[1024] = "";
strcpy(buffer, text);
int wordCount = 0;
int position = 0;
char *token = strtok(buffer, " ");
while (token != NULL) {
char cleaned[MAX_WORD_LEN];
cleanWord(token, cleaned);
if (cleaned[0] != '\0') {
int idx = findWordIndex(words, wordCount, cleaned);
if (idx == -1) {
// New word
strcpy(words[wordCount], cleaned);
positions[wordCount][counts[wordCount]++] = position;
wordCount++;
} else {
// Existing word
positions[idx][counts[idx]++] = position;
}
}
position++;
token = strtok(NULL, " ");
}
return wordCount;
}
int main() {
const char text[] =
"Imagine all the people living for today. "
"Imagine there's no heaven. Imagine all the people.";
char words[MAX_WORDS][MAX_WORD_LEN] = {0};
int positions[MAX_WORDS][MAX_WORDS] = {0};
int counts[MAX_WORDS] = {0};
int wordCount = buildInvertedIndex(text, words, positions, counts);
printf("Inverted Index:\n");
for (int i = 0; i < wordCount; i++) {
printf("%s: ", words[i]);
for (int j = 0; j < counts[i]; j++) {
printf("%d ", positions[i][j]);
}
printf("\n");
}
return 0;
}
/*
run:
Inverted Index:
imagine: 0 7 11
all: 1 12
the: 2 13
people: 3 14
living: 4
for: 5
today: 6
theres: 8
no: 9
heaven: 10
*/