#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h> // tolower // isalpha
#define MAX_WORDS 1024
#define MAX_PREFIXES 8192
// -----------------------------
// Data structures
// -----------------------------
typedef struct {
char *prefix;
char **words;
size_t count;
} PrefixGroup;
// -----------------------------
// Utility: lowercase a string
// -----------------------------
void to_lower(char *s) {
for (; *s; ++s)
*s = (char)tolower(*s);
}
// -----------------------------
// Extract alphabetic words
// -----------------------------
size_t extract_words(const char *text, char **words) {
size_t count = 0;
const char *p = text;
while (*p) {
// Skip non-alpha
while (*p && !isalpha((unsigned char)*p))
p++;
if (!*p) break;
const char *start = p;
// Consume alpha
while (*p && isalpha((unsigned char)*p))
p++;
size_t len = p - start;
char *w = malloc(len + 1);
memcpy(w, start, len);
w[len] = '\0';
to_lower(w);
words[count++] = w;
}
return count;
}
// -----------------------------
// Add a word to a prefix group
// -----------------------------
void add_word_to_group(PrefixGroup *g, const char *word) {
g->words = realloc(g->words, (g->count + 1) * sizeof(char *));
g->words[g->count++] = strdup(word);
}
// -----------------------------
// Find or create prefix group
// -----------------------------
PrefixGroup *find_or_create_group(PrefixGroup *groups, size_t *group_count, const char *prefix) {
for (size_t i = 0; i < *group_count; i++) {
if (strcmp(groups[i].prefix, prefix) == 0)
return &groups[i];
}
// Create new group
groups[*group_count].prefix = strdup(prefix);
groups[*group_count].words = NULL;
groups[*group_count].count = 0;
return &groups[(*group_count)++];
}
// -----------------------------
// Group words by all prefixes
// -----------------------------
size_t group_by_all_prefixes(char **words, size_t word_count, PrefixGroup *groups) {
size_t group_count = 0;
for (size_t w = 0; w < word_count; w++) {
size_t len = strlen(words[w]);
for (size_t i = 1; i <= len; i++) {
char prefix[128];
memcpy(prefix, words[w], i);
prefix[i] = '\0';
PrefixGroup *g = find_or_create_group(groups, &group_count, prefix);
add_word_to_group(g, words[w]);
}
}
return group_count;
}
// -----------------------------
// Find longest prefix with >=2 words
// -----------------------------
PrefixGroup *find_longest_shared_prefix(PrefixGroup *groups, size_t group_count) {
PrefixGroup *best = NULL;
for (size_t i = 0; i < group_count; i++) {
if (groups[i].count >= 2) {
if (!best || strlen(groups[i].prefix) > strlen(best->prefix))
best = &groups[i];
}
}
return best;
}
// -----------------------------
// Main
// -----------------------------
int main() {
const char *text =
"The Lowly inhabitants of the lowland were surprised to see "
"the lower branches of the trees.";
char *words[MAX_WORDS];
PrefixGroup groups[MAX_PREFIXES];
size_t word_count = extract_words(text, words);
size_t group_count = group_by_all_prefixes(words, word_count, groups);
PrefixGroup *best = find_longest_shared_prefix(groups, group_count);
if (best) {
printf("Longest shared prefix:\n");
printf("%s | prefix_len=%zu | group_count=%zu | [",
best->prefix, strlen(best->prefix), best->count);
for (size_t i = 0; i < best->count; i++) {
printf("%s", best->words[i]);
if (i + 1 < best->count) printf(", ");
}
printf("]\n");
}
return 0;
}
/*
run:
Longest shared prefix:
lowl | prefix_len=4 | group_count=2 | [lowly, lowland]
*/