How to find the prefix that is both long and frequent among all words in a string (length × frequency) with Python

1 Answer

import re
from collections import defaultdict

def group_by_all_prefixes(s):
    """Map each prefix of each word in *s* to the words that start with it.

    The text is lower-cased and split into runs of ASCII letters; every
    other character acts as a separator. For each word, all of its
    prefixes (length 1 up to the full word) become keys, and the word is
    appended to the list stored under each key.

    A word that occurs several times in *s* is appended once per
    occurrence, so the lists may contain duplicates.

    Returns a plain ``dict`` of ``prefix -> list of words``; keys appear in
    the order in which the prefixes were first encountered.
    """
    prefix_map = {}

    for match in re.finditer(r"[a-zA-Z]+", s.lower()):
        token = match.group()
        # Slice ends 1..len(token) yield every non-empty prefix,
        # the whole word included.
        for end in range(1, len(token) + 1):
            prefix_map.setdefault(token[:end], []).append(token)

    return prefix_map


s = "Lowly inhabitants of the lowland were surprised to see the lower branches of the trees."

# Map every prefix of every word in the sentence to the words starting with it.
groups = group_by_all_prefixes(s)

# Keep only prefixes that appear in 2+ words.
# A word repeated in the text counts once per occurrence, so e.g. "the"
# qualifies on its own when it occurs more than once.
filtered = {p: ws for p, ws in groups.items() if len(ws) >= 2}

print("Best group:")
if filtered:
    # BEST GROUP RULE:
    # score = prefix_len * group_count
    # Ties go to the prefix that was inserted first: max() returns the first
    # maximal item and dicts iterate in insertion order. For this sentence
    # "low" and "the" both score 9, and "low" wins because it is seen first.
    best_prefix = max(filtered, key=lambda p: len(p) * len(filtered[p]))
    best_words = filtered[best_prefix]
    print(f"{best_prefix} | prefix_len={len(best_prefix)} | group_count={len(best_words)} | {best_words}")
else:
    # max() raises ValueError on an empty mapping, which happens whenever
    # no prefix is shared by at least two words (e.g. empty or one-word text).
    print("No prefix is shared by two or more words.")



'''
run:

Best group:
low | prefix_len=3 | group_count=3 | ['lowly', 'lowland', 'lower']

'''

70+ SQL courses for beginners and professionals

answered Mar 12 by avibootz

Most popular tags

How to find the prefix that is both long and frequent among all words in a string (length × frequency) with Python

1 Answer

Related questions