How to find the longest shared prefix among all words in a string with C#

1 Answer

0 votes
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

class LongestSharedPrefix
{
    // Extract alphabetic words (lowercased)
    static List<string> ExtractWords(string text) {
        var words = new List<string>();
        var matches = Regex.Matches(text, "[A-Za-z]+");

        foreach (Match m in matches)
            words.Add(m.Value.ToLower());

        return words;
    }

    // Build prefix → list of words that share that prefix
    static Dictionary<string, List<string>> GroupByPrefixes(List<string> words) {
        var groups = new Dictionary<string, List<string>>();

        foreach (var w in words) {
            for (int i = 1; i <= w.Length; i++) {
                string prefix = w.Substring(0, i);

                if (!groups.ContainsKey(prefix))
                    groups[prefix] = new List<string>();

                groups[prefix].Add(w);
            }
        }

        return groups;
    }

    // Find the longest prefix that appears in 2+ words
    static string FindLongestSharedPrefix(Dictionary<string, List<string>> groups) {
        string best = "";

        foreach (var kvp in groups) {
            string prefix = kvp.Key;
            var list = kvp.Value;

            if (list.Count >= 2 && prefix.Length > best.Length)
                best = prefix;
        }

        return best;
    }

    static void Main()
    {
        string text =
            "The Lowly inhabitants of the lowland were surprised to see " +
            "the lower branches of the trees.";

        var words = ExtractWords(text);
        var groups = GroupByPrefixes(words);
        var prefix = FindLongestSharedPrefix(groups);

        if (!string.IsNullOrEmpty(prefix)) {
            var group = groups[prefix];
            Console.WriteLine("Longest shared prefix:");
            Console.WriteLine($"{prefix} | prefix_len={prefix.Length} | group_count={group.Count} | [{string.Join(", ", group)}]");
        }
        else {
            Console.WriteLine("No shared prefix found.");
        }
    }
}



/*
run:

Longest shared prefix:
lowl | prefix_len=4 | group_count=2 | [lowly, lowland]

*/

 



answered Mar 14 by avibootz

Related questions

...