How to find the longest shared prefix among all words in a string with PHP

1 Answer

0 votes
// Extract alphabetic words (lowercased)
function extractWords(string $text): array {
    preg_match_all('/[A-Za-z]+/', $text, $matches);
    return array_map('strtolower', $matches[0]);
}

// Build prefix → list of words that share that prefix
function groupByPrefixes(array $words): array {
    $groups = [];

    foreach ($words as $w) {
        for ($i = 1; $i <= strlen($w); $i++) {
            $prefix = substr($w, 0, $i);

            if (!isset($groups[$prefix])) {
                $groups[$prefix] = [];
            }

            $groups[$prefix][] = $w;
        }
    }

    return $groups;
}

// Find the longest prefix that appears in 2+ words
function longestSharedPrefix(array $groups): string {
    $best = "";

    foreach ($groups as $prefix => $list) {
        if (count($list) >= 2 && strlen($prefix) > strlen($best)) {
            $best = $prefix;
        }
    }

    return $best;
}


// ------------------------------------------------------------
// Main
// ------------------------------------------------------------

$text = "The Lowly inhabitants of the lowland were surprised to see "
      . "the lower branches of the trees.";

$words  = extractWords($text);
$groups = groupByPrefixes($words);
$prefix = longestSharedPrefix($groups);

if ($prefix !== "") {
    $group = $groups[$prefix];
    echo "Longest shared prefix:\n";
    echo $prefix . " | prefix_len=" . strlen($prefix)
        . " | group_count=" . count($group)
        . " | [" . implode(", ", $group) . "]\n";
} else {
    echo "No shared prefix found.\n";
}



/*
run:

Longest shared prefix:
lowl | prefix_len=4 | group_count=2 | [lowly, lowland]

*/

 



answered Mar 14 by avibootz

Related questions

...