How to find the longest shared prefix among all words in a string with Scala

1 Answer

0 votes
import scala.util.matching.Regex

object LongestSharedPrefix {

  // Extract alphabetic words (lowercased)
  def extractWords(text: String): List[String] = {
    val re: Regex = "[A-Za-z]+".r
    re.findAllIn(text).map(_.toLowerCase).toList
  }

  // Build prefix → list of words that share that prefix
  def groupByPrefixes(words: List[String]): Map[String, List[String]] = {
    words.foldLeft(Map.empty[String, List[String]]) { (groups, w) =>
      val prefixes = (1 to w.length).map(i => w.substring(0, i))
      prefixes.foldLeft(groups) { (acc, prefix) =>
        acc.updated(prefix, acc.getOrElse(prefix, Nil) :+ w)
      }
    }
  }

  // Find the longest prefix that appears in 2+ words
  def longestSharedPrefix(groups: Map[String, List[String]]): Option[String] = {
    groups.collect {
      case (prefix, ws) if ws.size >= 2 => prefix
    }.reduceOption((a, b) => if (a.length >= b.length) a else b)
  }

  def main(args: Array[String]): Unit = {
    val text =
      "The Lowly inhabitants of the lowland were surprised to see " +
      "the lower branches of the trees."

    val words  = extractWords(text)
    val groups = groupByPrefixes(words)
    val prefix = longestSharedPrefix(groups)

    prefix match {
      case Some(p) =>
        val group = groups(p)
        println("Longest shared prefix:")
        println(s"$p | prefix_len=${p.length} | group_count=${group.size} | ${group.mkString("[", ", ", "]")}")

      case None =>
        println("No shared prefix found.")
    }
  }
}





/*
run:

Longest shared prefix:
lowl | prefix_len=4 | group_count=2 | [lowly, lowland]

*/

 



answered Mar 14 by avibootz

Related questions

...