How to check if an array of bytes is a valid UTF-8 string in Scala

1 Answer

0 votes
import java.nio.charset.{Charset, CodingErrorAction}
import java.nio.ByteBuffer

/** Utility for checking whether a raw byte array is well-formed UTF-8. */
object Utf8Validator {
  // Local import keeps this object self-contained without touching file-level imports.
  import java.nio.charset.{CharacterCodingException, StandardCharsets}

  /**
   * Reports whether `data` can be decoded as UTF-8 without any error.
   *
   * A new decoder is built on every call and configured to REPORT (rather than
   * replace or ignore) malformed and unmappable input, so any invalid byte
   * sequence makes `decode` throw instead of silently substituting characters.
   *
   * @param data the bytes to check; `null` is treated as invalid
   * @return `true` if the whole array decodes cleanly (an empty array counts as
   *         valid), `false` if decoding fails or `data` is `null`
   */
  def isValidUTF8(data: Array[Byte]): Boolean =
    // Option(...) maps null to None, so a null array yields false explicitly
    // instead of relying on a NullPointerException being swallowed.
    Option(data).exists { bytes =>
      val decoder = StandardCharsets.UTF_8
        .newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT)

      try {
        decoder.decode(ByteBuffer.wrap(bytes))
        true
      } catch {
        // Only a decoding failure means "not valid UTF-8"; anything else is a
        // genuine error and is allowed to propagate.
        case _: CharacterCodingException => false
      }
    }

  /** Demo entry point: prints the validation result for one valid and one invalid sample. */
  def main(args: Array[String]): Unit = {
    val arr1 = "Hello, 世界".getBytes(StandardCharsets.UTF_8)
    val arr2 = Array[Byte](0xa3.toByte, 0xed.toByte, 0xfd.toByte)

    // println on a Boolean prints "true" / "false" directly.
    println(isValidUTF8(arr1))
    println(isValidUTF8(arr2))
  }
}
 
 
  
/*
run:
 
true
false
 
*/

 



answered Jul 8, 2025 by avibootz
...