How to remove Unicode characters of a specific range from a string in Java

1 Answer

0 votes
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates replacing every character outside the 7-bit ASCII range
 * (U+0000..U+007F) in a string with a single space.
 */
public class MyClass {
    /**
     * Matches one code point outside U+0000..U+007F.
     *
     * <p>Deliberately compiled without flags:
     * <ul>
     *   <li>CASE_INSENSITIVE together with UNICODE_CASE makes the range also
     *       accept characters whose case mapping lands in ASCII (U+212A
     *       KELVIN SIGN, U+017F LONG S, U+0131, U+0130), so the negated class
     *       would skip them and they would survive the replacement.</li>
     *   <li>CANON_EQ can make a base ASCII letter plus a following combining
     *       mark match as one composed character, which would replace the
     *       ASCII letter as well.</li>
     * </ul>
     */
    private static final Pattern NON_ASCII = Pattern.compile("[^\\x00-\\x7F]");

    /**
     * Runs the demo: prints the sample text with its non-ASCII character
     * replaced by a space.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // NOTE(review): the sample needs at least one non-ASCII character for
        // the demo to show anything; U+00A9 (copyright sign) is written as an
        // escape so the source stays ASCII-safe. The exact character used in
        // the original post is unknown - any non-ASCII character gives the
        // same three-space output.
        String sample = "java c c++ \u00A9 php";

        System.out.println(replaceNonAscii(sample));
    }

    /**
     * Replaces each code point above U+007F in {@code text} with one space.
     * A supplementary character (a surrogate pair) counts as one code point
     * and therefore yields one space. ASCII characters, including control
     * characters, are left untouched.
     *
     * @param text the text to clean; must not be {@code null}
     * @return a copy of {@code text} containing only ASCII characters
     * @throws NullPointerException if {@code text} is {@code null}
     */
    static String replaceNonAscii(String text) {
        // A Java String is already a sequence of UTF-16 code units, so no
        // byte-level UTF-8 round trip is needed before matching.
        return NON_ASCII.matcher(text).replaceAll(" ");
    }
}



/*
run:

java c c++   php

*/

 



answered Jul 27, 2020 by avibootz
...