1313import java .nio .file .Files ;
1414import java .nio .file .Path ;
1515import java .nio .file .Paths ;
16+ import java .util .Arrays ;
1617
1718public class CharsetTest extends AbstractSqlclTest {
1819
@@ -73,32 +74,24 @@ public void formatWindows1252() {
7374 class DetectCharsetJava {
7475
7576 private Charset detectCharset (byte [] content ) {
76- try {
77- // try default character set of the OS (can be overridden via -Dfile.encoding)
78- var cs = Charset .defaultCharset ();
79- cs .newDecoder ().decode (ByteBuffer .wrap (content ));
80- return cs ;
81- } catch (CharacterCodingException e ) {
82- // default is not working, try another character set
83- // rudimentary solution since Apache Tika cannot be used in SQLcl
84- for (Charset cs : Charset .availableCharsets ().values ().stream ().filter (
85- it -> !it .name ().equals (Charset .defaultCharset ().name ())
86- && (it .name ().equals ("UTF-8" ) || it .name ().equals ("windows-1252" )
87- )).toList ()) {
88- try {
89- cs .newDecoder ().decode (ByteBuffer .wrap (content ));
90- return cs ;
91- } catch (CharacterCodingException ex ) {
92- return null ;
93- }
77+ // rudimentary solution since Apache Tika cannot be used in SQLcl
78+ // try default character set of the OS (can be overridden via -Dfile.encoding), then UTF-8, then windows-1252
79+ var defaultCharsetName = Charset .defaultCharset ().name ();
80+ var charsetNames = Arrays .asList ("UTF-8" , defaultCharsetName , "windows-1252" );
81+ for (var charsetName : charsetNames ) {
82+ var cs = Charset .forName (charsetName );
83+ try {
84+ cs .newDecoder ().decode (ByteBuffer .wrap (content ));
85+ return cs ;
86+ } catch (CharacterCodingException e ) {
87+ // ignore exception
9488 }
95- return null ;
9689 }
90+ return null ;
9791 }
9892
9993 @ Test
10094 public void detect_windows_1252 () throws IOException {
101- Charset .forName ("windows-1252" );
10295 var file = sourceFile ("windows-1252.sql" );
10396 var content = Files .readAllBytes (file );
10497 var actual = detectCharset (content );
@@ -108,7 +101,6 @@ public void detect_windows_1252() throws IOException {
108101
109102 @ Test
110103 public void detect_utf_8 () throws IOException {
111- Charset .forName ("UTF-8" );
112104 var file = sourceFile ("utf8.sql" );
113105 var content = Files .readAllBytes (file );
114106 var actual = detectCharset (content );
0 commit comments