Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit 0247edf

Browse files
change detect Charset to match implementation in JS
1 parent 457cbfc commit 0247edf

1 file changed

Lines changed: 13 additions & 21 deletions

File tree

  • standalone/src/test/java/com/trivadis/plsql/formatter/sqlcl/tests

standalone/src/test/java/com/trivadis/plsql/formatter/sqlcl/tests/CharsetTest.java

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import java.nio.file.Files;
1414
import java.nio.file.Path;
1515
import java.nio.file.Paths;
16+
import java.util.Arrays;
1617

1718
public class CharsetTest extends AbstractSqlclTest {
1819

@@ -73,32 +74,24 @@ public void formatWindows1252() {
7374
class DetectCharsetJava {
7475

7576
private Charset detectCharset(byte[] content) {
76-
try {
77-
// try default character set of the OS (can be overridden via -Dfile.encoding)
78-
var cs = Charset.defaultCharset();
79-
cs.newDecoder().decode(ByteBuffer.wrap(content));
80-
return cs;
81-
} catch (CharacterCodingException e) {
82-
// default is not working, try another character set
83-
// rudimentary solution since Apache Tika cannot be used in SQLcl
84-
for (Charset cs : Charset.availableCharsets().values().stream().filter(
85-
it -> !it.name().equals(Charset.defaultCharset().name())
86-
&& (it.name().equals("UTF-8") || it.name().equals("windows-1252")
87-
)).toList()) {
88-
try {
89-
cs.newDecoder().decode(ByteBuffer.wrap(content));
90-
return cs;
91-
} catch (CharacterCodingException ex) {
92-
return null;
93-
}
77+
// rudimentary solution since Apache Tika cannot be used in SQLcl
78+
// try UTF-8 first, then the default character set of the OS (can be overridden via -Dfile.encoding), then windows-1252
79+
var defaultCharsetName = Charset.defaultCharset().name();
80+
var charsetNames = Arrays.asList("UTF-8", defaultCharsetName, "windows-1252");
81+
for (var charsetName : charsetNames) {
82+
var cs = Charset.forName(charsetName);
83+
try {
84+
cs.newDecoder().decode(ByteBuffer.wrap(content));
85+
return cs;
86+
} catch (CharacterCodingException e) {
87+
// content cannot be decoded with this charset; fall through and try the next candidate
9488
}
95-
return null;
9689
}
90+
return null;
9791
}
9892

9993
@Test
10094
public void detect_windows_1252() throws IOException {
101-
Charset.forName("windows-1252");
10295
var file = sourceFile("windows-1252.sql");
10396
var content = Files.readAllBytes(file);
10497
var actual = detectCharset(content);
@@ -108,7 +101,6 @@ public void detect_windows_1252() throws IOException {
108101

109102
@Test
110103
public void detect_utf_8() throws IOException {
111-
Charset.forName("UTF-8");
112104
var file = sourceFile("utf8.sql");
113105
var content = Files.readAllBytes(file);
114106
var actual = detectCharset(content);

0 commit comments

Comments
 (0)