Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit 7becf86

Browse files
Merge pull request #231 from Trivadis/feature/issue-228-detect-charset
Feature/issue 228 detect charset
2 parents c3d2f32 + 8c1a3a5 commit 7becf86

9 files changed

Lines changed: 287 additions & 43 deletions

File tree

.gitattributes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Test file encoded in windows-1252
2+
windows-1252.sql encoding=cp1252

sqlcl/format.js

Lines changed: 69 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
var javaString = Java.type("java.lang.String");
2424
var javaSystem = Java.type("java.lang.System");
2525
// java.nio
26+
var javaByteBuffer = Java.type("java.nio.ByteBuffer");
27+
var javaCharset = Java.type("java.nio.charset.Charset");
2628
var javaFiles = Java.type("java.nio.file.Files");
2729
var javaFileSystems = Java.type("java.nio.file.FileSystems");
2830
var javaPaths = Java.type("java.nio.file.Paths");
@@ -179,8 +181,8 @@ var readFile = function (file) {
179181
return new javaString(javaFiles.readAllBytes(file));
180182
}
181183

182-
var writeFile = function (file, content) {
183-
var writer = javaFiles.newBufferedWriter(file);
184+
var writeFile = function (file, content, charset) {
185+
var writer = javaFiles.newBufferedWriter(file, charset);
184186
writer.write(content);
185187
writer.close();
186188
}
@@ -533,36 +535,59 @@ var isMarkdownFile = function (file, markdownExtensions) {
533535
return false;
534536
}
535537

536-
var formatMarkdownFile = function (file, formatter, serr) {
537-
var original = readFile(file)
538-
var p = javaPattern.compile("(```\\s*sql\\s*\\n)(.+?)(\\n```)", javaPattern.DOTALL);
539-
var m = p.matcher(original);
540-
var result = "";
541-
var pos = 0;
542-
var consoleOutput = false;
543-
if (serr == "all" || serr == "mext") {
544-
consoleOutput = true;
545-
}
546-
var sqlBlock = 0;
547-
while (m.find()) {
548-
sqlBlock++;
549-
ctx.write("#" + sqlBlock + "... ");
550-
result += original.substring(pos, m.end(1));
551-
if (hasParseErrors(m.group(2), consoleOutput)) {
552-
ctx.write("skipped... ")
553-
result += original.substring(m.start(2), m.end(3));
554-
} else {
555-
ctx.write("done... ")
556-
result += formatter.format(m.group(2));
557-
result += original.substring(m.end(2), m.end(3));
538+
var detectCharset = function(content) {
539+
// rudimentary solution since Apache Tika cannot be used in SQLcl
540+
// try the JVM default character set (normally derived from the OS, can be overridden via -Dfile.encoding), then UTF-8, then windows-1252
541+
var defaultCharsetName = javaCharset.defaultCharset().name();
542+
var charsetNames = [defaultCharsetName, "UTF-8", "windows-1252"];
543+
for (var i = 0; i < charsetNames.length; i++) {
544+
var cs = javaCharset.forName(charsetNames[i]);
545+
try {
546+
cs.newDecoder().decode(javaByteBuffer.wrap(content));
547+
return cs;
548+
} catch(e) {
549+
// decoding failed, so the content is not valid in this character set; try the next candidate
558550
}
559-
pos = m.end(3);
560551
}
561-
if (original.length > pos) {
562-
result += original.substring(pos);
552+
return null;
553+
}
554+
555+
var formatMarkdownFile = function (file, formatter, serr) {
556+
var bytes = javaFiles.readAllBytes(file);
557+
var charset = detectCharset(bytes);
558+
if (charset == null) {
559+
ctx.write("skipped due to unknown character set.\n");
560+
} else {
561+
var original = new javaString(bytes, charset);
562+
var p = javaPattern.compile("(```\\s*sql\\s*\\n)(.+?)(\\n```)", javaPattern.DOTALL);
563+
var m = p.matcher(original);
564+
var result = "";
565+
var pos = 0;
566+
var consoleOutput = false;
567+
if (serr == "all" || serr == "mext") {
568+
consoleOutput = true;
569+
}
570+
var sqlBlock = 0;
571+
while (m.find()) {
572+
sqlBlock++;
573+
ctx.write("#" + sqlBlock + "... ");
574+
result += original.substring(pos, m.end(1));
575+
if (hasParseErrors(m.group(2), consoleOutput)) {
576+
ctx.write("skipped... ")
577+
result += original.substring(m.start(2), m.end(3));
578+
} else {
579+
ctx.write("done... ")
580+
result += formatter.format(m.group(2));
581+
result += original.substring(m.end(2), m.end(3));
582+
}
583+
pos = m.end(3);
584+
}
585+
if (original.length > pos) {
586+
result += original.substring(pos);
587+
}
588+
writeFile(file, result, charset);
589+
ctx.write("done.\n");
563590
}
564-
writeFile(file, result);
565-
ctx.write("done.\n");
566591
}
567592

568593
var getLineSeparator = function (input) {
@@ -578,16 +603,22 @@ var getLineSeparator = function (input) {
578603
}
579604

580605
var formatFile = function (file, formatter, serr) {
581-
var original = readFile(file)
582-
var consoleOutput = false;
583-
if (serr == "all" || serr == "ext") {
584-
consoleOutput = true;
585-
}
586-
if (hasParseErrors(original, consoleOutput)) {
587-
ctx.write("skipped.\n");
606+
var bytes = javaFiles.readAllBytes(file);
607+
var charset = detectCharset(bytes);
608+
if (charset == null) {
609+
ctx.write("skipped due to unknown character set.\n");
588610
} else {
589-
writeFile(file, formatter.format(original) + getLineSeparator(original));
590-
ctx.write("done.\n");
611+
var original = new javaString(bytes, charset);
612+
var consoleOutput = false;
613+
if (serr == "all" || serr == "ext") {
614+
consoleOutput = true;
615+
}
616+
if (hasParseErrors(original, consoleOutput)) {
617+
ctx.write("skipped.\n");
618+
} else {
619+
writeFile(file, formatter.format(original) + getLineSeparator(original), charset);
620+
ctx.write("done.\n");
621+
}
591622
}
592623
}
593624

standalone/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ The parameters are the same as for the [executable JAR](#executable-jar).
5151

5252
## How to Build
5353

54-
1. [Download](https://www.oracle.com/tools/downloads/sqlcl-downloads.html) and install SQLcl 22.2.0
54+
1. [Download](https://www.oracle.com/tools/downloads/sqlcl-downloads.html) and install SQLcl 22.2.1
5555
2. [Download](https://github.com/graalvm/graalvm-ce-builds/releases) and install the GraalVM JDK 17 22.2.0
5656
3. Go to the `bin` directory of the GraalVM JDK and run `./gu install js native-image`, if you want to produce a native image
5757
4. [Download](https://maven.apache.org/download.cgi) and install Apache Maven 3.8.6

standalone/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,10 @@
386386
<classpath>
387387
${project.build.directory}/${project.build.finalName}.jar:${settings.localRepository}/oracle/dbtools/dbtools-common/${sqlcl.version}/dbtools-common-${sqlcl.version}.jar:${settings.localRepository}/org/reflections/reflections/${reflections.version}/reflections-${reflections.version}.jar:${settings.localRepository}/org/javassist/javassist/${javassist.version}/javassist-${javassist.version}.jar:${settings.localRepository}/org/slf4j/slf4j-api/${slf4j.version}/slf4j-api-${slf4j.version}.jar:${settings.localRepository}/org/slf4j/slf4j-jdk14/${slf4j.version}/slf4j-jdk14-${slf4j.version}.jar:${settings.localRepository}/org/graalvm/js/js-scriptengine/${graalvm.version}/js-scriptengine-${graalvm.version}.jar
388388
</classpath>
389+
<buildArgs combine.children="append">
390+
<!-- NOTE: including all charsets seems to take effect for build-native only (reason unclear) -->
391+
<buildArg>-H:+AddAllCharsets</buildArg>
392+
</buildArgs>
389393
</configuration>
390394
</execution>
391395
</executions>

standalone/src/main/resources/META-INF/native-image/com.trivadis.plsql.formatter/tvdformat/reflect-config.json

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,28 @@
124124
"allPublicFields": true,
125125
"allPublicClasses": true
126126
},
127+
{
128+
"name": "java.nio.ByteBuffer",
129+
"allDeclaredConstructors": true,
130+
"allDeclaredMethods": true,
131+
"allDeclaredFields": true,
132+
"allDeclaredClasses": true,
133+
"allPublicConstructors": true,
134+
"allPublicMethods": true,
135+
"allPublicFields": true,
136+
"allPublicClasses": true
137+
},
138+
{
139+
"name": "java.nio.charset.Charset",
140+
"allDeclaredConstructors": true,
141+
"allDeclaredMethods": true,
142+
"allDeclaredFields": true,
143+
"allDeclaredClasses": true,
144+
"allPublicConstructors": true,
145+
"allPublicMethods": true,
146+
"allPublicFields": true,
147+
"allPublicClasses": true
148+
},
127149
{
128150
"name": "java.nio.file.Files",
129151
"allDeclaredConstructors": true,
@@ -190,6 +212,17 @@
190212
"allPublicFields": true,
191213
"allPublicClasses": true
192214
},
215+
{
216+
"name": "java.util.Collections$UnmodifiableSortedMap",
217+
"allDeclaredConstructors": true,
218+
"allDeclaredMethods": true,
219+
"allDeclaredFields": true,
220+
"allDeclaredClasses": true,
221+
"allPublicConstructors": true,
222+
"allPublicMethods": true,
223+
"allPublicFields": true,
224+
"allPublicClasses": true
225+
},
193226
{
194227
"name": "java.util.HashMap",
195228
"allDeclaredConstructors": true,
@@ -267,6 +300,17 @@
267300
"allPublicFields": true,
268301
"allPublicClasses": true
269302
},
303+
{
304+
"name": "java.util.SortedMap",
305+
"allDeclaredConstructors": true,
306+
"allDeclaredMethods": true,
307+
"allDeclaredFields": true,
308+
"allDeclaredClasses": true,
309+
"allPublicConstructors": true,
310+
"allPublicMethods": true,
311+
"allPublicFields": true,
312+
"allPublicClasses": true
313+
},
270314
{
271315
"name": "java.util.stream.Collectors",
272316
"allDeclaredConstructors": true,
@@ -542,5 +586,38 @@
542586
},
543587
{
544588
"name": "org.graalvm.polyglot.management.Management"
589+
},
590+
{
591+
"name": "sun.nio.cs.MS1252$Holder",
592+
"allDeclaredConstructors": true,
593+
"allDeclaredMethods": true,
594+
"allDeclaredFields": true,
595+
"allDeclaredClasses": true,
596+
"allPublicConstructors": true,
597+
"allPublicMethods": true,
598+
"allPublicFields": true,
599+
"allPublicClasses": true
600+
},
601+
{
602+
"name": "sun.nio.cs.SingleByte$Decoder",
603+
"allDeclaredConstructors": true,
604+
"allDeclaredMethods": true,
605+
"allDeclaredFields": true,
606+
"allDeclaredClasses": true,
607+
"allPublicConstructors": true,
608+
"allPublicMethods": true,
609+
"allPublicFields": true,
610+
"allPublicClasses": true
611+
},
612+
{
613+
"name": "sun.nio.cs.UTF_8$Decoder",
614+
"allDeclaredConstructors": true,
615+
"allDeclaredMethods": true,
616+
"allDeclaredFields": true,
617+
"allDeclaredClasses": true,
618+
"allPublicConstructors": true,
619+
"allPublicMethods": true,
620+
"allPublicFields": true,
621+
"allPublicClasses": true
545622
}
546-
]
623+
]

standalone/src/test/java/com/trivadis/plsql/formatter/sqlcl/tests/AbstractSqlclTest.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import java.nio.file.Paths;
1111
import java.util.function.Predicate;
1212
import java.util.logging.LogManager;
13-
import java.util.stream.Collectors;
1413

1514
public abstract class AbstractSqlclTest {
1615
static {
@@ -49,6 +48,7 @@ public void reset() {
4948
setup();
5049
}
5150

51+
@SuppressWarnings("resource")
5252
@BeforeEach
5353
public void setup() {
5454
byteArrayOutputStream.reset();
@@ -68,8 +68,7 @@ public void setup() {
6868
var url = Thread.currentThread().getContextClassLoader().getResource("unformatted");
6969
assert url != null;
7070
var unformattedDir = Paths.get(url.getPath());
71-
var sources = Files.walk(unformattedDir).filter(Files::isRegularFile)
72-
.collect(Collectors.toList());
71+
var sources = Files.walk(unformattedDir).filter(Files::isRegularFile).toList();
7372
for (Path source : sources) {
7473
Path target = Paths.get(tempDir.toString() + File.separator + source.getFileName());
7574
Files.copy(source, target);

0 commit comments

Comments
 (0)