2323var javaString = Java . type ( "java.lang.String" ) ;
2424var javaSystem = Java . type ( "java.lang.System" ) ;
2525// java.nio
26+ var javaByteBuffer = Java . type ( "java.nio.ByteBuffer" ) ;
27+ var javaCharset = Java . type ( "java.nio.charset.Charset" ) ;
2628var javaFiles = Java . type ( "java.nio.file.Files" ) ;
2729var javaFileSystems = Java . type ( "java.nio.file.FileSystems" ) ;
2830var javaPaths = Java . type ( "java.nio.file.Paths" ) ;
@@ -179,8 +181,8 @@ var readFile = function (file) {
179181 return new javaString ( javaFiles . readAllBytes ( file ) ) ;
180182}
181183
// Writes `content` to `file` through a buffered writer.
//   file    - target passed unchanged to Files.newBufferedWriter
//             (presumably a java.nio.file.Path; confirm against callers)
//   content - text to write
//   charset - optional java.nio.charset.Charset used for encoding; when it is
//             omitted the single-argument Files.newBufferedWriter(file)
//             overload is used, so two-argument callers keep working
// Any exception raised while opening or writing propagates to the caller.
var writeFile = function (file, content, charset) {
    var writer = charset
        ? javaFiles.newBufferedWriter(file, charset)
        : javaFiles.newBufferedWriter(file);
    try {
        writer.write(content);
    } finally {
        // close even when write() throws so the file handle is not leaked
        writer.close();
    }
}
@@ -533,36 +535,59 @@ var isMarkdownFile = function (file, markdownExtensions) {
533535 return false ;
534536}
535537
// Guesses the character set of a byte array by trial decoding.
//   content - Java byte[] holding the raw file content
// Returns the first java.nio.charset.Charset that decodes `content` without
// error, or null when none of the candidates does.
var detectCharset = function (content) {
    // rudimentary solution since Apache Tika cannot be used in SQLcl
    // try default character set of the OS (can be overridden via -Dfile.encoding), then UTF-8, then windows-1252
    var charsetNames = [String(javaCharset.defaultCharset().name()), "UTF-8", "windows-1252"];
    for (var i = 0; i < charsetNames.length; i++) {
        if (charsetNames.indexOf(charsetNames[i]) < i) {
            // same name already tried earlier in the list; decoding again cannot succeed
            continue;
        }
        try {
            // forName is inside the try so an unsupported charset name is
            // treated like a failed decode instead of aborting the detection
            var cs = javaCharset.forName(charsetNames[i]);
            // a freshly created decoder throws on malformed/unmappable input
            cs.newDecoder().decode(javaByteBuffer.wrap(content));
            return cs;
        } catch (e) {
            // candidate does not fit (or is unavailable); try the next one
        }
    }
    return null;
}

// Formats every ```sql fenced block of a Markdown file in place.
//   file      - file to read and overwrite (passed to Files.readAllBytes and writeFile)
//   formatter - object whose format(text) result replaces each SQL block
//   serr      - when "all" or "mext", hasParseErrors is asked for console output
// Blocks for which hasParseErrors returns true are copied unchanged. The file
// is written back using the detected character set; when no character set can
// be detected the file is left untouched and a message is written to ctx.
var formatMarkdownFile = function (file, formatter, serr) {
    var bytes = javaFiles.readAllBytes(file);
    var charset = detectCharset(bytes);
    if (charset == null) {
        ctx.write("skipped due to unknown character set.\n");
        return;
    }
    var original = new javaString(bytes, charset);
    // group 1: opening fence, group 2: SQL text, group 3: closing fence
    var p = javaPattern.compile("(```\\s*sql\\s*\\n)(.+?)(\\n```)", javaPattern.DOTALL);
    var m = p.matcher(original);
    var consoleOutput = (serr == "all" || serr == "mext");
    var result = "";
    var pos = 0;
    var sqlBlock = 0;
    while (m.find()) {
        sqlBlock++;
        ctx.write("#" + sqlBlock + "... ");
        // text between the previous block and the end of this opening fence
        result += original.substring(pos, m.end(1));
        if (hasParseErrors(m.group(2), consoleOutput)) {
            ctx.write("skipped... ");
            result += original.substring(m.start(2), m.end(3));
        } else {
            ctx.write("done... ");
            result += formatter.format(m.group(2));
            result += original.substring(m.end(2), m.end(3));
        }
        pos = m.end(3);
    }
    // trailing text after the last SQL block
    if (original.length > pos) {
        result += original.substring(pos);
    }
    writeFile(file, result, charset);
    ctx.write("done.\n");
}
567592
568593var getLineSeparator = function ( input ) {
@@ -578,16 +603,22 @@ var getLineSeparator = function (input) {
578603}
579604
// Formats a whole file in place.
//   file      - file to read and overwrite
//   formatter - object whose format(text) result becomes the new content
//   serr      - when "all" or "ext", hasParseErrors is asked for console output
// The file is skipped (with a message on ctx) when its character set cannot
// be detected or when hasParseErrors reports errors; otherwise the formatted
// text plus the line separator from getLineSeparator is written back using
// the detected character set.
var formatFile = function (file, formatter, serr) {
    var rawContent = javaFiles.readAllBytes(file);
    var detected = detectCharset(rawContent);
    if (detected == null) {
        ctx.write("skipped due to unknown character set.\n");
        return;
    }
    var source = new javaString(rawContent, detected);
    var showErrors = (serr == "all" || serr == "ext");
    if (hasParseErrors(source, showErrors)) {
        ctx.write("skipped.\n");
        return;
    }
    var formatted = formatter.format(source) + getLineSeparator(source);
    writeFile(file, formatted, detected);
    ctx.write("done.\n");
}
593624
0 commit comments