@@ -222,6 +222,14 @@ public class Tokenizer implements Locator {
222222
223223 public static final int AMBIGUOUS_AMPERSAND = 75 ;
224224
225+ public static final int COMMENT_LESSTHAN = 76 ;
226+
227+ public static final int COMMENT_LESSTHAN_BANG = 77 ;
228+
229+ public static final int COMMENT_LESSTHAN_BANG_DASH = 78 ;
230+
231+ public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79 ;
232+
225233 /**
226234 * Magic value for UTF-16 operations.
227235 */
@@ -1034,9 +1042,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10341042
10351043 // ]NOCPP]
10361044
1037- @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c )
1045+ @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c , boolean reportedConsecutiveHyphens )
10381046 throws SAXException {
1039- errConsecutiveHyphens ();
10401047 // [NOCPP[
10411048 switch (commentPolicy ) {
10421049 case ALTER_INFOSET :
@@ -1047,7 +1054,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10471054 appendStrBuf ('-' );
10481055 // CPPONLY: MOZ_FALLTHROUGH;
10491056 case ALLOW :
1050- warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1057+ if (!reportedConsecutiveHyphens ) {
1058+ warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1059+ }
10511060 // ]NOCPP]
10521061 appendStrBuf (c );
10531062 // [NOCPP[
@@ -1509,6 +1518,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
15091518 @ SuppressWarnings ("unused" ) private int stateLoop (int state , char c ,
15101519 int pos , @ NoLength char [] buf , boolean reconsume , int returnState ,
15111520 int endPos ) throws SAXException {
1521+ boolean reportedConsecutiveHyphens = false ;
15121522 /*
15131523 * Idioms used in this code:
15141524 *
@@ -2594,6 +2604,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
25942604 }
25952605 // CPPONLY: MOZ_FALLTHROUGH;
25962606 case COMMENT_START :
2607+ reportedConsecutiveHyphens = false ;
25972608 commentstartloop : for (;;) {
25982609 if (++pos == endPos ) {
25992610 break stateloop ;
@@ -2626,6 +2637,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26262637 */
26272638 state = transition (state , Tokenizer .DATA , reconsume , pos );
26282639 continue stateloop ;
2640+ case '<' :
2641+ appendStrBuf (c );
2642+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2643+ continue stateloop ;
26292644 case '\r' :
26302645 appendStrBufCarriageReturn ();
26312646 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2671,6 +2686,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26712686 state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
26722687 break commentloop ;
26732688 // continue stateloop;
2689+ case '<' :
2690+ appendStrBuf (c );
2691+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2692+ continue stateloop ;
26742693 case '\r' :
26752694 appendStrBufCarriageReturn ();
26762695 break stateloop ;
@@ -2713,6 +2732,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27132732 state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
27142733 break commentenddashloop ;
27152734 // continue stateloop;
2735+ case '<' :
2736+ appendStrBuf (c );
2737+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2738+ continue stateloop ;
27162739 case '\r' :
27172740 appendStrBufCarriageReturn ();
27182741 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2767,11 +2790,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27672790 * Append a U+002D HYPHEN-MINUS (-) character to
27682791 * the comment token's data.
27692792 */
2770- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2793+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2794+ reportedConsecutiveHyphens = true ;
27712795 /*
27722796 * Stay in the comment end state.
27732797 */
27742798 continue ;
2799+ case '<' :
2800+ appendStrBuf (c );
2801+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2802+ continue stateloop ;
27752803 case '\r' :
27762804 adjustDoubleHyphenAndAppendToStrBufCarriageReturn ();
27772805 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2781,7 +2809,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27812809 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
27822810 continue stateloop ;
27832811 case '!' :
2784- errHyphenHyphenBang ();
27852812 appendStrBuf (c );
27862813 state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
27872814 continue stateloop ;
@@ -2794,7 +2821,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27942821 * and the input character to the comment
27952822 * token's data.
27962823 */
2797- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2824+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2825+ reportedConsecutiveHyphens = true ;
27982826 /*
27992827 * Switch to the comment state.
28002828 */
@@ -2864,6 +2892,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28642892 continue stateloop ;
28652893 }
28662894 }
2895+ case COMMENT_LESSTHAN : // Entered right after a '<' inside a comment was appended to strBuf.
2896+ if (++pos == endPos ) { // No inner loop: every arm below leaves via stateloop.
2897+ break stateloop ;
2898+ }
2899+ c = checkChar (buf , pos );
2900+ switch (c ) {
2901+ case '!' : // "<!" may be the start of a nested "<!--".
2902+ appendStrBuf (c );
2903+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG , reconsume , pos );
2904+ continue stateloop ;
2905+ case '<' :
2906+ appendStrBuf (c );
2907+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2908+ continue stateloop ;
2909+ case '-' :
2910+ appendStrBuf (c );
2911+ state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
2912+ continue stateloop ;
2913+ case '\r' :
2914+ appendStrBufCarriageReturn ();
2915+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // A line break is ordinary comment data; staying here would let a later "!--" be misread as "<!--".
2916+ break stateloop ;
2917+ case '\n' :
2918+ appendStrBufLineFeed ();
2919+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // Same as the '\r' arm: return to COMMENT.
2920+ continue stateloop ;
2921+ case '\u0000' :
2922+ c = '\uFFFD' ;
2923+ // fall thru
2924+ default :
2925+ appendStrBuf (c );
2926+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2927+ continue stateloop ;
2928+ }
2929+ case COMMENT_LESSTHAN_BANG : // Entered after "<!" inside a comment; both characters are already in strBuf.
2930+ if (++pos == endPos ) { // No inner loop: every arm below leaves via stateloop.
2931+ break stateloop ;
2932+ }
2933+ c = checkChar (buf , pos );
2934+ switch (c ) {
2935+ case '-' : // "<!-" may still become a nested "<!--".
2936+ appendStrBuf (c );
2937+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG_DASH , reconsume , pos );
2938+ continue stateloop ;
2939+ case '<' :
2940+ appendStrBuf (c );
2941+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2942+ continue stateloop ;
2943+ case '\r' :
2944+ appendStrBufCarriageReturn ();
2945+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // A line break is ordinary comment data; staying here would let a later "--" be misread as part of "<!--".
2946+ break stateloop ;
2947+ case '\n' :
2948+ appendStrBufLineFeed ();
2949+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // Same as the '\r' arm: return to COMMENT.
2950+ continue stateloop ;
2951+ case '\u0000' :
2952+ c = '\uFFFD' ;
2953+ // fall thru
2954+ default :
2955+ appendStrBuf (c );
2956+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2957+ continue stateloop ;
2958+ }
2959+ case COMMENT_LESSTHAN_BANG_DASH : // Entered after "<!-" inside a comment; all three characters are already in strBuf.
2960+ if (++pos == endPos ) { // No inner loop: every arm below leaves via stateloop.
2961+ break stateloop ;
2962+ }
2963+ c = checkChar (buf , pos );
2964+ switch (c ) {
2965+ case '-' : // Completes a nested "<!--".
2966+ appendStrBuf (c );
2967+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG_DASH_DASH , reconsume , pos );
2968+ continue stateloop ;
2969+ case '<' :
2970+ appendStrBuf (c );
2971+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2972+ continue stateloop ;
2973+ case '\r' :
2974+ appendStrBufCarriageReturn ();
2975+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // Must leave this state: otherwise "<!-", line break, "->" would reach the '>' arm of COMMENT_LESSTHAN_BANG_DASH_DASH and close the comment early.
2976+ break stateloop ;
2977+ case '\n' :
2978+ appendStrBufLineFeed ();
2979+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // Same as the '\r' arm: return to COMMENT.
2980+ continue stateloop ;
2981+ case '\u0000' :
2982+ c = '\uFFFD' ;
2983+ // fall thru
2984+ default :
2985+ appendStrBuf (c );
2986+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2987+ continue stateloop ;
2988+ }
2989+ case COMMENT_LESSTHAN_BANG_DASH_DASH : // Entered after "<!--" inside a comment; all four characters are already in strBuf.
2990+ // Each arm mirrors the matching COMMENT_END arm; all but '>' first report the nested comment.
2991+ if (++pos == endPos ) { // No inner loop: every arm below leaves via stateloop.
2992+ break stateloop ;
2993+ }
2994+ c = checkChar (buf , pos );
2995+ switch (c ) {
2996+ case '>' : // "<!-->" closes the comment; this is not reported as a nested comment.
2997+ appendStrBuf (c );
2998+ emitComment (3 , pos );
2999+ state = transition (state , Tokenizer .DATA , reconsume , pos );
3000+ continue stateloop ;
3001+ case '-' :
3002+ errNestedComment ();
3003+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
3004+ reportedConsecutiveHyphens = true ;
3005+ state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
3006+ continue stateloop ;
3007+ case '\r' :
3008+ errNestedComment ();
3009+ silentCarriageReturn (); // Same bookkeeping call the CR helper makes before appending.
3010+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , reportedConsecutiveHyphens ); // Append '\n' rather than the raw CR, as the CR helper does.
3011+ reportedConsecutiveHyphens = true ;
3012+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
3013+ break stateloop ;
3014+ case '\n' :
3015+ errNestedComment ();
3016+ silentLineFeed (); // Same bookkeeping call the LF helper makes before appending.
3017+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , reportedConsecutiveHyphens );
3018+ reportedConsecutiveHyphens = true ;
3019+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
3020+ continue stateloop ; // Must restart stateloop so the COMMENT state handles the next character.
3021+ case '!' :
3022+ errNestedComment ();
3023+ appendStrBuf (c ); // Plain append, like COMMENT_END's '!' arm: "--!>" may still close the comment.
3024+ state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
3025+ continue stateloop ;
3026+ case '\u0000' : // Placed after '!' so that NUL falls into default, not into COMMENT_END_BANG.
3027+ c = '\uFFFD' ;
3028+ // fall thru
3029+ default :
3030+ errNestedComment ();
3031+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
3032+ reportedConsecutiveHyphens = true ;
3033+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // COMMENT, not COMMENT_END: otherwise a following '>' would close the comment.
3034+ continue stateloop ;
3035+ }
3036+ // XXX reorder point
28673037 case COMMENT_START_DASH :
28683038 if (++pos == endPos ) {
28693039 break stateloop ;
@@ -2892,6 +3062,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28923062 */
28933063 state = transition (state , Tokenizer .DATA , reconsume , pos );
28943064 continue stateloop ;
3065+ case '<' :
3066+ appendStrBuf (c );
3067+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
3068+ continue stateloop ;
28953069 case '\r' :
28963070 appendStrBufCarriageReturn ();
28973071 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -6019,13 +6193,13 @@ private void initDoctypeFields() {
60196193 @ Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn () // CR variant: calls silentCarriageReturn(), then runs the double-hyphen adjustment with '\n' as the appended character.
60206194 throws SAXException {
60216195 silentCarriageReturn ();
6022- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6196+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false ); // false: stateLoop's local reportedConsecutiveHyphens is not visible here, so the consecutive-hyphens warning is never suppressed on this path. NOTE(review): confirm the possible duplicate warning is intended.
60236197 }
60246198
60256199 @ Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed () // LF variant: calls silentLineFeed(), then runs the double-hyphen adjustment with '\n' as the appended character.
60266200 throws SAXException {
60276201 silentLineFeed ();
6028- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6202+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false ); // false: stateLoop's local reportedConsecutiveHyphens is not visible here, so the consecutive-hyphens warning is never suppressed on this path. NOTE(review): confirm the possible duplicate warning is intended.
60296203 }
60306204
60316205 @ Inline private void appendStrBufLineFeed () {
@@ -6330,6 +6504,8 @@ public void eof() throws SAXException {
63306504 break eofloop ;
63316505 case COMMENT_START :
63326506 case COMMENT :
6507+ case COMMENT_LESSTHAN :
6508+ case COMMENT_LESSTHAN_BANG :
63336509 /*
63346510 * EOF Parse error.
63356511 */
@@ -6341,6 +6517,7 @@ public void eof() throws SAXException {
63416517 */
63426518 break eofloop ;
63436519 case COMMENT_END :
6520+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
63446521 errEofInComment ();
63456522 /* Emit the comment token. */
63466523 emitComment (2 , 0 );
@@ -6350,6 +6527,7 @@ public void eof() throws SAXException {
63506527 break eofloop ;
63516528 case COMMENT_END_DASH :
63526529 case COMMENT_START_DASH :
6530+ case COMMENT_LESSTHAN_BANG_DASH :
63536531 errEofInComment ();
63546532 /* Emit the comment token. */
63556533 emitComment (1 , 0 );
@@ -6974,7 +7152,7 @@ protected void errGtInPublicId() throws SAXException {
69747152 protected void errNamelessDoctype () throws SAXException {
69757153 }
69767154
6977- protected void errConsecutiveHyphens () throws SAXException {
7155+ protected void errNestedComment () throws SAXException { // Intentionally empty here. stateLoop calls it in COMMENT_LESSTHAN_BANG_DASH_DASH when the character after a "<!--" inside a comment is not '>'. Presumably overridden by error-reporting subclasses; verify.
69787156 }
69797157
69807158 protected void errPrematureEndOfComment () throws SAXException {
@@ -7124,9 +7302,6 @@ protected void errExpectedSystemId() throws SAXException {
71247302 protected void errMissingSpaceBeforeDoctypeName () throws SAXException {
71257303 }
71267304
7127- protected void errHyphenHyphenBang () throws SAXException {
7128- }
7129-
71307305 protected void errNcrControlChar () throws SAXException {
71317306 }
71327307
0 commit comments