@@ -221,6 +221,16 @@ public class Tokenizer implements Locator, Locator2 {
221221
222222 public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74 ;
223223
224+ public static final int AMBIGUOUS_AMPERSAND = 75 ; // NOTE(review): not referenced in the visible hunks; presumably a tokenizer state id like its neighbours - confirm
225+
226+ public static final int COMMENT_LESSTHAN = 76 ; // state id: entered after a '<' has been appended to the comment buffer
227+
228+ public static final int COMMENT_LESSTHAN_BANG = 77 ; // state id: entered from COMMENT_LESSTHAN on '!'
229+
230+ public static final int COMMENT_LESSTHAN_BANG_DASH = 78 ; // state id: entered from COMMENT_LESSTHAN_BANG on '-'
231+
232+ public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79 ; // state id: entered from COMMENT_LESSTHAN_BANG_DASH on a second '-'
233+
224234 /**
225235 * Magic value for UTF-16 operations.
226236 */
@@ -1029,9 +1039,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10291039
10301040 // ]NOCPP]
10311041
1032- @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c )
1042+ @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c , boolean reportedConsecutiveHyphens )
10331043 throws SAXException {
1034- errConsecutiveHyphens ();
10351044 // [NOCPP[
10361045 switch (commentPolicy ) {
10371046 case ALTER_INFOSET :
@@ -1042,7 +1051,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10421051 appendStrBuf ('-' );
10431052 // CPPONLY: MOZ_FALLTHROUGH;
10441053 case ALLOW :
1045- warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1054+ if (!reportedConsecutiveHyphens ) {
1055+ warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1056+ }
10461057 // ]NOCPP]
10471058 appendStrBuf (c );
10481059 // [NOCPP[
@@ -1464,6 +1475,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
14641475 @ SuppressWarnings ("unused" ) private int stateLoop (int state , char c ,
14651476 int pos , @ NoLength char [] buf , boolean reconsume , int returnState ,
14661477 int endPos ) throws SAXException {
1478+ boolean reportedConsecutiveHyphens = false ;
14671479 /*
14681480 * Idioms used in this code:
14691481 *
@@ -2540,6 +2552,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
25402552 }
25412553 // CPPONLY: MOZ_FALLTHROUGH;
25422554 case COMMENT_START :
2555+ reportedConsecutiveHyphens = false ;
25432556 commentstartloop : for (;;) {
25442557 if (++pos == endPos ) {
25452558 break stateloop ;
@@ -2572,6 +2585,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
25722585 */
25732586 state = transition (state , Tokenizer .DATA , reconsume , pos );
25742587 continue stateloop ;
2588+ case '<' :
2589+ appendStrBuf (c );
2590+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2591+ continue stateloop ;
25752592 case '\r' :
25762593 appendStrBufCarriageReturn ();
25772594 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2617,6 +2634,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26172634 state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
26182635 break commentloop ;
26192636 // continue stateloop;
2637+ case '<' :
2638+ appendStrBuf (c );
2639+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2640+ continue stateloop ;
26202641 case '\r' :
26212642 appendStrBufCarriageReturn ();
26222643 break stateloop ;
@@ -2659,6 +2680,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26592680 state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
26602681 break commentenddashloop ;
26612682 // continue stateloop;
2683+ case '<' :
2684+ appendStrBuf (c );
2685+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2686+ continue stateloop ;
26622687 case '\r' :
26632688 appendStrBufCarriageReturn ();
26642689 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2713,11 +2738,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27132738 * Append a U+002D HYPHEN-MINUS (-) character to
27142739 * the comment token's data.
27152740 */
2716- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2741+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2742+ reportedConsecutiveHyphens = true ;
27172743 /*
27182744 * Stay in the comment end state.
27192745 */
27202746 continue ;
2747+ case '<' :
2748+ appendStrBuf (c );
2749+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2750+ continue stateloop ;
27212751 case '\r' :
27222752 adjustDoubleHyphenAndAppendToStrBufCarriageReturn ();
27232753 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2727,7 +2757,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27272757 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
27282758 continue stateloop ;
27292759 case '!' :
2730- errHyphenHyphenBang ();
27312760 appendStrBuf (c );
27322761 state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
27332762 continue stateloop ;
@@ -2740,7 +2769,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27402769 * and the input character to the comment
27412770 * token's data.
27422771 */
2743- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2772+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2773+ reportedConsecutiveHyphens = true ;
27442774 /*
27452775 * Switch to the comment state.
27462776 */
@@ -2810,6 +2840,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28102840 continue stateloop ;
28112841 }
28122842 }
2843+ case COMMENT_LESSTHAN : // Comment less-than sign state: a '<' was just appended to the comment buffer. Every branch leaves through stateloop, so no inner loop is needed.
2844+ if (++pos == endPos ) {
2845+ break stateloop ;
2846+ }
2847+ c = checkChar (buf , pos );
2848+ switch (c ) {
2849+ case '!' :
2850+ appendStrBuf (c );
2851+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG , reconsume , pos );
2852+ continue stateloop ;
2853+ case '<' :
2854+ appendStrBuf (c );
2855+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2856+ continue stateloop ;
2857+ case '-' :
2858+ appendStrBuf (c );
2859+ state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
2860+ continue stateloop ;
2861+ case '\r' :
2862+ appendStrBufCarriageReturn ();
2863+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // a line break is "anything else": resume as plain comment data so a later '!' is not paired with this '<'
2864+ break stateloop ;
2865+ case '\n' :
2866+ appendStrBufLineFeed ();
2867+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // same as CR: do not remain in this state
2868+ continue stateloop ;
2869+ case '\u0000' :
2870+ c = '\uFFFD' ;
2871+ // fall thru
2872+ default :
2873+ appendStrBuf (c );
2874+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2875+ continue stateloop ;
2876+ }
2877+ case COMMENT_LESSTHAN_BANG : // Comment less-than sign bang state: "<!" has been appended to the comment buffer. Every branch leaves through stateloop, so no inner loop is needed.
2878+ if (++pos == endPos ) {
2879+ break stateloop ;
2880+ }
2881+ c = checkChar (buf , pos );
2882+ switch (c ) {
2883+ case '-' :
2884+ appendStrBuf (c );
2885+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG_DASH , reconsume , pos );
2886+ continue stateloop ;
2887+ case '<' :
2888+ appendStrBuf (c );
2889+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2890+ continue stateloop ;
2891+ case '\r' :
2892+ appendStrBufCarriageReturn ();
2893+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // a line break is "anything else": resume as plain comment data so a later '-' is not paired with this "<!"
2894+ break stateloop ;
2895+ case '\n' :
2896+ appendStrBufLineFeed ();
2897+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // same as CR: do not remain in this state
2898+ continue stateloop ;
2899+ case '\u0000' :
2900+ c = '\uFFFD' ;
2901+ // fall thru
2902+ default :
2903+ appendStrBuf (c );
2904+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2905+ continue stateloop ;
2906+ }
2907+ case COMMENT_LESSTHAN_BANG_DASH : // Comment less-than sign bang dash state: "<!-" has been appended to the comment buffer. Every branch leaves through stateloop, so no inner loop is needed.
2908+ if (++pos == endPos ) {
2909+ break stateloop ;
2910+ }
2911+ c = checkChar (buf , pos );
2912+ switch (c ) {
2913+ case '-' :
2914+ appendStrBuf (c );
2915+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG_DASH_DASH , reconsume , pos );
2916+ continue stateloop ;
2917+ case '<' :
2918+ appendStrBuf (c );
2919+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2920+ continue stateloop ;
2921+ case '\r' :
2922+ appendStrBufCarriageReturn ();
2923+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // a line break is "anything else": resume as plain comment data so a later '-' is not paired with this "<!-"
2924+ break stateloop ;
2925+ case '\n' :
2926+ appendStrBufLineFeed ();
2927+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // same as CR: do not remain in this state
2928+ continue stateloop ;
2929+ case '\u0000' :
2930+ c = '\uFFFD' ;
2931+ // fall thru
2932+ default :
2933+ appendStrBuf (c );
2934+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2935+ continue stateloop ;
2936+ }
2937+ case COMMENT_LESSTHAN_BANG_DASH_DASH : // Comment less-than sign bang dash dash state: "<!--" has been appended inside an open comment. Mirrors COMMENT_END, plus errNestedComment() on everything except '>'. Every branch leaves through stateloop.
2938+ if (++pos == endPos ) {
2939+ break stateloop ;
2940+ }
2941+ c = checkChar (buf , pos );
2942+ switch (c ) {
2943+ case '>' :
2944+ appendStrBuf (c );
2945+ emitComment (3 , pos ); // NOTE(review): the 3 presumably trims the trailing "-->" from the buffer - confirm against emitComment
2946+ state = transition (state , Tokenizer .DATA , reconsume , pos );
2947+ continue stateloop ;
2948+ case '-' :
2949+ errNestedComment ();
2950+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2951+ reportedConsecutiveHyphens = true ;
2952+ state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2953+ continue stateloop ;
2954+ case '\r' :
2955+ errNestedComment ();
2956+ silentCarriageReturn (); // account for the CR and append '\n' instead of the raw '\r', as the *CarriageReturn helpers do
2957+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , reportedConsecutiveHyphens );
2958+ reportedConsecutiveHyphens = true ;
2959+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2960+ break stateloop ;
2961+ case '\n' :
2962+ errNestedComment ();
2963+ silentLineFeed (); // account for the LF, as the *LineFeed helpers do
2964+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2965+ reportedConsecutiveHyphens = true ;
2966+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2967+ continue stateloop ; // leave through stateloop so the COMMENT handler runs next
2968+ case '!' :
2969+ errNestedComment ();
2970+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens ); // NOTE(review): COMMENT_END appends '!' with plain appendStrBuf(c) - confirm the hyphen adjustment is wanted here
2971+ reportedConsecutiveHyphens = true ;
2972+ state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
2973+ continue stateloop ;
2974+ case '\u0000' :
2975+ c = '\uFFFD' ;
2976+ // fall thru to default: NUL is ordinary comment data here, it must not be handled like '!'
2977+ default :
2978+ errNestedComment ();
2979+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2980+ reportedConsecutiveHyphens = true ;
2981+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // as in COMMENT_END's "anything else" arm: "--" plus c is data, so a following '>' must not close the comment
2982+ continue stateloop ;
2983+ }
2984+ // XXX reorder point
28132985 case COMMENT_START_DASH :
28142986 if (++pos == endPos ) {
28152987 break stateloop ;
@@ -2838,6 +3010,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28383010 */
28393011 state = transition (state , Tokenizer .DATA , reconsume , pos );
28403012 continue stateloop ;
3013+ case '<' :
3014+ appendStrBuf (c );
3015+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
3016+ continue stateloop ;
28413017 case '\r' :
28423018 appendStrBufCarriageReturn ();
28433019 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -5957,13 +6133,13 @@ private void initDoctypeFields() {
59576133 @ Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn () // Handles a CR after "--" in a comment: calls silentCarriageReturn(), then appends '\n' (not '\r') through the double-hyphen adjustment.
59586134 throws SAXException {
59596135 silentCarriageReturn ();
5960- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6136+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false ); // NOTE(review): always passes false, so the XML-mappability warning is never suppressed on this path even if already reported - confirm intended
59616137 }
59626138
59636139 @ Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed () // Handles an LF after "--" in a comment: calls silentLineFeed(), then appends '\n' through the double-hyphen adjustment.
59646140 throws SAXException {
59656141 silentLineFeed ();
5966- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6142+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false ); // NOTE(review): always passes false, so the XML-mappability warning is never suppressed on this path even if already reported - confirm intended
59676143 }
59686144
59696145 @ Inline private void appendStrBufLineFeed () {
@@ -6268,6 +6444,8 @@ public void eof() throws SAXException {
62686444 break eofloop ;
62696445 case COMMENT_START :
62706446 case COMMENT :
6447+ case COMMENT_LESSTHAN :
6448+ case COMMENT_LESSTHAN_BANG :
62716449 /*
62726450 * EOF Parse error.
62736451 */
@@ -6279,6 +6457,7 @@ public void eof() throws SAXException {
62796457 */
62806458 break eofloop ;
62816459 case COMMENT_END :
6460+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
62826461 errEofInComment ();
62836462 /* Emit the comment token. */
62846463 emitComment (2 , 0 );
@@ -6288,6 +6467,7 @@ public void eof() throws SAXException {
62886467 break eofloop ;
62896468 case COMMENT_END_DASH :
62906469 case COMMENT_START_DASH :
6470+ case COMMENT_LESSTHAN_BANG_DASH :
62916471 errEofInComment ();
62926472 /* Emit the comment token. */
62936473 emitComment (1 , 0 );
@@ -6917,7 +7097,7 @@ protected void errGtInPublicId() throws SAXException {
69177097 protected void errNamelessDoctype () throws SAXException { // Empty body in this class. NOTE(review): presumably an overridable error-reporting hook - confirm
69187098 }
69197099
6920- protected void errConsecutiveHyphens () throws SAXException {
7100+ protected void errNestedComment () throws SAXException { // Empty body in this class. Called by the COMMENT_LESSTHAN_BANG_DASH_DASH handler when "<!--" inside a comment is followed by anything other than '>'.
69217101 }
69227102
69237103 protected void errPrematureEndOfComment () throws SAXException {
@@ -7060,9 +7240,6 @@ protected void errExpectedSystemId() throws SAXException {
70607240 protected void errMissingSpaceBeforeDoctypeName () throws SAXException { // Empty body in this class. NOTE(review): presumably an overridable error-reporting hook - confirm
70617241 }
70627242
7063- protected void errHyphenHyphenBang () throws SAXException {
7064- }
7065-
70667243 protected void errNcrControlChar () throws SAXException { // Empty body in this class. NOTE(review): presumably an overridable error-reporting hook - confirm
70677244 }
70687245
0 commit comments