@@ -223,6 +223,14 @@ public class Tokenizer implements Locator, Locator2 {
223223
224224 public static final int AMBIGUOUS_AMPERSAND = 75 ;
225225
// Tokenizer state: a '<' has just been appended while inside a comment.
226+ public static final int COMMENT_LESSTHAN = 76 ;
227+
// Tokenizer state: "<!" has just been appended while inside a comment.
228+ public static final int COMMENT_LESSTHAN_BANG = 77 ;
229+
// Tokenizer state: "<!-" has just been appended while inside a comment.
230+ public static final int COMMENT_LESSTHAN_BANG_DASH = 78 ;
231+
// Tokenizer state: "<!--" has just been appended while inside a comment;
// the next character decides whether a nested-comment error is reported.
232+ public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79 ;
233+
226234 /**
227235 * Magic value for UTF-16 operations.
228236 */
@@ -1031,9 +1039,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10311039
10321040 // ]NOCPP]
10331041
1034- @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c )
1042+ @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c , boolean reportedConsecutiveHyphens )
10351043 throws SAXException {
1036- errConsecutiveHyphens ();
10371044 // [NOCPP[
10381045 switch (commentPolicy ) {
10391046 case ALTER_INFOSET :
@@ -1044,7 +1051,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10441051 appendStrBuf ('-' );
10451052 // CPPONLY: MOZ_FALLTHROUGH;
10461053 case ALLOW :
1047- warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1054+ if (!reportedConsecutiveHyphens ) {
1055+ warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1056+ }
10481057 // ]NOCPP]
10491058 appendStrBuf (c );
10501059 // [NOCPP[
@@ -1466,6 +1475,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
14661475 @ SuppressWarnings ("unused" ) private int stateLoop (int state , char c ,
14671476 int pos , @ NoLength char [] buf , boolean reconsume , int returnState ,
14681477 int endPos ) throws SAXException {
1478+ boolean reportedConsecutiveHyphens = false ;
14691479 /*
14701480 * Idioms used in this code:
14711481 *
@@ -2542,6 +2552,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
25422552 }
25432553 // CPPONLY: MOZ_FALLTHROUGH;
25442554 case COMMENT_START :
2555+ reportedConsecutiveHyphens = false ;
25452556 commentstartloop : for (;;) {
25462557 if (++pos == endPos ) {
25472558 break stateloop ;
@@ -2574,6 +2585,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
25742585 */
25752586 state = transition (state , Tokenizer .DATA , reconsume , pos );
25762587 continue stateloop ;
2588+ case '<' :
2589+ appendStrBuf (c );
2590+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2591+ continue stateloop ;
25772592 case '\r' :
25782593 appendStrBufCarriageReturn ();
25792594 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2619,6 +2634,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26192634 state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
26202635 break commentloop ;
26212636 // continue stateloop;
2637+ case '<' :
2638+ appendStrBuf (c );
2639+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2640+ continue stateloop ;
26222641 case '\r' :
26232642 appendStrBufCarriageReturn ();
26242643 break stateloop ;
@@ -2661,6 +2680,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26612680 state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
26622681 break commentenddashloop ;
26632682 // continue stateloop;
2683+ case '<' :
2684+ appendStrBuf (c );
2685+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2686+ continue stateloop ;
26642687 case '\r' :
26652688 appendStrBufCarriageReturn ();
26662689 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2715,11 +2738,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27152738 * Append a U+002D HYPHEN-MINUS (-) character to
27162739 * the comment token's data.
27172740 */
2718- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2741+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2742+ reportedConsecutiveHyphens = true ;
27192743 /*
27202744 * Stay in the comment end state.
27212745 */
27222746 continue ;
2747+ case '<' :
2748+ appendStrBuf (c );
2749+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2750+ continue stateloop ;
27232751 case '\r' :
27242752 adjustDoubleHyphenAndAppendToStrBufCarriageReturn ();
27252753 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2729,7 +2757,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27292757 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
27302758 continue stateloop ;
27312759 case '!' :
2732- errHyphenHyphenBang ();
27332760 appendStrBuf (c );
27342761 state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
27352762 continue stateloop ;
@@ -2742,7 +2769,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27422769 * and the input character to the comment
27432770 * token's data.
27442771 */
2745- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2772+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2773+ reportedConsecutiveHyphens = true ;
27462774 /*
27472775 * Switch to the comment state.
27482776 */
@@ -2812,6 +2840,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28122840 continue stateloop ;
28132841 }
28142842 }
// COMMENT_LESSTHAN: handles the character that follows a '<' inside a
// comment. Every arm appends the character to the string buffer; the arms
// differ only in which state they select next.
2843+ case COMMENT_LESSTHAN :
2844+ for (;;) {
2845+ if (++pos == endPos ) {
2846+ break stateloop ;
2847+ }
2848+ c = checkChar (buf , pos );
2849+ switch (c ) {
2850+ case '!' :
// "<!" seen: may be the start of a nested "<!--".
2851+ appendStrBuf (c );
2852+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG , reconsume , pos );
2853+ continue stateloop ;
2854+ case '<' :
// Another '<': re-enters this same state through the outer loop.
2855+ appendStrBuf (c );
2856+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2857+ continue stateloop ;
2858+ case '-' :
// The hyphen is appended here and COMMENT_END_DASH takes over.
2859+ appendStrBuf (c );
2860+ state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
2861+ continue stateloop ;
2862+ case '\r' :
// NOTE(review): no transition before leaving stateloop, so `state` is still
// COMMENT_LESSTHAN on exit. The '<' no longer immediately precedes the next
// character, so a following '!' would still walk toward the nested-comment
// states. Probably needs a transition to Tokenizer.COMMENT first -- confirm.
2863+ appendStrBufCarriageReturn ();
2864+ break stateloop ;
2865+ case '\n' :
// NOTE(review): bare `continue` keeps looping in this state after a line
// feed; same concern as the '\r' arm -- likely should move to COMMENT.
2866+ appendStrBufLineFeed ();
2867+ continue ;
2868+ case '\u0000' :
// NUL is replaced with U+FFFD and then handled like any other character.
2869+ c = '\uFFFD' ;
2870+ // fall thru
2871+ default :
// Any other character: plain comment content, back to COMMENT.
2872+ appendStrBuf (c );
2873+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2874+ continue stateloop ;
2875+ }
2876+ }
// COMMENT_LESSTHAN_BANG: handles the character that follows "<!" inside a
// comment. Every arm appends the character to the string buffer.
2877+ case COMMENT_LESSTHAN_BANG :
2878+ for (;;) {
2879+ if (++pos == endPos ) {
2880+ break stateloop ;
2881+ }
2882+ c = checkChar (buf , pos );
2883+ switch (c ) {
2884+ case '-' :
// "<!-" seen: keep tracking a possible nested "<!--".
2885+ appendStrBuf (c );
2886+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG_DASH , reconsume , pos );
2887+ continue stateloop ;
2888+ case '<' :
// A fresh '<' restarts the "<!--" detection.
2889+ appendStrBuf (c );
2890+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2891+ continue stateloop ;
2892+ case '\r' :
// NOTE(review): leaves stateloop without a transition, so `state` remains
// COMMENT_LESSTHAN_BANG and a later '-' would still be treated as part of
// "<!-". Probably needs a transition to Tokenizer.COMMENT -- confirm.
2893+ appendStrBufCarriageReturn ();
2894+ break stateloop ;
2895+ case '\n' :
// NOTE(review): bare `continue` stays in this state after a line feed;
// same concern as the '\r' arm.
2896+ appendStrBufLineFeed ();
2897+ continue ;
2898+ case '\u0000' :
// NUL is replaced with U+FFFD and then handled like any other character.
2899+ c = '\uFFFD' ;
2900+ // fall thru
2901+ default :
// Any other character: plain comment content, back to COMMENT.
2902+ appendStrBuf (c );
2903+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2904+ continue stateloop ;
2905+ }
2906+ }
// COMMENT_LESSTHAN_BANG_DASH: handles the character that follows "<!-"
// inside a comment. Every arm appends the character to the string buffer.
2907+ case COMMENT_LESSTHAN_BANG_DASH :
2908+ for (;;) {
2909+ if (++pos == endPos ) {
2910+ break stateloop ;
2911+ }
2912+ c = checkChar (buf , pos );
2913+ switch (c ) {
2914+ case '-' :
// "<!--" is now complete; the next state decides whether it is an error.
2915+ appendStrBuf (c );
2916+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG_DASH_DASH , reconsume , pos );
2917+ continue stateloop ;
2918+ case '<' :
// A fresh '<' restarts the "<!--" detection.
2919+ appendStrBuf (c );
2920+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2921+ continue stateloop ;
2922+ case '\r' :
// NOTE(review): leaves stateloop without a transition, so `state` remains
// COMMENT_LESSTHAN_BANG_DASH and a later '-' would complete "<!--" across
// the line break. Probably needs a transition to Tokenizer.COMMENT -- confirm.
2923+ appendStrBufCarriageReturn ();
2924+ break stateloop ;
2925+ case '\n' :
// NOTE(review): bare `continue` stays in this state after a line feed;
// same concern as the '\r' arm.
2926+ appendStrBufLineFeed ();
2927+ continue ;
2928+ case '\u0000' :
// NUL is replaced with U+FFFD and then handled like any other character.
2929+ c = '\uFFFD' ;
2930+ // fall thru
2931+ default :
// Any other character: plain comment content, back to COMMENT.
2932+ appendStrBuf (c );
2933+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2934+ continue stateloop ;
2935+ }
2936+ }
// COMMENT_LESSTHAN_BANG_DASH_DASH: handles the character that follows
// "<!--" inside a comment. Every arm except '>' calls errNestedComment()
// and appends via adjustDoubleHyphenAndAppendToStrBufAndErr, then marks
// reportedConsecutiveHyphens so the ALLOW-policy warning is issued once.
2937+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
2938+ for (;;) {
2939+ if (++pos == endPos ) {
2940+ break stateloop ;
2941+ }
2942+ c = checkChar (buf , pos );
2943+ switch (c ) {
2944+ case '>' :
// "<!-->" closes the comment without a nested-comment error.
// Presumably the 3 tells emitComment to drop the trailing "-->" that now
// sits in the buffer -- confirm against emitComment.
2945+ appendStrBuf (c );
2946+ emitComment (3 , pos );
2947+ state = transition (state , Tokenizer .DATA , reconsume , pos );
2948+ continue stateloop ;
2949+ case '-' :
2950+ errNestedComment ();
2951+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2952+ reportedConsecutiveHyphens = true ;
2953+ state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2954+ continue stateloop ;
2955+ case '\r' :
// NOTE(review): `c` is still '\r' here, so a raw carriage return is
// appended and silentCarriageReturn() is never called; the helper
// adjustDoubleHyphenAndAppendToStrBufCarriageReturn() does both and
// appends '\n' instead. Looks unintended -- confirm.
2956+ errNestedComment ();
2957+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2958+ reportedConsecutiveHyphens = true ;
2959+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2960+ break stateloop ;
2961+ case '\n' :
// NOTE(review): silentLineFeed() is not called on this path (compare
// adjustDoubleHyphenAndAppendToStrBufLineFeed()).
2962+ errNestedComment ();
2963+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2964+ reportedConsecutiveHyphens = true ;
2965+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
// NOTE(review): `state` was just set to COMMENT, but a bare `continue`
// keeps iterating this state's loop; the other arms use
// `continue stateloop`. Likely should be `continue stateloop`.
2966+ continue ;
2967+ case '\u0000' :
// NOTE(review): NUL (as U+FFFD) falls through into the '!' arm and ends up
// in COMMENT_END_BANG; in the sibling COMMENT_LESSTHAN* states NUL falls
// through to `default`. Confirm this ordering is intended.
2968+ c = '\uFFFD' ;
2969+ // fall thru
2970+ case '!' :
2971+ errNestedComment ();
2972+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2973+ reportedConsecutiveHyphens = true ;
2974+ state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
2975+ continue stateloop ;
2976+ default :
// NOTE(review): an ordinary character moves to COMMENT_END, whereas the
// '\r' and '\n' arms above move to COMMENT. If COMMENT_END treats a
// following '>' as the end of the comment, "<!--x>" would close it early.
// Confirm whether Tokenizer.COMMENT was intended here.
2977+ errNestedComment ();
2978+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2979+ reportedConsecutiveHyphens = true ;
2980+ state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2981+ continue stateloop ;
2982+ }
2983+ }
2984+ // XXX reorder point
28152985 case COMMENT_START_DASH :
28162986 if (++pos == endPos ) {
28172987 break stateloop ;
@@ -2840,6 +3010,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28403010 */
28413011 state = transition (state , Tokenizer .DATA , reconsume , pos );
28423012 continue stateloop ;
3013+ case '<' :
3014+ appendStrBuf (c );
3015+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
3016+ continue stateloop ;
28433017 case '\r' :
28443018 appendStrBufCarriageReturn ();
28453019 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -5967,13 +6141,13 @@ private void initDoctypeFields() {
/**
 * Calls silentCarriageReturn() and then appends a line feed through
 * adjustDoubleHyphenAndAppendToStrBufAndErr.
 *
 * NOTE(review): the new second argument is always {@code false}, so the
 * caller's reportedConsecutiveHyphens flag is not consulted on this path
 * and the ALLOW-policy warning may be repeated -- confirm this is intended.
 *
 * @throws SAXException declared by this helper; presumably raised by the
 *         delegated calls -- confirm
 */
59676141 @ Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn ()
59686142 throws SAXException {
59696143 silentCarriageReturn ();
5970- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6144+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
59716145 }
59726146
/**
 * Calls silentLineFeed() and then appends a line feed through
 * adjustDoubleHyphenAndAppendToStrBufAndErr.
 *
 * NOTE(review): the new second argument is always {@code false}, so the
 * caller's reportedConsecutiveHyphens flag is not consulted on this path
 * and the ALLOW-policy warning may be repeated -- confirm this is intended.
 *
 * @throws SAXException declared by this helper; presumably raised by the
 *         delegated calls -- confirm
 */
59736147 @ Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed ()
59746148 throws SAXException {
59756149 silentLineFeed ();
5976- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6150+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
59776151 }
59786152
59796153 @ Inline private void appendStrBufLineFeed () {
@@ -6278,6 +6452,8 @@ public void eof() throws SAXException {
62786452 break eofloop ;
62796453 case COMMENT_START :
62806454 case COMMENT :
6455+ case COMMENT_LESSTHAN :
6456+ case COMMENT_LESSTHAN_BANG :
62816457 /*
62826458 * EOF Parse error.
62836459 */
@@ -6289,6 +6465,7 @@ public void eof() throws SAXException {
62896465 */
62906466 break eofloop ;
62916467 case COMMENT_END :
6468+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
62926469 errEofInComment ();
62936470 /* Emit the comment token. */
62946471 emitComment (2 , 0 );
@@ -6298,6 +6475,7 @@ public void eof() throws SAXException {
62986475 break eofloop ;
62996476 case COMMENT_END_DASH :
63006477 case COMMENT_START_DASH :
6478+ case COMMENT_LESSTHAN_BANG_DASH :
63016479 errEofInComment ();
63026480 /* Emit the comment token. */
63036481 emitComment (1 , 0 );
@@ -6921,7 +7099,7 @@ protected void errGtInPublicId() throws SAXException {
// Error hook with an empty body: does nothing in this class.
// Presumably overridden by subclasses that report tokenizer errors -- confirm.
69217099 protected void errNamelessDoctype () throws SAXException {
69227100 }
69237101
// Error hook with an empty body. In this change it takes the place of
// errConsecutiveHyphens() and is called from the
// COMMENT_LESSTHAN_BANG_DASH_DASH state whenever "<!--" inside a comment is
// followed by anything other than '>'.
// Presumably overridden by subclasses that report tokenizer errors -- confirm.
6924- protected void errConsecutiveHyphens () throws SAXException {
7102+ protected void errNestedComment () throws SAXException {
69257103 }
69267104
69277105 protected void errPrematureEndOfComment () throws SAXException {
@@ -7064,9 +7242,6 @@ protected void errExpectedSystemId() throws SAXException {
// Error hook with an empty body: does nothing in this class.
// Presumably overridden by subclasses that report tokenizer errors -- confirm.
70647242 protected void errMissingSpaceBeforeDoctypeName () throws SAXException {
70657243 }
70667244
// Removed by this change: the empty errHyphenHyphenBang() hook is deleted,
// as is its call in the '!' arm of the comment-end handling.
// NOTE(review): any subclass overriding this hook would stop compiling if it
// uses @Override -- check for overrides outside this file.
7067- protected void errHyphenHyphenBang () throws SAXException {
7068- }
7069-
// Error hook with an empty body: does nothing in this class.
// Presumably overridden by subclasses that report tokenizer errors -- confirm.
70707245 protected void errNcrControlChar () throws SAXException {
70717246 }
70727247
0 commit comments