@@ -221,6 +221,8 @@ public class Tokenizer implements Locator, Locator2 {
221221
222222 public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74 ;
223223
224+ public static final int AMBIGUOUS_AMPERSAND = 75 ;
225+
224226 /**
225227 * Magic value for UTF-16 operations.
226228 */
@@ -3054,6 +3056,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
30543056 case '<' :
30553057 case '&' :
30563058 case '\u0000' :
3059+ case ';' :
30573060 emitOrAppendCharRefBuf (returnState );
30583061 if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
30593062 cstart = pos ;
@@ -3082,17 +3085,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
30823085 firstCharKey = c - 'A' ;
30833086 } else {
30843087 // No match
3085- /*
3086- * If no match can be made, then this is a parse
3087- * error.
3088- */
3089- errNoNamedCharacterMatch ();
30903088 emitOrAppendCharRefBuf (returnState );
30913089 if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
30923090 cstart = pos ;
30933091 }
30943092 reconsume = true ;
3095- state = transition (state , returnState , reconsume , pos );
3093+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
30963094 continue stateloop ;
30973095 }
30983096 // Didn't fail yet
@@ -3153,17 +3151,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
31533151 }
31543152 }
31553153 if (hilo == 0 ) {
3156- /*
3157- * If no match can be made, then this is a parse
3158- * error.
3159- */
3160- errNoNamedCharacterMatch ();
31613154 emitOrAppendCharRefBuf (returnState );
31623155 if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
31633156 cstart = pos ;
31643157 }
31653158 reconsume = true ;
3166- state = transition (state , returnState , reconsume , pos );
3159+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
31673160 continue stateloop ;
31683161 }
31693162 // Didn't fail yet
@@ -3246,16 +3239,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
32463239
32473240 if (candidate == -1 ) {
32483241 // reconsume deals with CR, LF or nul
3249- /*
3250- * If no match can be made, then this is a parse error.
3251- */
3252- errNoNamedCharacterMatch ();
32533242 emitOrAppendCharRefBuf (returnState );
32543243 if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
32553244 cstart = pos ;
32563245 }
32573246 reconsume = true ;
3258- state = transition (state , returnState , reconsume , pos );
3247+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
32593248 continue stateloop ;
32603249 } else {
32613250 // c can't be CR, LF or nul if we got here
@@ -3293,10 +3282,9 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
32933282 * after the U+0026 AMPERSAND (&) must be
32943283 * unconsumed, and nothing is returned.
32953284 */
3296- errNoNamedCharacterMatch ();
32973285 appendCharRefBufToStrBuf ();
32983286 reconsume = true ;
3299- state = transition (state , returnState , reconsume , pos );
3287+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
33003288 continue stateloop ;
33013289 }
33023290 }
@@ -3359,6 +3347,28 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
33593347 * I'm ∉ I tell you.
33603348 */
33613349 }
3350+ // XXX reorder point
3351+ case AMBIGUOUS_AMPERSAND :
3352+ ampersandloop : for (;;) {
3353+ if (reconsume ) {
3354+ if (++pos == endPos ) {
3355+ break stateloop ;
3356+ }
3357+ pos --;
3358+ c = checkChar (buf , pos );
3359+ }
3360+ if (c == ';' ) {
3361+ errNoNamedCharacterMatch ();
3362+ } else if ((c >= '0' && c <= '9' )
3363+ || (c >= 'A' && c <= 'Z' )
3364+ || (c >= 'a' && c <= 'z' )) {
3365+ appendStrBuf (c );
3366+ pos ++;
3367+ continue ;
3368+ }
3369+ state = transition (state , returnState , reconsume , pos );
3370+ continue stateloop ;
3371+ }
33623372 case CONSUME_NCR :
33633373 if (++pos == endPos ) {
33643374 break stateloop ;
@@ -6449,7 +6459,6 @@ public void eof() throws SAXException {
64496459 state = returnState ;
64506460 continue ;
64516461 case CHARACTER_REFERENCE_HILO_LOOKUP :
6452- errNoNamedCharacterMatch ();
64536462 emitOrAppendCharRefBuf (returnState );
64546463 state = returnState ;
64556464 continue ;
@@ -6503,10 +6512,6 @@ public void eof() throws SAXException {
65036512 }
65046513
65056514 if (candidate == -1 ) {
6506- /*
6507- * If no match can be made, then this is a parse error.
6508- */
6509- errNoNamedCharacterMatch ();
65106515 emitOrAppendCharRefBuf (returnState );
65116516 state = returnState ;
65126517 continue eofloop ;
@@ -6544,7 +6549,6 @@ public void eof() throws SAXException {
65446549 * after the U+0026 AMPERSAND (&) must be
65456550 * unconsumed, and nothing is returned.
65466551 */
6547- errNoNamedCharacterMatch ();
65486552 appendCharRefBufToStrBuf ();
65496553 state = returnState ;
65506554 continue eofloop ;
0 commit comments