@@ -220,6 +220,8 @@ public class Tokenizer implements Locator {
220220
221221 public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74 ;
222222
223+ public static final int AMBIGUOUS_AMPERSAND = 75 ; // State id the named-character-reference lookup transitions to when no match is found.
224+
223225 /**
224226 * Magic value for UTF-16 operations.
225227 */
@@ -3106,6 +3108,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
31063108 case '<' :
31073109 case '&' :
31083110 case '\u0000' :
3111+ case ';' :
31093112 emitOrAppendCharRefBuf (returnState );
31103113 if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
31113114 cstart = pos ;
@@ -3134,17 +3137,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
31343137 firstCharKey = c - 'A' ;
31353138 } else {
31363139 // No match
3137- /*
3138- * If no match can be made, then this is a parse
3139- * error.
3140- */
3141- errNoNamedCharacterMatch ();
31423140 emitOrAppendCharRefBuf (returnState );
31433141 if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
31443142 cstart = pos ;
31453143 }
31463144 reconsume = true ;
3147- state = transition (state , returnState , reconsume , pos );
3145+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
31483146 continue stateloop ;
31493147 }
31503148 // Didn't fail yet
@@ -3205,17 +3203,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
32053203 }
32063204 }
32073205 if (hilo == 0 ) {
3208- /*
3209- * If no match can be made, then this is a parse
3210- * error.
3211- */
3212- errNoNamedCharacterMatch ();
32133206 emitOrAppendCharRefBuf (returnState );
32143207 if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
32153208 cstart = pos ;
32163209 }
32173210 reconsume = true ;
3218- state = transition (state , returnState , reconsume , pos );
3211+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
32193212 continue stateloop ;
32203213 }
32213214 // Didn't fail yet
@@ -3298,16 +3291,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
32983291
32993292 if (candidate == -1 ) {
33003293 // reconsume deals with CR, LF or nul
3301- /*
3302- * If no match can be made, then this is a parse error.
3303- */
3304- errNoNamedCharacterMatch ();
33053294 emitOrAppendCharRefBuf (returnState );
33063295 if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
33073296 cstart = pos ;
33083297 }
33093298 reconsume = true ;
3310- state = transition (state , returnState , reconsume , pos );
3299+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
33113300 continue stateloop ;
33123301 } else {
33133302 // c can't be CR, LF or nul if we got here
@@ -3345,10 +3334,9 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
33453334 * after the U+0026 AMPERSAND (&) must be
33463335 * unconsumed, and nothing is returned.
33473336 */
3348- errNoNamedCharacterMatch ();
33493337 appendCharRefBufToStrBuf ();
33503338 reconsume = true ;
3351- state = transition (state , returnState , reconsume , pos );
3339+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
33523340 continue stateloop ;
33533341 }
33543342 }
@@ -3411,6 +3399,28 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
34113399 * I'm ∉ I tell you.
34123400 */
34133401 }
3402+ // XXX reorder point
3403+ case AMBIGUOUS_AMPERSAND : // Reached from the named character reference lookup when no match was found; scans the characters that follow.
3404+ ampersandloop : for (;;) { // NOTE(review): the ampersandloop label is never referenced inside this block; the continue below is unlabeled.
3405+ if (reconsume ) { // NOTE(review): reconsume is never cleared in this state, and c is not refreshed when it is false -- confirm this is intended.
3406+ if (++pos == endPos ) { // Advance pos; if that reaches endPos, leave stateloop (pos stays advanced).
3407+ break stateloop ;
3408+ }
3409+ pos --; // Undo the advance made by the endPos check above.
3410+ c = checkChar (buf , pos ); // Re-read the character at the current position.
3411+ }
3412+ if (c == ';' ) { // A ';' here is reported as a "no named character match" error, then falls to the transition below.
3413+ errNoNamedCharacterMatch ();
3414+ } else if ((c >= '0' && c <= '9' ) // ASCII digit ...
3415+ || (c >= 'A' && c <= 'Z' ) // ... or ASCII upper-case letter ...
3416+ || (c >= 'a' && c <= 'z' )) { // ... or ASCII lower-case letter.
3417+ appendStrBuf (c ); // Hand the alphanumeric character to appendStrBuf.
3418+ pos ++; // Step past it.
3419+ continue ; // Run the scanning loop again for the next character.
3420+ }
3421+ state = transition (state , returnState , reconsume , pos ); // ';' (after the error) or any non-alphanumeric character: go back to returnState.
3422+ continue stateloop ;
3423+ }
34143424 case CONSUME_NCR :
34153425 if (++pos == endPos ) {
34163426 break stateloop ;
@@ -6501,7 +6511,6 @@ public void eof() throws SAXException {
65016511 state = returnState ;
65026512 continue ;
65036513 case CHARACTER_REFERENCE_HILO_LOOKUP :
6504- errNoNamedCharacterMatch ();
65056514 emitOrAppendCharRefBuf (returnState );
65066515 state = returnState ;
65076516 continue ;
@@ -6555,10 +6564,6 @@ public void eof() throws SAXException {
65556564 }
65566565
65576566 if (candidate == -1 ) {
6558- /*
6559- * If no match can be made, then this is a parse error.
6560- */
6561- errNoNamedCharacterMatch ();
65626567 emitOrAppendCharRefBuf (returnState );
65636568 state = returnState ;
65646569 continue eofloop ;
@@ -6596,7 +6601,6 @@ public void eof() throws SAXException {
65966601 * after the U+0026 AMPERSAND (&) must be
65976602 * unconsumed, and nothing is returned.
65986603 */
6599- errNoNamedCharacterMatch ();
66006604 appendCharRefBufToStrBuf ();
66016605 state = returnState ;
66026606 continue eofloop ;
0 commit comments