@@ -579,21 +579,6 @@ static GumboInsertionMode get_appropriate_insertion_mode(
579579 GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL ;
580580
581581 switch (node -> v .element .tag ) {
582- case GUMBO_TAG_SELECT : {
583- if (is_last ) {
584- return GUMBO_INSERTION_MODE_IN_SELECT ;
585- }
586- for (int i = index ; i > 0 ; -- i ) {
587- const GumboNode * ancestor = open_elements -> data [i ];
588- if (node_html_tag_is (ancestor , GUMBO_TAG_TEMPLATE )) {
589- return GUMBO_INSERTION_MODE_IN_SELECT ;
590- }
591- if (node_html_tag_is (ancestor , GUMBO_TAG_TABLE )) {
592- return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE ;
593- }
594- }
595- return GUMBO_INSERTION_MODE_IN_SELECT ;
596- }
597582 case GUMBO_TAG_TD :
598583 case GUMBO_TAG_TH :
599584 if (!is_last ) return GUMBO_INSERTION_MODE_IN_CELL ;
@@ -1467,12 +1452,6 @@ static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
14671452 (gumbo_tagset ){TAG (HTML ), TAG (TABLE ), TAG (TEMPLATE )});
14681453}
14691454
1470- // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
1471- static bool has_an_element_in_select_scope (GumboParser * parser , GumboTag tag ) {
1472- return has_an_element_in_specific_scope (
1473- parser , 1 , & tag , true, (gumbo_tagset ){TAG (OPTGROUP ), TAG (OPTION )});
1474- }
1475-
14761455// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
14771456// "exception" is the "element to exclude from the process" listed in the spec.
14781457// Pass GUMBO_TAG_LAST to not exclude any of them.
@@ -1546,18 +1525,6 @@ static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
15461525 }
15471526}
15481527
1549- // This factors out the "act as if an end tag of tag name 'select' had been
1550- // seen" clause of the spec, since it's referenced in several places. It pops
1551- // all nodes from the stack until the current <select> has been closed, then
1552- // resets the insertion mode appropriately.
1553- static void close_current_select (GumboParser * parser ) {
1554- GumboNode * node = pop_current_node (parser );
1555- while (!node_html_tag_is (node , GUMBO_TAG_SELECT )) {
1556- node = pop_current_node (parser );
1557- }
1558- reset_insertion_mode_appropriately (parser );
1559- }
1560-
15611528// The list of nodes in the "special" category:
15621529// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
15631530static bool is_special_node (const GumboNode * node ) {
@@ -2798,6 +2765,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
27982765 set_frameset_not_ok (parser );
27992766 return success ;
28002767 } else if (tag_is (token , kStartTag , GUMBO_TAG_INPUT )) {
2768+ if (has_an_element_in_scope (parser , GUMBO_TAG_SELECT )) {
2769+ parser_add_parse_error (parser , token );
2770+ while (!node_html_tag_is (pop_current_node (parser ), GUMBO_TAG_SELECT ));
2771+ parser -> _parser_state -> _reprocess_current_token = true;
2772+ return false;
2773+ }
28012774 if (!attribute_matches (& token -> v .start_tag .attributes , "type" , "hidden" )) {
28022775 // Must be before the element is inserted, as that takes ownership of the
28032776 // token's attribute vector.
@@ -2816,6 +2789,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
28162789 return true;
28172790 } else if (tag_is (token , kStartTag , GUMBO_TAG_HR )) {
28182791 bool result = maybe_implicitly_close_p_tag (parser , token );
2792+ if (has_an_element_in_scope (parser , GUMBO_TAG_SELECT )) {
2793+ generate_implied_end_tags (parser , GUMBO_TAG_LAST );
2794+ if (has_an_element_in_scope_with_tagname (parser , 2 , (GumboTag []) {GUMBO_TAG_OPTION , GUMBO_TAG_OPTGROUP })) {
2795+ parser_add_parse_error (parser , token );
2796+ }
2797+ }
28192798 insert_element_from_token (parser , token );
28202799 pop_current_node (parser );
28212800 acknowledge_self_closing_tag (parser );
@@ -2848,24 +2827,42 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
28482827 run_generic_parsing_algorithm (parser , token , GUMBO_LEX_RAWTEXT );
28492828 return true;
28502829 } else if (tag_is (token , kStartTag , GUMBO_TAG_SELECT )) {
2851- reconstruct_active_formatting_elements (parser );
2852- insert_element_from_token (parser , token );
2853- set_frameset_not_ok (parser );
2854- GumboInsertionMode state = parser -> _parser_state -> _insertion_mode ;
2855- if (state == GUMBO_INSERTION_MODE_IN_TABLE ||
2856- state == GUMBO_INSERTION_MODE_IN_CAPTION ||
2857- state == GUMBO_INSERTION_MODE_IN_TABLE_BODY ||
2858- state == GUMBO_INSERTION_MODE_IN_ROW ||
2859- state == GUMBO_INSERTION_MODE_IN_CELL ) {
2860- set_insertion_mode (parser , GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE );
2830+ if (has_an_element_in_scope (parser , GUMBO_TAG_SELECT )) {
2831+ parser_add_parse_error (parser , token );
2832+ ignore_token (parser );
2833+ while (!node_html_tag_is (pop_current_node (parser ), GUMBO_TAG_SELECT ));
28612834 } else {
2862- set_insertion_mode (parser , GUMBO_INSERTION_MODE_IN_SELECT );
2835+ reconstruct_active_formatting_elements (parser );
2836+ insert_element_from_token (parser , token );
2837+ set_frameset_not_ok (parser );
28632838 }
28642839 return true;
2865- } else if (tag_in (token , kStartTag ,
2866- (gumbo_tagset ){TAG (OPTION ), TAG (OPTGROUP )})) {
2867- if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTION )) {
2868- pop_current_node (parser );
2840+ } else if (tag_is (token , kStartTag , GUMBO_TAG_OPTION )) {
2841+ if (has_an_element_in_scope (parser , GUMBO_TAG_SELECT )) {
2842+ generate_implied_end_tags (parser , GUMBO_TAG_OPTGROUP );
2843+ if (has_an_element_in_scope (parser , GUMBO_TAG_OPTION )) {
2844+ parser_add_parse_error (parser , token );
2845+ }
2846+ } else {
2847+ if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTION )) {
2848+ pop_current_node (parser );
2849+ }
2850+ }
2851+ reconstruct_active_formatting_elements (parser );
2852+ insert_element_from_token (parser , token );
2853+ return true;
2854+ } else if (tag_is (token , kStartTag , GUMBO_TAG_OPTGROUP )) {
2855+ if (has_an_element_in_scope (parser , GUMBO_TAG_SELECT )) {
2856+ generate_implied_end_tags (parser , GUMBO_TAG_LAST );
2857+ if (has_an_element_in_scope_with_tagname (parser , 2 , (GumboTag []) {
2858+ GUMBO_TAG_OPTION , GUMBO_TAG_OPTGROUP
2859+ })) {
2860+ parser_add_parse_error (parser , token );
2861+ }
2862+ } else {
2863+ if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTION )) {
2864+ pop_current_node (parser );
2865+ }
28692866 }
28702867 reconstruct_active_formatting_elements (parser );
28712868 insert_element_from_token (parser , token );
@@ -3372,142 +3369,6 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
33723369 }
33733370}
33743371
3375- // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselect
3376- static bool handle_in_select (GumboParser * parser , GumboToken * token ) {
3377- if (token -> type == GUMBO_TOKEN_NULL ) {
3378- parser_add_parse_error (parser , token );
3379- ignore_token (parser );
3380- return false;
3381- } else if (token -> type == GUMBO_TOKEN_CHARACTER ||
3382- token -> type == GUMBO_TOKEN_WHITESPACE ) {
3383- insert_text_token (parser , token );
3384- return true;
3385- } else if (token -> type == GUMBO_TOKEN_DOCTYPE ) {
3386- parser_add_parse_error (parser , token );
3387- ignore_token (parser );
3388- return false;
3389- } else if (token -> type == GUMBO_TOKEN_COMMENT ) {
3390- append_comment_node (parser , get_current_node (parser ), token );
3391- return true;
3392- } else if (tag_is (token , kStartTag , GUMBO_TAG_HTML )) {
3393- return handle_in_body (parser , token );
3394- } else if (tag_is (token , kStartTag , GUMBO_TAG_OPTION )) {
3395- if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTION )) {
3396- pop_current_node (parser );
3397- }
3398- insert_element_from_token (parser , token );
3399- return true;
3400- } else if (tag_is (token , kStartTag , GUMBO_TAG_OPTGROUP )) {
3401- if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTION )) {
3402- pop_current_node (parser );
3403- }
3404- if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTGROUP )) {
3405- pop_current_node (parser );
3406- }
3407- insert_element_from_token (parser , token );
3408- return true;
3409- } else if (tag_is (token , kStartTag , GUMBO_TAG_HR )) {
3410- if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTION )) {
3411- pop_current_node (parser );
3412- }
3413- if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTGROUP )) {
3414- pop_current_node (parser );
3415- }
3416- insert_element_from_token (parser , token );
3417- pop_current_node (parser );
3418- acknowledge_self_closing_tag (parser );
3419- return true;
3420- } else if (tag_is (token , kEndTag , GUMBO_TAG_OPTGROUP )) {
3421- GumboVector * open_elements = & parser -> _parser_state -> _open_elements ;
3422- if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTION ) &&
3423- node_html_tag_is (open_elements -> data [open_elements -> length - 2 ],
3424- GUMBO_TAG_OPTGROUP )) {
3425- pop_current_node (parser );
3426- }
3427- if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTGROUP )) {
3428- pop_current_node (parser );
3429- return true;
3430- } else {
3431- parser_add_parse_error (parser , token );
3432- ignore_token (parser );
3433- return false;
3434- }
3435- } else if (tag_is (token , kEndTag , GUMBO_TAG_OPTION )) {
3436- if (node_html_tag_is (get_current_node (parser ), GUMBO_TAG_OPTION )) {
3437- pop_current_node (parser );
3438- return true;
3439- } else {
3440- parser_add_parse_error (parser , token );
3441- ignore_token (parser );
3442- return false;
3443- }
3444- } else if (tag_is (token , kEndTag , GUMBO_TAG_SELECT )) {
3445- if (!has_an_element_in_select_scope (parser , GUMBO_TAG_SELECT )) {
3446- parser_add_parse_error (parser , token );
3447- ignore_token (parser );
3448- return false;
3449- }
3450- close_current_select (parser );
3451- return true;
3452- } else if (tag_is (token , kStartTag , GUMBO_TAG_SELECT )) {
3453- parser_add_parse_error (parser , token );
3454- ignore_token (parser );
3455- if (has_an_element_in_select_scope (parser , GUMBO_TAG_SELECT )) {
3456- close_current_select (parser );
3457- }
3458- return false;
3459- } else if (tag_in (token , kStartTag ,
3460- (gumbo_tagset ){TAG (INPUT ), TAG (KEYGEN ), TAG (TEXTAREA )})) {
3461- parser_add_parse_error (parser , token );
3462- if (!has_an_element_in_select_scope (parser , GUMBO_TAG_SELECT )) {
3463- ignore_token (parser );
3464- } else {
3465- close_current_select (parser );
3466- parser -> _parser_state -> _reprocess_current_token = true;
3467- }
3468- return false;
3469- } else if (tag_in (token , kStartTag ,
3470- (gumbo_tagset ){TAG (SCRIPT ), TAG (TEMPLATE )}) ||
3471- tag_is (token , kEndTag , GUMBO_TAG_TEMPLATE )) {
3472- return handle_in_head (parser , token );
3473- } else if (token -> type == GUMBO_TOKEN_EOF ) {
3474- return handle_in_body (parser , token );
3475- } else {
3476- parser_add_parse_error (parser , token );
3477- ignore_token (parser );
3478- return false;
3479- }
3480- }
3481-
3482- // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselectintable
3483- static bool handle_in_select_in_table (GumboParser * parser , GumboToken * token ) {
3484- if (tag_in (token , kStartTag ,
3485- (gumbo_tagset ){TAG (CAPTION ), TAG (TABLE ), TAG (TBODY ), TAG (TFOOT ),
3486- TAG (THEAD ), TAG (TR ), TAG (TD ), TAG (TH )})) {
3487- parser_add_parse_error (parser , token );
3488- close_current_select (parser );
3489- parser -> _parser_state -> _reprocess_current_token = true;
3490- return false;
3491- } else if (tag_in (token , kEndTag ,
3492- (gumbo_tagset ){TAG (CAPTION ), TAG (TABLE ), TAG (TBODY ),
3493- TAG (TFOOT ), TAG (THEAD ), TAG (TR ), TAG (TD ), TAG (TH )})) {
3494- parser_add_parse_error (parser , token );
3495- if (!has_an_element_in_table_scope (parser , token -> v .end_tag )) {
3496- ignore_token (parser );
3497- return false;
3498- } else {
3499- close_current_select (parser );
3500- // close_current_select already does the
3501- // reset_insertion_mode_appropriately
3502- // reset_insertion_mode_appropriately(parser);
3503- parser -> _parser_state -> _reprocess_current_token = true;
3504- return false;
3505- }
3506- } else {
3507- return handle_in_select (parser , token );
3508- }
3509- }
3510-
35113372// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
35123373static bool handle_in_template (GumboParser * parser , GumboToken * token ) {
35133374 GumboParserState * state = parser -> _parser_state ;
@@ -3738,14 +3599,29 @@ static bool handle_after_after_frameset(
37383599// Function pointers for each insertion mode. Keep in sync with
37393600// insertion_mode.h.
37403601typedef bool (* TokenHandler )(GumboParser * parser , GumboToken * token );
3741- static const TokenHandler kTokenHandlers [] = {handle_initial ,
3742- handle_before_html , handle_before_head , handle_in_head ,
3743- handle_in_head_noscript , handle_after_head , handle_in_body , handle_text ,
3744- handle_in_table , handle_in_table_text , handle_in_caption ,
3745- handle_in_column_group , handle_in_table_body , handle_in_row , handle_in_cell ,
3746- handle_in_select , handle_in_select_in_table , handle_in_template ,
3747- handle_after_body , handle_in_frameset , handle_after_frameset ,
3748- handle_after_after_body , handle_after_after_frameset };
3602+ static const TokenHandler kTokenHandlers [] = {
3603+ handle_initial ,
3604+ handle_before_html ,
3605+ handle_before_head ,
3606+ handle_in_head ,
3607+ handle_in_head_noscript ,
3608+ handle_after_head ,
3609+ handle_in_body ,
3610+ handle_text ,
3611+ handle_in_table ,
3612+ handle_in_table_text ,
3613+ handle_in_caption ,
3614+ handle_in_column_group ,
3615+ handle_in_table_body ,
3616+ handle_in_row ,
3617+ handle_in_cell ,
3618+ handle_in_template ,
3619+ handle_after_body ,
3620+ handle_in_frameset ,
3621+ handle_after_frameset ,
3622+ handle_after_after_body ,
3623+ handle_after_after_frameset ,
3624+ };
37493625
37503626static bool handle_html_content (GumboParser * parser , GumboToken * token ) {
37513627 return kTokenHandlers [(unsigned int ) parser -> _parser_state -> _insertion_mode ](
0 commit comments