mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-29 11:41:22 +03:00
html: Simplify node info accounting
This commit is contained in:
160
HTMLparser.c
160
HTMLparser.c
@ -1417,6 +1417,21 @@ static const elementPriority htmlEndPriority[] = {
|
|||||||
* *
|
* *
|
||||||
************************************************************************/
|
************************************************************************/
|
||||||
|
|
||||||
|
static void
|
||||||
|
htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
|
||||||
|
/*
|
||||||
|
* Capture end position and add node
|
||||||
|
*/
|
||||||
|
if ( ctxt->node != NULL && ctxt->record_info ) {
|
||||||
|
ctxt->nodeInfo->end_pos = ctxt->input->consumed +
|
||||||
|
(CUR_PTR - ctxt->input->base);
|
||||||
|
ctxt->nodeInfo->end_line = ctxt->input->line;
|
||||||
|
ctxt->nodeInfo->node = ctxt->node;
|
||||||
|
xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
|
||||||
|
htmlNodeInfoPop(ctxt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlInitAutoClose:
|
* htmlInitAutoClose:
|
||||||
*
|
*
|
||||||
@ -1546,6 +1561,7 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
|
|||||||
"Opening and ending tag mismatch: %s and %s\n",
|
"Opening and ending tag mismatch: %s and %s\n",
|
||||||
newtag, ctxt->name);
|
newtag, ctxt->name);
|
||||||
}
|
}
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
||||||
ctxt->sax->endElement(ctxt->userData, ctxt->name);
|
ctxt->sax->endElement(ctxt->userData, ctxt->name);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
@ -1566,6 +1582,7 @@ htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
|
|||||||
if (ctxt->nameNr == 0)
|
if (ctxt->nameNr == 0)
|
||||||
return;
|
return;
|
||||||
for (i = (ctxt->nameNr - 1); i >= 0; i--) {
|
for (i = (ctxt->nameNr - 1); i >= 0; i--) {
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
||||||
ctxt->sax->endElement(ctxt->userData, ctxt->name);
|
ctxt->sax->endElement(ctxt->userData, ctxt->name);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
@ -1592,6 +1609,7 @@ htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
|
|||||||
|
|
||||||
while ((ctxt->name != NULL) &&
|
while ((ctxt->name != NULL) &&
|
||||||
(htmlCheckAutoClose(newtag, ctxt->name))) {
|
(htmlCheckAutoClose(newtag, ctxt->name))) {
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
||||||
ctxt->sax->endElement(ctxt->userData, ctxt->name);
|
ctxt->sax->endElement(ctxt->userData, ctxt->name);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
@ -4171,9 +4189,9 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
|||||||
*/
|
*/
|
||||||
oldname = ctxt->name;
|
oldname = ctxt->name;
|
||||||
if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
|
if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
||||||
ctxt->sax->endElement(ctxt->userData, name);
|
ctxt->sax->endElement(ctxt->userData, name);
|
||||||
htmlNodeInfoPop(ctxt);
|
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
ret = 1;
|
ret = 1;
|
||||||
} else {
|
} else {
|
||||||
@ -4251,11 +4269,6 @@ htmlParseReference(htmlParserCtxtPtr ctxt) {
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
htmlParseContent(htmlParserCtxtPtr ctxt) {
|
htmlParseContent(htmlParserCtxtPtr ctxt) {
|
||||||
xmlChar *currentNode;
|
|
||||||
int depth;
|
|
||||||
|
|
||||||
currentNode = xmlStrdup(ctxt->name);
|
|
||||||
depth = ctxt->nameNr;
|
|
||||||
while (!PARSER_STOPPED(ctxt)) {
|
while (!PARSER_STOPPED(ctxt)) {
|
||||||
int mode;
|
int mode;
|
||||||
|
|
||||||
@ -4279,26 +4292,10 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
|
|||||||
* Our tag or one of it's parent or children is ending.
|
* Our tag or one of it's parent or children is ending.
|
||||||
*/
|
*/
|
||||||
if ((CUR == '<') && (NXT(1) == '/')) {
|
if ((CUR == '<') && (NXT(1) == '/')) {
|
||||||
if (htmlParseEndTag(ctxt) &&
|
htmlParseEndTag(ctxt);
|
||||||
((currentNode != NULL) || (ctxt->nameNr == 0))) {
|
|
||||||
if (currentNode != NULL)
|
|
||||||
xmlFree(currentNode);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
continue; /* while */
|
continue; /* while */
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Has this node been popped out during parsing of
|
|
||||||
* the next element
|
|
||||||
*/
|
|
||||||
if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
|
|
||||||
(!xmlStrEqual(currentNode, ctxt->name)))
|
|
||||||
{
|
|
||||||
if (currentNode != NULL) xmlFree(currentNode);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((CUR == '<') && (NXT(1) == '!')) {
|
if ((CUR == '<') && (NXT(1) == '!')) {
|
||||||
/*
|
/*
|
||||||
* Sometimes DOCTYPE arrives in the middle of the document
|
* Sometimes DOCTYPE arrives in the middle of the document
|
||||||
@ -4370,7 +4367,6 @@ done:
|
|||||||
SHRINK;
|
SHRINK;
|
||||||
GROW;
|
GROW;
|
||||||
}
|
}
|
||||||
if (currentNode != NULL) xmlFree(currentNode);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -4426,11 +4422,15 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
|
|||||||
ctxt->endCheckState = info->dataMode;
|
ctxt->endCheckState = info->dataMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ctxt->record_info)
|
||||||
|
htmlNodeInfoPush(ctxt, &node_info);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for an Empty Element labeled the XML/SGML way
|
* Check for an Empty Element labeled the XML/SGML way
|
||||||
*/
|
*/
|
||||||
if ((CUR == '/') && (NXT(1) == '>')) {
|
if ((CUR == '/') && (NXT(1) == '>')) {
|
||||||
SKIP(2);
|
SKIP(2);
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
||||||
ctxt->sax->endElement(ctxt->userData, name);
|
ctxt->sax->endElement(ctxt->userData, name);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
@ -4447,20 +4447,11 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
|
|||||||
* end of parsing of this node.
|
* end of parsing of this node.
|
||||||
*/
|
*/
|
||||||
if (xmlStrEqual(name, ctxt->name)) {
|
if (xmlStrEqual(name, ctxt->name)) {
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
nodePop(ctxt);
|
nodePop(ctxt);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Capture end position and add node
|
|
||||||
*/
|
|
||||||
if (ctxt->record_info) {
|
|
||||||
node_info.end_pos = ctxt->input->consumed +
|
|
||||||
(CUR_PTR - ctxt->input->base);
|
|
||||||
node_info.end_line = ctxt->input->line;
|
|
||||||
node_info.node = ctxt->node;
|
|
||||||
xmlParserAddNodeInfo(ctxt, &node_info);
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4468,6 +4459,7 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
|
|||||||
* Check for an Empty Element from DTD definition
|
* Check for an Empty Element from DTD definition
|
||||||
*/
|
*/
|
||||||
if ((info != NULL) && (info->empty)) {
|
if ((info != NULL) && (info->empty)) {
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
||||||
ctxt->sax->endElement(ctxt->userData, name);
|
ctxt->sax->endElement(ctxt->userData, name);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
@ -4504,24 +4496,6 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
|
|||||||
xmlFree(currentNode);
|
xmlFree(currentNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
|
|
||||||
/*
|
|
||||||
* Capture end position and add node
|
|
||||||
*/
|
|
||||||
if ( ctxt->node != NULL && ctxt->record_info ) {
|
|
||||||
ctxt->nodeInfo->end_pos = ctxt->input->consumed +
|
|
||||||
(CUR_PTR - ctxt->input->base);
|
|
||||||
ctxt->nodeInfo->end_line = ctxt->input->line;
|
|
||||||
ctxt->nodeInfo->node = ctxt->node;
|
|
||||||
xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
|
|
||||||
htmlNodeInfoPop(ctxt);
|
|
||||||
}
|
|
||||||
if (CUR == 0) {
|
|
||||||
htmlAutoCloseOnEnd(ctxt);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlParseElementInternal:
|
* htmlParseElementInternal:
|
||||||
* @ctxt: an HTML parser context
|
* @ctxt: an HTML parser context
|
||||||
@ -4569,11 +4543,15 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
|
|||||||
ctxt->endCheckState = info->dataMode;
|
ctxt->endCheckState = info->dataMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ctxt->record_info)
|
||||||
|
htmlNodeInfoPush(ctxt, &node_info);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for an Empty Element labeled the XML/SGML way
|
* Check for an Empty Element labeled the XML/SGML way
|
||||||
*/
|
*/
|
||||||
if ((CUR == '/') && (NXT(1) == '>')) {
|
if ((CUR == '/') && (NXT(1) == '>')) {
|
||||||
SKIP(2);
|
SKIP(2);
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
||||||
ctxt->sax->endElement(ctxt->userData, name);
|
ctxt->sax->endElement(ctxt->userData, name);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
@ -4590,13 +4568,10 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
|
|||||||
* end of parsing of this node.
|
* end of parsing of this node.
|
||||||
*/
|
*/
|
||||||
if (xmlStrEqual(name, ctxt->name)) {
|
if (xmlStrEqual(name, ctxt->name)) {
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
nodePop(ctxt);
|
nodePop(ctxt);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctxt->record_info)
|
|
||||||
htmlNodeInfoPush(ctxt, &node_info);
|
|
||||||
htmlParserFinishElementParsing(ctxt);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4604,14 +4579,12 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
|
|||||||
* Check for an Empty Element from DTD definition
|
* Check for an Empty Element from DTD definition
|
||||||
*/
|
*/
|
||||||
if ((info != NULL) && (info->empty)) {
|
if ((info != NULL) && (info->empty)) {
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
||||||
ctxt->sax->endElement(ctxt->userData, name);
|
ctxt->sax->endElement(ctxt->userData, name);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctxt->record_info)
|
|
||||||
htmlNodeInfoPush(ctxt, &node_info);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -4624,19 +4597,6 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
|
htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
|
||||||
xmlChar *currentNode;
|
|
||||||
int depth;
|
|
||||||
|
|
||||||
depth = ctxt->nameNr;
|
|
||||||
if (depth <= 0) {
|
|
||||||
currentNode = NULL;
|
|
||||||
} else {
|
|
||||||
currentNode = xmlStrdup(ctxt->name);
|
|
||||||
if (currentNode == NULL) {
|
|
||||||
htmlErrMemory(ctxt);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while (PARSER_STOPPED(ctxt) == 0) {
|
while (PARSER_STOPPED(ctxt) == 0) {
|
||||||
int mode;
|
int mode;
|
||||||
|
|
||||||
@ -4660,48 +4620,10 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
|
|||||||
* Our tag or one of it's parent or children is ending.
|
* Our tag or one of it's parent or children is ending.
|
||||||
*/
|
*/
|
||||||
if ((CUR == '<') && (NXT(1) == '/')) {
|
if ((CUR == '<') && (NXT(1) == '/')) {
|
||||||
if (htmlParseEndTag(ctxt) &&
|
htmlParseEndTag(ctxt);
|
||||||
((currentNode != NULL) || (ctxt->nameNr == 0))) {
|
|
||||||
if (currentNode != NULL)
|
|
||||||
xmlFree(currentNode);
|
|
||||||
|
|
||||||
depth = ctxt->nameNr;
|
|
||||||
if (depth <= 0) {
|
|
||||||
currentNode = NULL;
|
|
||||||
} else {
|
|
||||||
currentNode = xmlStrdup(ctxt->name);
|
|
||||||
if (currentNode == NULL) {
|
|
||||||
htmlErrMemory(ctxt);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue; /* while */
|
continue; /* while */
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Has this node been popped out during parsing of
|
|
||||||
* the next element
|
|
||||||
*/
|
|
||||||
if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
|
|
||||||
(!xmlStrEqual(currentNode, ctxt->name)))
|
|
||||||
{
|
|
||||||
htmlParserFinishElementParsing(ctxt);
|
|
||||||
if (currentNode != NULL) xmlFree(currentNode);
|
|
||||||
|
|
||||||
if (ctxt->name == NULL) {
|
|
||||||
currentNode = NULL;
|
|
||||||
} else {
|
|
||||||
currentNode = xmlStrdup(ctxt->name);
|
|
||||||
if (currentNode == NULL) {
|
|
||||||
htmlErrMemory(ctxt);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
depth = ctxt->nameNr;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((CUR == '<') && (NXT(1) == '!')) {
|
if ((CUR == '<') && (NXT(1) == '!')) {
|
||||||
/*
|
/*
|
||||||
* Sometimes DOCTYPE arrives in the middle of the document
|
* Sometimes DOCTYPE arrives in the middle of the document
|
||||||
@ -4738,18 +4660,6 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
|
|||||||
*/
|
*/
|
||||||
else if ((CUR == '<') && IS_ASCII_LETTER(NXT(1))) {
|
else if ((CUR == '<') && IS_ASCII_LETTER(NXT(1))) {
|
||||||
htmlParseElementInternal(ctxt);
|
htmlParseElementInternal(ctxt);
|
||||||
if (currentNode != NULL) xmlFree(currentNode);
|
|
||||||
|
|
||||||
if (ctxt->name == NULL) {
|
|
||||||
currentNode = NULL;
|
|
||||||
} else {
|
|
||||||
currentNode = xmlStrdup(ctxt->name);
|
|
||||||
if (currentNode == NULL) {
|
|
||||||
htmlErrMemory(ctxt);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
depth = ctxt->nameNr;
|
|
||||||
}
|
}
|
||||||
else if (CUR == '<') {
|
else if (CUR == '<') {
|
||||||
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
|
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
|
||||||
@ -4785,7 +4695,6 @@ done:
|
|||||||
SHRINK;
|
SHRINK;
|
||||||
GROW;
|
GROW;
|
||||||
}
|
}
|
||||||
if (currentNode != NULL) xmlFree(currentNode);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
xmlNodePtr
|
xmlNodePtr
|
||||||
@ -5686,6 +5595,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
*/
|
*/
|
||||||
if ((CUR == '/') && (NXT(1) == '>')) {
|
if ((CUR == '/') && (NXT(1) == '>')) {
|
||||||
SKIP(2);
|
SKIP(2);
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
||||||
ctxt->sax->endElement(ctxt->userData, name);
|
ctxt->sax->endElement(ctxt->userData, name);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
@ -5704,6 +5614,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
* end of parsing of this node.
|
* end of parsing of this node.
|
||||||
*/
|
*/
|
||||||
if (xmlStrEqual(name, ctxt->name)) {
|
if (xmlStrEqual(name, ctxt->name)) {
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
nodePop(ctxt);
|
nodePop(ctxt);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
}
|
}
|
||||||
@ -5719,6 +5630,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
* Check for an Empty Element from DTD definition
|
* Check for an Empty Element from DTD definition
|
||||||
*/
|
*/
|
||||||
if ((info != NULL) && (info->empty)) {
|
if ((info != NULL) && (info->empty)) {
|
||||||
|
htmlParserFinishElementParsing(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
||||||
ctxt->sax->endElement(ctxt->userData, name);
|
ctxt->sax->endElement(ctxt->userData, name);
|
||||||
htmlnamePop(ctxt);
|
htmlnamePop(ctxt);
|
||||||
|
Reference in New Issue
Block a user