1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

another entity processing update from Markus Henke Daniel

* tree.c: another entity processing update from Markus Henke
Daniel
This commit is contained in:
Daniel Veillard
2002-04-20 06:41:40 +00:00
parent 54d02fb30d
commit 7646b18d64
5 changed files with 393 additions and 173 deletions

View File

@ -1,3 +1,7 @@
Fri Apr 19 18:26:04 CEST 2002 Daniel Veillard <daniel@veillard.com>
* tree.c: another entity processing update from Markus Henke
Fri Apr 19 17:14:24 CEST 2002 Bjorn Reese <breese@users.sourceforge.net> Fri Apr 19 17:14:24 CEST 2002 Bjorn Reese <breese@users.sourceforge.net>
* trionan.c: fixed crash on OSF/1 * trionan.c: fixed crash on OSF/1

View File

@ -60,6 +60,16 @@ xmlAutomataStatePtr xmlAutomataNewCountTrans(xmlAutomataPtr am,
int min, int min,
int max, int max,
void *data); void *data);
xmlAutomataStatePtr xmlAutomataNewOnceTrans (xmlAutomataPtr am,
xmlAutomataStatePtr from,
xmlAutomataStatePtr to,
const xmlChar *token,
int min,
int max,
void *data);
xmlAutomataStatePtr xmlAutomataNewAllTrans (xmlAutomataPtr am,
xmlAutomataStatePtr from,
xmlAutomataStatePtr to);
xmlAutomataStatePtr xmlAutomataNewEpsilon (xmlAutomataPtr am, xmlAutomataStatePtr xmlAutomataNewEpsilon (xmlAutomataPtr am,
xmlAutomataStatePtr from, xmlAutomataStatePtr from,
xmlAutomataStatePtr to); xmlAutomataStatePtr to);

417
tree.c
View File

@ -902,56 +902,73 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
* Returns a pointer to the string copy, the caller must free it. * Returns a pointer to the string copy, the caller must free it.
*/ */
xmlChar * xmlChar *
xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) { xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
{
xmlNodePtr node = list; xmlNodePtr node = list;
xmlChar *ret = NULL; xmlChar *ret = NULL;
xmlEntityPtr ent; xmlEntityPtr ent;
if (list == NULL) return(NULL); if (list == NULL)
return (NULL);
while (node != NULL) { while (node != NULL) {
if ((node->type == XML_TEXT_NODE) || if ((node->type == XML_TEXT_NODE) ||
(node->type == XML_CDATA_SECTION_NODE)) { (node->type == XML_CDATA_SECTION_NODE)) {
if (inLine) { if (inLine) {
ret = xmlStrcat(ret, node->content); ret = xmlStrcat(ret, node->content);
} else {
xmlChar *buffer;
buffer = xmlEncodeEntitiesReentrant(doc, node->content);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
}
} else if (node->type == XML_ENTITY_REF_NODE) {
if (inLine) {
ent = xmlGetDocEntity(doc, node->name);
if (ent != NULL)
ret = xmlStrcat(ret, ent->content);
else {
ret = xmlStrcat(ret, node->content);
}
} else { } else {
xmlChar buf[2]; xmlChar *buffer;
buf[0] = '&'; buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
ret = xmlStrcat(ret, node->name);
buf[0] = ';'; buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
}
}
#if 0
else {
xmlGenericError(xmlGenericErrorContext,
"xmlGetNodeListString : invalid node type %d\n",
node->type);
}
#endif
node = node->next;
}
return(ret);
}
buffer = xmlEncodeEntitiesReentrant(doc, node->content);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
}
} else if (node->type == XML_ENTITY_REF_NODE) {
if (inLine) {
ent = xmlGetDocEntity(doc, node->name);
if (ent != NULL) {
xmlChar *buffer;
/* an entity content can be any "well balanced chunk",
* i.e. the result of the content [43] production:
* http://www.w3.org/TR/REC-xml#NT-content.
* So it can contain text, CDATA section or nested
* entity reference nodes (among others).
* -> we recursively call xmlNodeListGetString()
* which handles these types */
buffer = xmlNodeListGetString(doc, ent->children, 1);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
} else {
ret = xmlStrcat(ret, node->content);
}
} else {
xmlChar buf[2];
buf[0] = '&';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
ret = xmlStrcat(ret, node->name);
buf[0] = ';';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
}
}
#if 0
else {
xmlGenericError(xmlGenericErrorContext,
"xmlGetNodeListString : invalid node type %d\n",
node->type);
}
#endif
node = node->next;
}
return (ret);
}
/** /**
* xmlNodeListGetRawString: * xmlNodeListGetRawString:
* @doc: the document * @doc: the document
@ -965,54 +982,73 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
* Returns a pointer to the string copy, the caller must free it. * Returns a pointer to the string copy, the caller must free it.
*/ */
xmlChar * xmlChar *
xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine) { xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine)
{
xmlNodePtr node = list; xmlNodePtr node = list;
xmlChar *ret = NULL; xmlChar *ret = NULL;
xmlEntityPtr ent; xmlEntityPtr ent;
if (list == NULL) return(NULL); if (list == NULL)
return (NULL);
while (node != NULL) { while (node != NULL) {
if ((node->type == XML_TEXT_NODE) || if ((node->type == XML_TEXT_NODE) ||
(node->type == XML_CDATA_SECTION_NODE)) { (node->type == XML_CDATA_SECTION_NODE)) {
if (inLine) { if (inLine) {
ret = xmlStrcat(ret, node->content); ret = xmlStrcat(ret, node->content);
} else {
xmlChar *buffer;
buffer = xmlEncodeSpecialChars(doc, node->content);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
}
} else if (node->type == XML_ENTITY_REF_NODE) {
if (inLine) {
ent = xmlGetDocEntity(doc, node->name);
if (ent != NULL)
ret = xmlStrcat(ret, ent->content);
else {
ret = xmlStrcat(ret, node->content);
}
} else { } else {
xmlChar buf[2]; xmlChar *buffer;
buf[0] = '&'; buf[1] = 0;
ret = xmlStrncat(ret, buf, 1); buffer = xmlEncodeSpecialChars(doc, node->content);
ret = xmlStrcat(ret, node->name); if (buffer != NULL) {
buf[0] = ';'; buf[1] = 0; ret = xmlStrcat(ret, buffer);
ret = xmlStrncat(ret, buf, 1); xmlFree(buffer);
} }
} }
} else if (node->type == XML_ENTITY_REF_NODE) {
if (inLine) {
ent = xmlGetDocEntity(doc, node->name);
if (ent != NULL) {
xmlChar *buffer;
/* an entity content can be any "well balanced chunk",
* i.e. the result of the content [43] production:
* http://www.w3.org/TR/REC-xml#NT-content.
* So it can contain text, CDATA section or nested
* entity reference nodes (among others).
* -> we recursively call xmlNodeListGetRawString()
* which handles these types */
buffer =
xmlNodeListGetRawString(doc, ent->children, 1);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
} else {
ret = xmlStrcat(ret, node->content);
}
} else {
xmlChar buf[2];
buf[0] = '&';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
ret = xmlStrcat(ret, node->name);
buf[0] = ';';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
}
}
#if 0 #if 0
else { else {
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
"xmlGetNodeListString : invalid node type %d\n", "xmlGetNodeListString : invalid node type %d\n",
node->type); node->type);
} }
#endif #endif
node = node->next; node = node->next;
} }
return(ret); return (ret);
} }
/** /**
@ -3763,122 +3799,159 @@ xmlNodeGetBase(xmlDocPtr doc, xmlNodePtr cur) {
* It's up to the caller to free the memory. * It's up to the caller to free the memory.
*/ */
xmlChar * xmlChar *
xmlNodeGetContent(xmlNodePtr cur) { xmlNodeGetContent(xmlNodePtr cur)
if (cur == NULL) return(NULL); {
if (cur == NULL)
return (NULL);
switch (cur->type) { switch (cur->type) {
case XML_DOCUMENT_FRAG_NODE: case XML_DOCUMENT_FRAG_NODE:
case XML_ELEMENT_NODE: { case XML_ELEMENT_NODE:{
xmlNodePtr tmp = cur; xmlNodePtr tmp = cur;
xmlBufferPtr buffer; xmlBufferPtr buffer;
xmlChar *ret; xmlChar *ret;
buffer = xmlBufferCreate(); buffer = xmlBufferCreate();
if (buffer == NULL) if (buffer == NULL)
return(NULL); return (NULL);
while (tmp != NULL) { while (tmp != NULL) {
switch (tmp->type) { switch (tmp->type) {
case XML_CDATA_SECTION_NODE: case XML_CDATA_SECTION_NODE:
case XML_TEXT_NODE: case XML_TEXT_NODE:
if (tmp->content != NULL) if (tmp->content != NULL)
xmlBufferCat(buffer, tmp->content); xmlBufferCat(buffer, tmp->content);
break; break;
case XML_ENTITY_REF_NODE: { case XML_ENTITY_REF_NODE:{
xmlEntityPtr ent; /* recursive substitution of entity references */
xmlChar *cont = xmlNodeGetContent(tmp);
ent = xmlGetDocEntity(cur->doc, tmp->name); if (cont) {
if (ent != NULL) xmlBufferCat(buffer,
xmlBufferCat(buffer, ent->content); (const xmlChar *) cont);
} xmlFree(cont);
default: }
break; break;
} }
/* default:
* Skip to next node break;
*/ }
if (tmp->children != NULL) { /*
if (tmp->children->type != XML_ENTITY_DECL) { * Skip to next node
tmp = tmp->children; */
continue; if (tmp->children != NULL) {
} if (tmp->children->type != XML_ENTITY_DECL) {
} tmp = tmp->children;
if (tmp == cur) continue;
break; }
}
if (tmp == cur)
break;
if (tmp->next != NULL) { if (tmp->next != NULL) {
tmp = tmp->next; tmp = tmp->next;
continue; continue;
} }
do { do {
tmp = tmp->parent; tmp = tmp->parent;
if (tmp == NULL) if (tmp == NULL)
break; break;
if (tmp == cur) { if (tmp == cur) {
tmp = NULL; tmp = NULL;
break; break;
} }
if (tmp->next != NULL) { if (tmp->next != NULL) {
tmp = tmp->next; tmp = tmp->next;
break; break;
} }
} while (tmp != NULL); } while (tmp != NULL);
} }
ret = buffer->content; ret = buffer->content;
buffer->content = NULL; buffer->content = NULL;
xmlBufferFree(buffer); xmlBufferFree(buffer);
return(ret); return (ret);
} }
case XML_ATTRIBUTE_NODE: { case XML_ATTRIBUTE_NODE:{
xmlAttrPtr attr = (xmlAttrPtr) cur; xmlAttrPtr attr = (xmlAttrPtr) cur;
if (attr->parent != NULL)
return(xmlNodeListGetString(attr->parent->doc, attr->children, 1)); if (attr->parent != NULL)
else return (xmlNodeListGetString
return(xmlNodeListGetString(NULL, attr->children, 1)); (attr->parent->doc, attr->children, 1));
break; else
} return (xmlNodeListGetString(NULL, attr->children, 1));
break;
}
case XML_COMMENT_NODE: case XML_COMMENT_NODE:
case XML_PI_NODE: case XML_PI_NODE:
if (cur->content != NULL) if (cur->content != NULL)
return(xmlStrdup(cur->content)); return (xmlStrdup(cur->content));
return(NULL); return (NULL);
case XML_ENTITY_REF_NODE: case XML_ENTITY_REF_NODE:{
/* xmlEntityPtr ent;
* Locate the entity, and get it's content xmlNodePtr tmp;
* @@@ xmlBufferPtr buffer;
*/ xmlChar *ret;
return(NULL);
/* lookup entity declaration */
ent = xmlGetDocEntity(cur->doc, cur->name);
if (ent == NULL)
return (NULL);
buffer = xmlBufferCreate();
if (buffer == NULL)
return (NULL);
/* an entity content can be any "well balanced chunk",
* i.e. the result of the content [43] production:
* http://www.w3.org/TR/REC-xml#NT-content
* -> we iterate through child nodes and recursively call
* xmlNodeGetContent() which handles all possible node types */
tmp = ent->children;
while (tmp) {
xmlChar *cont = xmlNodeGetContent(tmp);
if (cont) {
xmlBufferCat(buffer, (const xmlChar *) cont);
xmlFree(cont);
}
tmp = tmp->next;
}
ret = buffer->content;
buffer->content = NULL;
xmlBufferFree(buffer);
return (ret);
}
case XML_ENTITY_NODE: case XML_ENTITY_NODE:
case XML_DOCUMENT_NODE: case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE: case XML_HTML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_TYPE_NODE:
case XML_NOTATION_NODE: case XML_NOTATION_NODE:
case XML_DTD_NODE: case XML_DTD_NODE:
case XML_XINCLUDE_START: case XML_XINCLUDE_START:
case XML_XINCLUDE_END: case XML_XINCLUDE_END:
#ifdef LIBXML_DOCB_ENABLED #ifdef LIBXML_DOCB_ENABLED
case XML_DOCB_DOCUMENT_NODE: case XML_DOCB_DOCUMENT_NODE:
#endif #endif
return(NULL); return (NULL);
case XML_NAMESPACE_DECL: case XML_NAMESPACE_DECL:
return(xmlStrdup(((xmlNsPtr)cur)->href)); return (xmlStrdup(((xmlNsPtr) cur)->href));
case XML_ELEMENT_DECL: case XML_ELEMENT_DECL:
/* TODO !!! */ /* TODO !!! */
return(NULL); return (NULL);
case XML_ATTRIBUTE_DECL: case XML_ATTRIBUTE_DECL:
/* TODO !!! */ /* TODO !!! */
return(NULL); return (NULL);
case XML_ENTITY_DECL: case XML_ENTITY_DECL:
/* TODO !!! */ /* TODO !!! */
return(NULL); return (NULL);
case XML_CDATA_SECTION_NODE: case XML_CDATA_SECTION_NODE:
case XML_TEXT_NODE: case XML_TEXT_NODE:
if (cur->content != NULL) if (cur->content != NULL)
return(xmlStrdup(cur->content)); return (xmlStrdup(cur->content));
return(NULL); return (NULL);
} }
return(NULL); return (NULL);
} }
/** /**
* xmlNodeSetContent: * xmlNodeSetContent:
* @cur: the node being modified * @cur: the node being modified

View File

@ -109,6 +109,8 @@ typedef enum {
XML_REGEXP_QUANT_OPT, XML_REGEXP_QUANT_OPT,
XML_REGEXP_QUANT_MULT, XML_REGEXP_QUANT_MULT,
XML_REGEXP_QUANT_PLUS, XML_REGEXP_QUANT_PLUS,
XML_REGEXP_QUANT_ONCEONLY,
XML_REGEXP_QUANT_ALL,
XML_REGEXP_QUANT_RANGE XML_REGEXP_QUANT_RANGE
} xmlRegQuantType; } xmlRegQuantType;
@ -279,6 +281,8 @@ struct _xmlRegExecCtxt {
}; };
#define REGEXP_ALL_COUNTER 0x123456
static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top); static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
/************************************************************************ /************************************************************************
@ -630,6 +634,10 @@ xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
fprintf(output, "+ "); break; fprintf(output, "+ "); break;
case XML_REGEXP_QUANT_RANGE: case XML_REGEXP_QUANT_RANGE:
fprintf(output, "range "); break; fprintf(output, "range "); break;
case XML_REGEXP_QUANT_ONCEONLY:
fprintf(output, "onceonly "); break;
case XML_REGEXP_QUANT_ALL:
fprintf(output, "all "); break;
} }
} }
static void static void
@ -942,6 +950,24 @@ xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
ctxt->states[ctxt->nbStates++] = state; ctxt->states[ctxt->nbStates++] = state;
} }
/**
 * xmlFAGenerateAllTransition:
 * @ctxt: a regexp parser context
 * @from: the state the transition leaves
 * @to: the state the transition reaches, or NULL to have one built
 *
 * Links @from to the target state with an atom-less transition tagged
 * with REGEXP_ALL_COUNTER. When @to is NULL a fresh state is allocated,
 * registered in @ctxt and made the current state (ctxt->state) before the
 * transition is added.
 */
static void
xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
                           xmlRegStatePtr from, xmlRegStatePtr to) {
    xmlRegStatePtr target = to;

    if (target == NULL) {
        /* no destination supplied: build one and make it current */
        target = xmlRegNewState(ctxt);
        xmlRegStatePush(ctxt, target);
        ctxt->state = target;
    }

    /* NULL atom; REGEXP_ALL_COUNTER marks this as the "all" transition */
    xmlRegStateAddTrans(ctxt, from, NULL, target, -1, REGEXP_ALL_COUNTER);
}
/** /**
* xmlFAGenerateEpsilonTransition: * xmlFAGenerateEpsilonTransition:
* ctxt: a regexp parser context * ctxt: a regexp parser context
@ -3423,6 +3449,69 @@ xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
return(to); return(to);
} }
/**
 * xmlAutomataNewOnceTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @token: the input string associated to that transition
 * @min: the minimum successive occurrences of token (must be >= 1)
 * @max: the maximum successive occurrences of token (must be >= @min)
 * @data: user data stored on the atom built for this transition
 *
 * If @to is NULL, this first creates a new target state in the automata
 * and then adds a transition from the @from state to the target state
 * activated by a succession of input of value @token and whose number
 * is between @min and @max; moreover that transition can only be crossed
 * once.
 *
 * On success the automata's current state (am->state) is set to the
 * target state.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                        xmlAutomataStatePtr to, const xmlChar *token,
                        int min, int max, void *data) {
    xmlRegAtomPtr atom;
    int counter;

    if ((am == NULL) || (from == NULL) || (token == NULL))
        return(NULL);
    /* min >= 1 combined with max >= min already implies max >= 1 */
    if ((min < 1) || (max < min))
        return(NULL);

    /*
     * Associate a counter to the transition; it is bounded to exactly one
     * crossing. The counter is acquired before the atom so that bailing
     * out on a bad index does not leave an unregistered atom behind.
     * NOTE(review): xmlRegGetCounter is defined outside this view; the
     * guard presumes it reports failure with a negative index - confirm.
     */
    counter = xmlRegGetCounter(am);
    if (counter < 0)
        return(NULL);
    am->counters[counter].min = 1;
    am->counters[counter].max = 1;

    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
    if (atom == NULL)
        return(NULL);
    atom->valuep = xmlStrdup(token);
    atom->data = data;
    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
    atom->min = min;    /* min is known to be >= 1 at this point */
    atom->max = max;

    /*
     * The transition is wired by hand here instead of going through
     * xmlFAGenerateTransitions(am, from, to, atom).
     */
    if (to == NULL) {
        to = xmlRegNewState(am);
        xmlRegStatePush(am, to);
    }
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
    xmlRegAtomPush(am, atom);
    am->state = to;

    /* to is NULL here only if building the new state failed */
    return(to);
}
/** /**
* xmlAutomataNewState: * xmlAutomataNewState:
* @am: an automata * @am: an automata
@ -3465,6 +3554,30 @@ xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
return(to); return(to);
} }
/**
 * xmlAutomataNewAllTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 *
 * If @to is NULL, this first creates a new target state in the automata
 * and then adds an ALL transition from the @from state to the target
 * state. That transition is an epsilon transition allowed only when
 * all transitions from the @from node have been activated.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                       xmlAutomataStatePtr to) {
    if ((am == NULL) || (from == NULL))
        return(NULL);

    xmlFAGenerateAllTransition(am, from, to);

    /* with no caller-supplied target, report the automata's current state */
    return((to != NULL) ? to : am->state);
}
/** /**
* xmlAutomataNewCounter: * xmlAutomataNewCounter:
* @am: an automata * @am: an automata

View File

@ -2119,7 +2119,7 @@ xmlSchemaParseAll(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
if (type == NULL) if (type == NULL)
return (NULL); return (NULL);
type->node = node; type->node = node;
type->type = XML_SCHEMA_TYPE_SEQUENCE; type->type = XML_SCHEMA_TYPE_ALL;
type->id = xmlGetProp(node, BAD_CAST "id"); type->id = xmlGetProp(node, BAD_CAST "id");
type->minOccurs = xmlGetMinOccurs(ctxt, node); type->minOccurs = xmlGetMinOccurs(ctxt, node);
type->maxOccurs = xmlGetMaxOccurs(ctxt, node); type->maxOccurs = xmlGetMaxOccurs(ctxt, node);
@ -3037,6 +3037,26 @@ xmlSchemaBuildAContentModel(xmlSchemaTypePtr type,
break; break;
} }
case XML_SCHEMA_TYPE_ALL: { case XML_SCHEMA_TYPE_ALL: {
xmlAutomataStatePtr end;
xmlAutomataStatePtr start;
xmlSchemaTypePtr subtypes;
xmlSchemaElementPtr elem = (xmlSchemaElementPtr) type;
subtypes = type->subtypes;
if (subtypes == NULL)
break;
start = ctxt->state;
while (subtypes != NULL) {
ctxt->state = start;
elem = (xmlSchemaElementPtr) subtypes;
/* TODO : handle the namespace too */
xmlAutomataNewOnceTrans(ctxt->am, ctxt->state, ctxt->state,
elem->name, elem->minOccurs, elem->maxOccurs,
subtypes);
subtypes = subtypes->next;
}
ctxt->state = xmlAutomataNewAllTrans(ctxt->am, ctxt->state, NULL);
TODO TODO
break; break;
} }