1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

another entity processing update from Markus Henke Daniel

* tree.c: another entity processing update from Markus Henke
Daniel
This commit is contained in:
Daniel Veillard
2002-04-20 06:41:40 +00:00
parent 54d02fb30d
commit 7646b18d64
5 changed files with 393 additions and 173 deletions

View File

@ -1,3 +1,7 @@
Fri Apr 19 18:26:04 CEST 2002 Daniel Veillard <daniel@veillard.com>
* tree.c: another entity processing update from Markus Henke
Fri Apr 19 17:14:24 CEST 2002 Bjorn Reese <breese@users.sourceforge.net>
* trionan.c: fixed crash on OSF/1

View File

@ -60,6 +60,16 @@ xmlAutomataStatePtr xmlAutomataNewCountTrans(xmlAutomataPtr am,
int min,
int max,
void *data);
xmlAutomataStatePtr xmlAutomataNewOnceTrans (xmlAutomataPtr am,
xmlAutomataStatePtr from,
xmlAutomataStatePtr to,
const xmlChar *token,
int min,
int max,
void *data);
xmlAutomataStatePtr xmlAutomataNewAllTrans (xmlAutomataPtr am,
xmlAutomataStatePtr from,
xmlAutomataStatePtr to);
xmlAutomataStatePtr xmlAutomataNewEpsilon (xmlAutomataPtr am,
xmlAutomataStatePtr from,
xmlAutomataStatePtr to);

167
tree.c
View File

@ -902,12 +902,14 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
* Returns a pointer to the string copy, the caller must free it.
*/
xmlChar *
xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
{
xmlNodePtr node = list;
xmlChar *ret = NULL;
xmlEntityPtr ent;
if (list == NULL) return(NULL);
if (list == NULL)
return (NULL);
while (node != NULL) {
if ((node->type == XML_TEXT_NODE) ||
@ -926,17 +928,33 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
} else if (node->type == XML_ENTITY_REF_NODE) {
if (inLine) {
ent = xmlGetDocEntity(doc, node->name);
if (ent != NULL)
ret = xmlStrcat(ret, ent->content);
else {
if (ent != NULL) {
xmlChar *buffer;
/* an entity content can be any "well balanced chunk",
* i.e. the result of the content [43] production:
* http://www.w3.org/TR/REC-xml#NT-content.
* So it can contain text, CDATA section or nested
* entity reference nodes (among others).
* -> we recursively call xmlNodeListGetString(),
* which handles these types */
buffer = xmlNodeListGetString(doc, ent->children, 1);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
} else {
ret = xmlStrcat(ret, node->content);
}
} else {
xmlChar buf[2];
buf[0] = '&'; buf[1] = 0;
buf[0] = '&';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
ret = xmlStrcat(ret, node->name);
buf[0] = ';'; buf[1] = 0;
buf[0] = ';';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
}
}
@ -949,9 +967,8 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
#endif
node = node->next;
}
return(ret);
return (ret);
}
/**
* xmlNodeListGetRawString:
* @doc: the document
@ -965,12 +982,14 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
* Returns a pointer to the string copy, the caller must free it.
*/
xmlChar *
xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine)
{
xmlNodePtr node = list;
xmlChar *ret = NULL;
xmlEntityPtr ent;
if (list == NULL) return(NULL);
if (list == NULL)
return (NULL);
while (node != NULL) {
if ((node->type == XML_TEXT_NODE) ||
@ -989,17 +1008,34 @@ xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
} else if (node->type == XML_ENTITY_REF_NODE) {
if (inLine) {
ent = xmlGetDocEntity(doc, node->name);
if (ent != NULL)
ret = xmlStrcat(ret, ent->content);
else {
if (ent != NULL) {
xmlChar *buffer;
/* an entity content can be any "well balanced chunk",
* i.e. the result of the content [43] production:
* http://www.w3.org/TR/REC-xml#NT-content.
* So it can contain text, CDATA section or nested
* entity reference nodes (among others).
* -> we recursively call xmlNodeListGetRawString(),
* which handles these types */
buffer =
xmlNodeListGetRawString(doc, ent->children, 1);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);
}
} else {
ret = xmlStrcat(ret, node->content);
}
} else {
xmlChar buf[2];
buf[0] = '&'; buf[1] = 0;
buf[0] = '&';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
ret = xmlStrcat(ret, node->name);
buf[0] = ';'; buf[1] = 0;
buf[0] = ';';
buf[1] = 0;
ret = xmlStrncat(ret, buf, 1);
}
}
@ -1012,7 +1048,7 @@ xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
#endif
node = node->next;
}
return(ret);
return (ret);
}
/**
@ -3763,18 +3799,20 @@ xmlNodeGetBase(xmlDocPtr doc, xmlNodePtr cur) {
* It's up to the caller to free the memory.
*/
xmlChar *
xmlNodeGetContent(xmlNodePtr cur) {
if (cur == NULL) return(NULL);
xmlNodeGetContent(xmlNodePtr cur)
{
if (cur == NULL)
return (NULL);
switch (cur->type) {
case XML_DOCUMENT_FRAG_NODE:
case XML_ELEMENT_NODE: {
case XML_ELEMENT_NODE:{
xmlNodePtr tmp = cur;
xmlBufferPtr buffer;
xmlChar *ret;
buffer = xmlBufferCreate();
if (buffer == NULL)
return(NULL);
return (NULL);
while (tmp != NULL) {
switch (tmp->type) {
case XML_CDATA_SECTION_NODE:
@ -3782,12 +3820,16 @@ xmlNodeGetContent(xmlNodePtr cur) {
if (tmp->content != NULL)
xmlBufferCat(buffer, tmp->content);
break;
case XML_ENTITY_REF_NODE: {
xmlEntityPtr ent;
case XML_ENTITY_REF_NODE:{
/* recursive substitution of entity references */
xmlChar *cont = xmlNodeGetContent(tmp);
ent = xmlGetDocEntity(cur->doc, tmp->name);
if (ent != NULL)
xmlBufferCat(buffer, ent->content);
if (cont) {
xmlBufferCat(buffer,
(const xmlChar *) cont);
xmlFree(cont);
}
break;
}
default:
break;
@ -3826,27 +3868,59 @@ xmlNodeGetContent(xmlNodePtr cur) {
ret = buffer->content;
buffer->content = NULL;
xmlBufferFree(buffer);
return(ret);
return (ret);
}
case XML_ATTRIBUTE_NODE: {
case XML_ATTRIBUTE_NODE:{
xmlAttrPtr attr = (xmlAttrPtr) cur;
if (attr->parent != NULL)
return(xmlNodeListGetString(attr->parent->doc, attr->children, 1));
return (xmlNodeListGetString
(attr->parent->doc, attr->children, 1));
else
return(xmlNodeListGetString(NULL, attr->children, 1));
return (xmlNodeListGetString(NULL, attr->children, 1));
break;
}
case XML_COMMENT_NODE:
case XML_PI_NODE:
if (cur->content != NULL)
return(xmlStrdup(cur->content));
return(NULL);
case XML_ENTITY_REF_NODE:
/*
* Locate the entity, and get it's content
* @@@
*/
return(NULL);
return (xmlStrdup(cur->content));
return (NULL);
case XML_ENTITY_REF_NODE:{
xmlEntityPtr ent;
xmlNodePtr tmp;
xmlBufferPtr buffer;
xmlChar *ret;
/* lookup entity declaration */
ent = xmlGetDocEntity(cur->doc, cur->name);
if (ent == NULL)
return (NULL);
buffer = xmlBufferCreate();
if (buffer == NULL)
return (NULL);
/* an entity content can be any "well balanced chunk",
* i.e. the result of the content [43] production:
* http://www.w3.org/TR/REC-xml#NT-content
* -> we iterate through the child nodes and recursively call
* xmlNodeGetContent(), which handles all possible node types */
tmp = ent->children;
while (tmp) {
xmlChar *cont = xmlNodeGetContent(tmp);
if (cont) {
xmlBufferCat(buffer, (const xmlChar *) cont);
xmlFree(cont);
}
tmp = tmp->next;
}
ret = buffer->content;
buffer->content = NULL;
xmlBufferFree(buffer);
return (ret);
}
case XML_ENTITY_NODE:
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
@ -3858,27 +3932,26 @@ xmlNodeGetContent(xmlNodePtr cur) {
#ifdef LIBXML_DOCB_ENABLED
case XML_DOCB_DOCUMENT_NODE:
#endif
return(NULL);
return (NULL);
case XML_NAMESPACE_DECL:
return(xmlStrdup(((xmlNsPtr)cur)->href));
return (xmlStrdup(((xmlNsPtr) cur)->href));
case XML_ELEMENT_DECL:
/* TODO !!! */
return(NULL);
return (NULL);
case XML_ATTRIBUTE_DECL:
/* TODO !!! */
return(NULL);
return (NULL);
case XML_ENTITY_DECL:
/* TODO !!! */
return(NULL);
return (NULL);
case XML_CDATA_SECTION_NODE:
case XML_TEXT_NODE:
if (cur->content != NULL)
return(xmlStrdup(cur->content));
return(NULL);
return (xmlStrdup(cur->content));
return (NULL);
}
return(NULL);
return (NULL);
}
/**
* xmlNodeSetContent:
* @cur: the node being modified

View File

@ -109,6 +109,8 @@ typedef enum {
XML_REGEXP_QUANT_OPT,
XML_REGEXP_QUANT_MULT,
XML_REGEXP_QUANT_PLUS,
XML_REGEXP_QUANT_ONCEONLY,
XML_REGEXP_QUANT_ALL,
XML_REGEXP_QUANT_RANGE
} xmlRegQuantType;
@ -279,6 +281,8 @@ struct _xmlRegExecCtxt {
};
#define REGEXP_ALL_COUNTER 0x123456
static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
/************************************************************************
@ -630,6 +634,10 @@ xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
fprintf(output, "+ "); break;
case XML_REGEXP_QUANT_RANGE:
fprintf(output, "range "); break;
case XML_REGEXP_QUANT_ONCEONLY:
fprintf(output, "onceonly "); break;
case XML_REGEXP_QUANT_ALL:
fprintf(output, "all "); break;
}
}
static void
@ -942,6 +950,24 @@ xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
ctxt->states[ctxt->nbStates++] = state;
}
/**
 * xmlFAGenerateAllTransition:
 * @ctxt:  a regexp parser context
 * @from:  the from state
 * @to:  the target state or NULL for building a new one
 *
 * Adds to @from a transition without atom leading to the target state,
 * passing -1 in the counter slot and REGEXP_ALL_COUNTER as the last
 * argument of xmlRegStateAddTrans().
 * When @to is NULL a new state is created, pushed on @ctxt and recorded
 * as the current state (ctxt->state) before the transition is added.
 */
static void
xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
                           xmlRegStatePtr from, xmlRegStatePtr to) {
    xmlRegStatePtr target = to;

    if (target == NULL) {
        /* no target supplied: build one and make it the current state */
        target = xmlRegNewState(ctxt);
        xmlRegStatePush(ctxt, target);
        ctxt->state = target;
    }
    xmlRegStateAddTrans(ctxt, from, NULL, target, -1, REGEXP_ALL_COUNTER);
}
/**
* xmlFAGenerateEpsilonTransition:
* ctxt: a regexp parser context
@ -3423,6 +3449,69 @@ xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
return(to);
}
/**
 * xmlAutomataNewOnceTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 * @token: the input string associated to that transition
 * @min: the minimum successive occurrences of token, must be at least 1
 * @max: the maximum successive occurrences of token, must be at least @min
 * @data: user data stored on the atom built for the transition
 *
 * If @to is NULL, this first creates a new target state in the automata
 * and then adds a transition from the @from state to the target state
 * activated by a succession of input of value @token and whose number
 * is between @min and @max, moreover that transition can only be crossed
 * once.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                        xmlAutomataStatePtr to, const xmlChar *token,
                        int min, int max, void *data) {
    xmlRegAtomPtr atom;
    int counter;

    if ((am == NULL) || (from == NULL) || (token == NULL))
        return(NULL);
    /*
     * min must be >= 1; combined with max >= min this also guarantees
     * max >= 1, so no separate check on max is needed.
     */
    if (min < 1)
        return(NULL);
    if (max < min)
        return(NULL);
    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
    if (atom == NULL)
        return(NULL);
    atom->valuep = xmlStrdup(token);
    atom->data = data;
    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
    atom->min = min;
    atom->max = max;
    /*
     * associate a counter to the transition; its bounds are both 1
     * since the transition can only be crossed once.
     */
    counter = xmlRegGetCounter(am);
    am->counters[counter].min = 1;
    am->counters[counter].max = 1;

    if (to == NULL) {
        to = xmlRegNewState(am);
        xmlRegStatePush(am, to);
    }
    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
    xmlRegAtomPush(am, atom);
    /* the target becomes the current state of the automata */
    am->state = to;
    return(to);
}
/**
* xmlAutomataNewState:
* @am: an automata
@ -3465,6 +3554,30 @@ xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
return(to);
}
/**
 * xmlAutomataNewAllTrans:
 * @am: an automata
 * @from: the starting point of the transition
 * @to: the target point of the transition or NULL
 *
 * If @to is NULL, this first creates a new target state in the automata
 * and then adds an ALL transition from the @from state to the
 * target state. That transition is an epsilon transition allowed only when
 * all transitions from the @from node have been activated.
 *
 * Returns the target state or NULL in case of error
 */
xmlAutomataStatePtr
xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
                       xmlAutomataStatePtr to) {
    if ((am != NULL) && (from != NULL)) {
        xmlFAGenerateAllTransition(am, from, to);
        /* with no caller-supplied target, report the automata's current state */
        return((to != NULL) ? to : am->state);
    }
    return(NULL);
}
/**
* xmlAutomataNewCounter:
* @am: an automata

View File

@ -2119,7 +2119,7 @@ xmlSchemaParseAll(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
if (type == NULL)
return (NULL);
type->node = node;
type->type = XML_SCHEMA_TYPE_SEQUENCE;
type->type = XML_SCHEMA_TYPE_ALL;
type->id = xmlGetProp(node, BAD_CAST "id");
type->minOccurs = xmlGetMinOccurs(ctxt, node);
type->maxOccurs = xmlGetMaxOccurs(ctxt, node);
@ -3037,6 +3037,26 @@ xmlSchemaBuildAContentModel(xmlSchemaTypePtr type,
break;
}
case XML_SCHEMA_TYPE_ALL: {
xmlAutomataStatePtr end;
xmlAutomataStatePtr start;
xmlSchemaTypePtr subtypes;
xmlSchemaElementPtr elem = (xmlSchemaElementPtr) type;
subtypes = type->subtypes;
if (subtypes == NULL)
break;
start = ctxt->state;
while (subtypes != NULL) {
ctxt->state = start;
elem = (xmlSchemaElementPtr) subtypes;
/* TODO : handle the namespace too */
xmlAutomataNewOnceTrans(ctxt->am, ctxt->state, ctxt->state,
elem->name, elem->minOccurs, elem->maxOccurs,
subtypes);
subtypes = subtypes->next;
}
ctxt->state = xmlAutomataNewAllTrans(ctxt->am, ctxt->state, NULL);
TODO
break;
}