1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

This is the 2.0.0-beta, lots and lots and lots of changes

Have a look at http://xmlsoft.org/upgrade.html

Daniel
This commit is contained in:
Daniel Veillard
2000-03-14 18:30:20 +00:00
parent 76234da152
commit cf46199c5e
91 changed files with 9978 additions and 5547 deletions

View File

@ -1,3 +1,11 @@
Tue Mar 14 19:11:29 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* all: tagged LIB_XML_1_X
* *.c *.h : updated from W3C CVS tree
* configure.in : 2.0.0-beta
* libxml.spec.in : libxml2 package name
* result/* : new version of the tests output
Mon Mar 6 09:34:52 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org> Mon Mar 6 09:34:52 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* doc/xml.html, doc/update.html: updated docs, 1.8.7 * doc/xml.html, doc/update.html: updated docs, 1.8.7

View File

@ -121,36 +121,81 @@ PUSH_AND_POP(extern, xmlChar*, name)
* COPY(to) copy one char to *to, increment CUR_PTR and to accordingly * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
*/ */
#define CUR (*ctxt->input->cur) #define CUR ((int) (*ctxt->input->cur))
#define UPPER (toupper(*ctxt->input->cur)) #define UPPER (toupper(*ctxt->input->cur))
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val) #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
#define NXT(val) ctxt->input->cur[(val)] #define NXT(val) ctxt->input->cur[(val)]
#define UPP(val) (toupper(ctxt->input->cur[(val)])) #define UPP(val) (toupper(ctxt->input->cur[(val)]))
#define CUR_PTR ctxt->input->cur #define CUR_PTR ctxt->input->cur
#define SHRINK xmlParserInputShrink(ctxt->input) #define SHRINK xmlParserInputShrink(ctxt->input)
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK) #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
#define SKIP_BLANKS \ #define CURRENT ((int) (*ctxt->input->cur))
while (IS_BLANK(*(ctxt->input->cur))) NEXT
#ifndef USE_UTF_8 #define NEXT htmlNextChar(ctxt);
#define CURRENT (*ctxt->input->cur)
#define NEXT { \
if ((*ctxt->input->cur == 0) && \
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { \
xmlPopInput(ctxt); \
} else { \
if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
} else ctxt->input->col++; \
ctxt->input->cur++; \
ctxt->nbChars++; \
if (*ctxt->input->cur == 0) \
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
}}
#else #define SKIP_BLANKS htmlSkipBlankChars(ctxt);
#endif
/**
 * htmlNextChar:
 * @ctxt:  the HTML parser context
 *
 * Advance the parser input by one character.
 *
 * When the current character is 0 and xmlParserInputGrow() returns a
 * value <= 0, the current input is popped with xmlPopInput() and nothing
 * else is touched. Otherwise the line/column counters are updated (a
 * '\n' increments the line and resets the column to 1), the cursor and
 * ctxt->nbChars are incremented, and another grow is attempted if the
 * new current character is 0.
 */
void
htmlNextChar(htmlParserCtxtPtr ctxt) {
    /* Nothing left in this input and nothing more could be read: pop it. */
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
        xmlPopInput(ctxt);
        return;
    }

    /* Keep the position counters in sync with the char being consumed. */
    if (*(ctxt->input->cur) == '\n') {
        ctxt->input->line++;
        ctxt->input->col = 1;
    } else {
        ctxt->input->col++;
    }

    ctxt->input->cur++;
    ctxt->nbChars++;

    /* Landed on a 0 byte: ask for more data right away. */
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
/**
 * htmlSkipBlankChars:
 * @ctxt:  the HTML parser context
 *
 * Consume characters from the current input for as long as IS_BLANK()
 * accepts the current one. Each step performs the same advance as
 * htmlNextChar(): line/column bookkeeping, cursor and ctxt->nbChars
 * increment, and a call to xmlParserInputGrow() when a 0 byte is reached.
 *
 * Returns the number of loop iterations performed, i.e. the number of
 * blank characters skipped.
 */
int
htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int skipped;

    for (skipped = 0; IS_BLANK(*(ctxt->input->cur)); skipped++) {
        /* Input exhausted and no more data available: pop it. */
        if ((*ctxt->input->cur == 0) &&
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
            xmlPopInput(ctxt);
            continue;
        }

        /* Track the position of the character being consumed. */
        if (*(ctxt->input->cur) == '\n') {
            ctxt->input->line++;
            ctxt->input->col = 1;
        } else {
            ctxt->input->col++;
        }

        ctxt->input->cur++;
        ctxt->nbChars++;

        /* Landed on a 0 byte: try to read more before testing again. */
        if (*ctxt->input->cur == 0)
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    }
    return(skipped);
}
@ -475,7 +520,7 @@ htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
if (elem == NULL) return(1); if (elem == NULL) return(1);
if (!xmlStrcmp(name, elem->name)) return(0); if (!xmlStrcmp(name, elem->name)) return(0);
if (htmlCheckAutoClose(elem->name, name)) return(1); if (htmlCheckAutoClose(elem->name, name)) return(1);
child = elem->childs; child = elem->children;
while (child != NULL) { while (child != NULL) {
if (htmlAutoCloseTag(doc, name, child)) return(1); if (htmlAutoCloseTag(doc, name, child)) return(1);
child = child->next; child = child->next;
@ -499,7 +544,7 @@ htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
htmlNodePtr child; htmlNodePtr child;
if (elem == NULL) return(1); if (elem == NULL) return(1);
child = elem->childs; child = elem->children;
while (child != NULL) { while (child != NULL) {
if (htmlAutoCloseTag(doc, elem->name, child)) return(1); if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
child = child->next; child = child->next;
@ -1275,7 +1320,7 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
else else
xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI); xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI);
cur->name = NULL; cur->name = NULL;
cur->root = NULL; cur->children = NULL;
cur->extSubset = NULL; cur->extSubset = NULL;
cur->oldNs = NULL; cur->oldNs = NULL;
cur->encoding = NULL; cur->encoding = NULL;
@ -1285,7 +1330,6 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
cur->refs = NULL; cur->refs = NULL;
#ifndef XML_WITHOUT_CORBA #ifndef XML_WITHOUT_CORBA
cur->_private = NULL; cur->_private = NULL;
cur->vepv = NULL;
#endif #endif
return(cur); return(cur);
} }
@ -1667,7 +1711,8 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
} }
} else { } else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n"); ctxt->sax->error(ctxt->userData,
"SystemLiteral \" or ' expected\n");
ctxt->wellFormed = 0; ctxt->wellFormed = 0;
} }

View File

@ -80,7 +80,7 @@ htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
} }
xmlBufferWriteChar(buf, " "); xmlBufferWriteChar(buf, " ");
xmlBufferWriteCHAR(buf, cur->name); xmlBufferWriteCHAR(buf, cur->name);
value = xmlNodeListGetString(doc, cur->val, 0); value = xmlNodeListGetString(doc, cur->children, 0);
if (value) { if (value) {
xmlBufferWriteChar(buf, "="); xmlBufferWriteChar(buf, "=");
xmlBufferWriteQuotedString(buf, value); xmlBufferWriteQuotedString(buf, value);
@ -212,7 +212,7 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
} }
return; return;
} }
if ((cur->content == NULL) && (cur->childs == NULL)) { if ((cur->content == NULL) && (cur->children == NULL)) {
if ((info != NULL) && (info->endTag != 0)) if ((info != NULL) && (info->endTag != 0))
xmlBufferWriteChar(buf, ">"); xmlBufferWriteChar(buf, ">");
else { else {
@ -242,15 +242,15 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
xmlFree(buffer); xmlFree(buffer);
} }
} }
if (cur->childs != NULL) { if (cur->children != NULL) {
if ((cur->childs->type != HTML_TEXT_NODE) && if ((cur->children->type != HTML_TEXT_NODE) &&
(cur->childs->type != HTML_ENTITY_REF_NODE) && (cur->children->type != HTML_ENTITY_REF_NODE) &&
(cur->childs != cur->last)) (cur->children != cur->last))
xmlBufferWriteChar(buf, "\n"); xmlBufferWriteChar(buf, "\n");
htmlNodeListDump(buf, doc, cur->childs); htmlNodeListDump(buf, doc, cur->children);
if ((cur->last->type != HTML_TEXT_NODE) && if ((cur->last->type != HTML_TEXT_NODE) &&
(cur->last->type != HTML_ENTITY_REF_NODE) && (cur->last->type != HTML_ENTITY_REF_NODE) &&
(cur->childs != cur->last)) (cur->children != cur->last))
xmlBufferWriteChar(buf, "\n"); xmlBufferWriteChar(buf, "\n");
} }
if (!htmlIsAutoClosed(doc, cur)) { if (!htmlIsAutoClosed(doc, cur)) {
@ -307,8 +307,8 @@ htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">"); xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
} }
if (cur->root != NULL) { if (cur->children != NULL) {
htmlNodeListDump(buf, cur, cur->root); htmlNodeListDump(buf, cur, cur->children);
} }
xmlBufferWriteChar(buf, "\n"); xmlBufferWriteChar(buf, "\n");
cur->type = type; cur->type = type;

312
SAX.c
View File

@ -158,66 +158,112 @@ internalSubset(void *ctx, const xmlChar *name,
name, ExternalID, SystemID); name, ExternalID, SystemID);
#endif #endif
xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID); xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID);
}
/**
* externalSubset:
* @ctx: the user data (XML parser context)
*
* Callback on external subset declaration.
*/
void
externalSubset(void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.externalSubset(%s, %s, %s)\n",
name, ExternalID, SystemID);
#endif
if (((ExternalID != NULL) || (SystemID != NULL)) && if (((ExternalID != NULL) || (SystemID != NULL)) &&
(ctxt->validate && ctxt->wellFormed && ctxt->myDoc)) { (ctxt->validate && ctxt->wellFormed && ctxt->myDoc)) {
/* /*
* Try to fetch and parse the external subset. * Try to fetch and parse the external subset.
*/ */
xmlDtdPtr ret = NULL; xmlParserInputPtr oldinput;
xmlParserCtxtPtr dtdCtxt; int oldinputNr;
int oldinputMax;
xmlParserInputPtr *oldinputTab;
int oldwellFormed;
xmlParserInputPtr input = NULL; xmlParserInputPtr input = NULL;
xmlCharEncoding enc; xmlCharEncoding enc;
dtdCtxt = xmlNewParserCtxt();
if (dtdCtxt == NULL) return;
/* /*
* Ask the Entity resolver to load the damn thing * Ask the Entity resolver to load the damn thing
*/ */
if ((ctxt->directory != NULL) && (dtdCtxt->directory == NULL)) if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
dtdCtxt->directory = (char *) xmlStrdup(BAD_CAST ctxt->directory); input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
if ((dtdCtxt->sax != NULL) && (dtdCtxt->sax->resolveEntity != NULL))
input = dtdCtxt->sax->resolveEntity(dtdCtxt->userData, ExternalID,
SystemID); SystemID);
if (input == NULL) { if (input == NULL) {
xmlFreeParserCtxt(dtdCtxt);
return; return;
} }
xmlNewDtd(ctxt->myDoc, name, ExternalID, SystemID);
/* /*
* plug some encoding conversion routines here. !!! * make sure we won't destroy the main document context
*/ */
xmlPushInput(dtdCtxt, input); oldinput = ctxt->input;
enc = xmlDetectCharEncoding(dtdCtxt->input->cur); oldinputNr = ctxt->inputNr;
xmlSwitchEncoding(dtdCtxt, enc); oldinputMax = ctxt->inputMax;
oldinputTab = ctxt->inputTab;
oldwellFormed = ctxt->wellFormed;
ctxt->inputTab = (xmlParserInputPtr *)
xmlMalloc(5 * sizeof(xmlParserInputPtr));
if (ctxt->inputTab == NULL) {
ctxt->errNo = XML_ERR_NO_MEMORY;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"externalSubset: out of memory\n");
ctxt->errNo = XML_ERR_NO_MEMORY;
ctxt->input = oldinput;
ctxt->inputNr = oldinputNr;
ctxt->inputMax = oldinputMax;
ctxt->inputTab = oldinputTab;
return;
}
ctxt->inputNr = 0;
ctxt->inputMax = 5;
ctxt->input = NULL;
xmlPushInput(ctxt, input);
/*
* On the fly encoding conversion if needed
*/
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
xmlSwitchEncoding(ctxt, enc);
if (input->filename == NULL) if (input->filename == NULL)
input->filename = (char *) xmlStrdup(SystemID); input->filename = (char *) xmlStrdup(SystemID);
input->line = 1; input->line = 1;
input->col = 1; input->col = 1;
input->base = dtdCtxt->input->cur; input->base = ctxt->input->cur;
input->cur = dtdCtxt->input->cur; input->cur = ctxt->input->cur;
input->free = NULL; input->free = NULL;
/* /*
* let's parse that entity knowing it's an external subset. * let's parse that entity knowing it's an external subset.
*/ */
xmlParseExternalSubset(dtdCtxt, ExternalID, SystemID); xmlParseExternalSubset(ctxt, ExternalID, SystemID);
if (dtdCtxt->myDoc != NULL) { /*
if (dtdCtxt->wellFormed) { * Free up the external entities
ret = dtdCtxt->myDoc->intSubset; */
dtdCtxt->myDoc->intSubset = NULL;
} else {
ret = NULL;
}
xmlFreeDoc(dtdCtxt->myDoc);
dtdCtxt->myDoc = NULL;
}
xmlFreeParserCtxt(dtdCtxt);
ctxt->myDoc->extSubset = ret; while (ctxt->inputNr > 1)
xmlPopInput(ctxt);
xmlFreeInputStream(ctxt->input);
xmlFree(ctxt->inputTab);
/*
* Restore the parsing context of the main entity
*/
ctxt->input = oldinput;
ctxt->inputNr = oldinputNr;
ctxt->inputMax = oldinputMax;
ctxt->inputTab = oldinputTab;
/* ctxt->wellFormed = oldwellFormed; */
} }
} }
@ -316,13 +362,23 @@ entityDecl(void *ctx, const xmlChar *name, int type,
fprintf(stderr, "SAX.entityDecl(%s, %d, %s, %s, %s)\n", fprintf(stderr, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
name, type, publicId, systemId, content); name, type, publicId, systemId, content);
#endif #endif
xmlAddDocEntity(ctxt->myDoc, name, type, publicId, systemId, content); if (ctxt->inSubset == 1)
xmlAddDocEntity(ctxt->myDoc, name, type, publicId,
systemId, content);
else if (ctxt->inSubset == 2)
xmlAddDtdEntity(ctxt->myDoc, name, type, publicId,
systemId, content);
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt,
"SAX.entityDecl(%s) called while not in subset\n", name);
}
} }
/** /**
* attributeDecl: * attributeDecl:
* @ctx: the user data (XML parser context) * @ctx: the user data (XML parser context)
* @name: the attribute name * @fullname: the attribute name
* @type: the attribute type * @type: the attribute type
* @publicId: The public ID of the attribute * @publicId: The public ID of the attribute
* @systemId: The system ID of the attribute * @systemId: The system ID of the attribute
@ -331,24 +387,40 @@ entityDecl(void *ctx, const xmlChar *name, int type,
* An attribute definition has been parsed * An attribute definition has been parsed
*/ */
void void
attributeDecl(void *ctx, const xmlChar *elem, const xmlChar *name, attributeDecl(void *ctx, const xmlChar *elem, const xmlChar *fullname,
int type, int def, const xmlChar *defaultValue, int type, int def, const xmlChar *defaultValue,
xmlEnumerationPtr tree) xmlEnumerationPtr tree)
{ {
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlAttributePtr attr; xmlAttributePtr attr;
xmlChar *name = NULL, *prefix = NULL;
#ifdef DEBUG_SAX #ifdef DEBUG_SAX
fprintf(stderr, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n", fprintf(stderr, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
elem, name, type, def, defaultValue); elem, fullname, type, def, defaultValue);
#endif #endif
name = xmlSplitQName(ctxt, fullname, &prefix);
if (ctxt->inSubset == 1)
attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, elem, attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, elem,
name, type, def, defaultValue, tree); name, prefix, type, def, defaultValue, tree);
else if (ctxt->inSubset == 2)
attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, elem,
name, prefix, type, def, defaultValue, tree);
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt,
"SAX.attributeDecl(%s) called while not in subset\n", name);
return;
}
if (attr == 0) ctxt->valid = 0; if (attr == 0) ctxt->valid = 0;
if (ctxt->validate && ctxt->wellFormed && if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset) ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateAttributeDecl(&ctxt->vctxt, ctxt->myDoc, ctxt->valid &= xmlValidateAttributeDecl(&ctxt->vctxt, ctxt->myDoc,
attr); attr);
if (prefix != NULL)
xmlFree(prefix);
if (name != NULL)
xmlFree(name);
} }
/** /**
@ -367,16 +439,26 @@ elementDecl(void *ctx, const xmlChar *name, int type,
xmlElementContentPtr content) xmlElementContentPtr content)
{ {
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlElementPtr elem; xmlElementPtr elem = NULL;
#ifdef DEBUG_SAX #ifdef DEBUG_SAX
fprintf(stderr, "SAX.elementDecl(%s, %d, ...)\n", fprintf(stderr, "SAX.elementDecl(%s, %d, ...)\n",
name, type); fullname, type);
#endif #endif
if (ctxt->inSubset == 1)
elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->intSubset,
name, type, content); name, type, content);
if (elem == 0) ctxt->valid = 0; else if (ctxt->inSubset == 2)
elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->extSubset,
name, type, content);
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt,
"SAX.elementDecl(%s) called while not in subset\n", name);
return;
}
if (elem == NULL) ctxt->valid = 0;
if (ctxt->validate && ctxt->wellFormed && if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset) ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateElementDecl(&ctxt->vctxt, ctxt->myDoc, elem); ctxt->valid &= xmlValidateElementDecl(&ctxt->vctxt, ctxt->myDoc, elem);
@ -396,15 +478,25 @@ notationDecl(void *ctx, const xmlChar *name,
const xmlChar *publicId, const xmlChar *systemId) const xmlChar *publicId, const xmlChar *systemId)
{ {
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlNotationPtr nota; xmlNotationPtr nota = NULL;
#ifdef DEBUG_SAX #ifdef DEBUG_SAX
fprintf(stderr, "SAX.notationDecl(%s, %s, %s)\n", name, publicId, systemId); fprintf(stderr, "SAX.notationDecl(%s, %s, %s)\n", name, publicId, systemId);
#endif #endif
if (ctxt->inSubset == 1)
nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name, nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name,
publicId, systemId); publicId, systemId);
if (nota == 0) ctxt->valid = 0; else if (ctxt->inSubset == 2)
nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name,
publicId, systemId);
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt,
"SAX.notationDecl(%s) called while not in subset\n", name);
return;
}
if (nota == NULL) ctxt->valid = 0;
if (ctxt->validate && ctxt->wellFormed && if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset) ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateNotationDecl(&ctxt->vctxt, ctxt->myDoc, ctxt->valid &= xmlValidateNotationDecl(&ctxt->vctxt, ctxt->myDoc,
@ -518,6 +610,7 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
xmlAttrPtr ret; xmlAttrPtr ret;
xmlChar *name; xmlChar *name;
xmlChar *ns; xmlChar *ns;
xmlChar *nval;
xmlNsPtr namespace; xmlNsPtr namespace;
/**************** /****************
@ -528,7 +621,15 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
/* /*
* Split the full name into a namespace prefix and the tag name * Split the full name into a namespace prefix and the tag name
*/ */
name = xmlSplitQName(fullname, &ns); name = xmlSplitQName(ctxt, fullname, &ns);
/*
* Do the last stage of the attribute normalization
*/
nval = xmlValidNormalizeAttributeValue(ctxt->myDoc,
ctxt->node, fullname, value);
if (nval != NULL)
value = nval;
/* /*
* Check whether it's a namespace definition * Check whether it's a namespace definition
@ -540,15 +641,28 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
xmlNewNs(ctxt->node, value, NULL); xmlNewNs(ctxt->node, value, NULL);
if (name != NULL) if (name != NULL)
xmlFree(name); xmlFree(name);
if (nval != NULL)
xmlFree(nval);
return; return;
} }
if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') && if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
(ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) { (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
/*
* Validate also for namespace decls, they are attributes from
* an XML-1.0 perspective
TODO ... doesn't map well with current API
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc,
ctxt->node, ret, value);
*/
/* a standard namespace definition */ /* a standard namespace definition */
xmlNewNs(ctxt->node, value, name); xmlNewNs(ctxt->node, value, name);
xmlFree(ns); xmlFree(ns);
if (name != NULL) if (name != NULL)
xmlFree(name); xmlFree(name);
if (nval != NULL)
xmlFree(nval);
return; return;
} }
@ -562,17 +676,52 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
ret = xmlNewNsProp(ctxt->node, namespace, name, NULL); ret = xmlNewNsProp(ctxt->node, namespace, name, NULL);
if (ret != NULL) { if (ret != NULL) {
if ((ctxt->replaceEntities == 0) && (!ctxt->html)) if ((ctxt->replaceEntities == 0) && (!ctxt->html)) {
ret->val = xmlStringGetNodeList(ctxt->myDoc, value); xmlNodePtr tmp;
else
ret->val = xmlNewDocText(ctxt->myDoc, value); ret->children = xmlStringGetNodeList(ctxt->myDoc, value);
tmp = ret->children;
while (tmp != NULL) {
tmp->parent = (xmlNodePtr) ret;
if (tmp->next == NULL)
ret->last = tmp;
tmp = tmp->next;
}
} else {
ret->children = xmlNewDocText(ctxt->myDoc, value);
ret->last = ret->children;
if (ret->children != NULL)
ret->children->parent = (xmlNodePtr) ret;
}
} }
if (ctxt->validate && ctxt->wellFormed && if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset) ctxt->myDoc && ctxt->myDoc->intSubset) {
/*
* If we don't substitute entities, the validation should be
* done on a value with replaced entities anyway.
*/
if (!ctxt->replaceEntities) {
xmlChar *val;
ctxt->depth++;
val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF,
0,0,0);
ctxt->depth--;
if (val == NULL)
ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt,
ctxt->myDoc, ctxt->node, ret, value);
else {
ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt,
ctxt->myDoc, ctxt->node, ret, val);
xmlFree(val);
}
} else {
ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc, ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc,
ctxt->node, ret, value); ctxt->node, ret, value);
else { }
} else {
/* /*
* when validating, the ID registration is done at the attribute * when validating, the ID registration is done at the attribute
* validation level. Otherwise we have to do specific handling here. * validation level. Otherwise we have to do specific handling here.
@ -583,6 +732,8 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
xmlAddRef(&ctxt->vctxt, ctxt->myDoc, value, ret); xmlAddRef(&ctxt->vctxt, ctxt->myDoc, value, ret);
} }
if (nval != NULL)
xmlFree(nval);
if (name != NULL) if (name != NULL)
xmlFree(name); xmlFree(name);
if (ns != NULL) if (ns != NULL)
@ -634,7 +785,7 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
/* /*
* Split the full name into a namespace prefix and the tag name * Split the full name into a namespace prefix and the tag name
*/ */
name = xmlSplitQName(fullname, &prefix); name = xmlSplitQName(ctxt, fullname, &prefix);
/* /*
@ -644,13 +795,13 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
*/ */
ret = xmlNewDocNode(ctxt->myDoc, NULL, name, NULL); ret = xmlNewDocNode(ctxt->myDoc, NULL, name, NULL);
if (ret == NULL) return; if (ret == NULL) return;
if (ctxt->myDoc->root == NULL) { if (ctxt->myDoc->children == NULL) {
#ifdef DEBUG_SAX_TREE #ifdef DEBUG_SAX_TREE
fprintf(stderr, "Setting %s as root\n", name); fprintf(stderr, "Setting %s as root\n", name);
#endif #endif
ctxt->myDoc->root = ret; xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
} else if (parent == NULL) { } else if (parent == NULL) {
parent = ctxt->myDoc->root; parent = ctxt->myDoc->children;
} }
/* /*
@ -679,6 +830,15 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
} }
} }
/*
* If it's the Document root, finish the Dtd validation and
* check the document root element for validity
*/
if ((ctxt->validate) && (ctxt->vctxt.finishDtd == 0)) {
ctxt->valid &= xmlValidateDtdFinal(&ctxt->vctxt, ctxt->myDoc);
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
ctxt->vctxt.finishDtd = 1;
}
/* /*
* process all the attributes whose name start with "xml" * process all the attributes whose name start with "xml"
*/ */
@ -790,6 +950,9 @@ reference(void *ctx, const xmlChar *name)
#ifdef DEBUG_SAX #ifdef DEBUG_SAX
fprintf(stderr, "SAX.reference(%s)\n", name); fprintf(stderr, "SAX.reference(%s)\n", name);
#endif #endif
if (name[0] == '#')
ret = xmlNewCharRef(ctxt->myDoc, name);
else
ret = xmlNewReference(ctxt->myDoc, name); ret = xmlNewReference(ctxt->myDoc, name);
#ifdef DEBUG_SAX_TREE #ifdef DEBUG_SAX_TREE
fprintf(stderr, "add reference %s to %s \n", name, ctxt->node->name); fprintf(stderr, "add reference %s to %s \n", name, ctxt->node->name);
@ -884,32 +1047,36 @@ processingInstruction(void *ctx, const xmlChar *target,
ret = xmlNewPI(target, data); ret = xmlNewPI(target, data);
if (ret == NULL) return; if (ret == NULL) return;
ret->doc = ctxt->myDoc; parent = ctxt->node;
if (ctxt->myDoc->root == NULL) {
if (ctxt->inSubset == 1) {
xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
return;
} else if (ctxt->inSubset == 2) {
xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret);
return;
}
if ((ctxt->myDoc->children == NULL) || (parent == NULL)) {
#ifdef DEBUG_SAX_TREE #ifdef DEBUG_SAX_TREE
fprintf(stderr, "Setting PI %s as root\n", target); fprintf(stderr, "Setting PI %s as root\n", target);
#endif #endif
ctxt->myDoc->root = ret; xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
} else if (parent == NULL) { return;
parent = ctxt->myDoc->root;
} }
if (parent != NULL) {
if (parent->type == XML_ELEMENT_NODE) { if (parent->type == XML_ELEMENT_NODE) {
#ifdef DEBUG_SAX_TREE #ifdef DEBUG_SAX_TREE
fprintf(stderr, "adding PI child %s to %s\n", target, parent->name); fprintf(stderr, "adding PI %s child to %s\n", target, parent->name);
#endif #endif
xmlAddChild(parent, ret); xmlAddChild(parent, ret);
} else { } else {
#ifdef DEBUG_SAX_TREE #ifdef DEBUG_SAX_TREE
fprintf(stderr, "adding PI sibling %s to ", target); fprintf(stderr, "adding PI %s sibling to ", target);
xmlDebugDumpOneNode(stderr, parent, 0); xmlDebugDumpOneNode(stderr, parent, 0);
#endif #endif
xmlAddSibling(parent, ret); xmlAddSibling(parent, ret);
} }
} }
}
/** /**
* globalNamespace: * globalNamespace:
* @ctx: the user data (XML parser context) * @ctx: the user data (XML parser context)
@ -1064,15 +1231,20 @@ comment(void *ctx, const xmlChar *value)
ret = xmlNewDocComment(ctxt->myDoc, value); ret = xmlNewDocComment(ctxt->myDoc, value);
if (ret == NULL) return; if (ret == NULL) return;
if (ctxt->myDoc->root == NULL) { if (ctxt->inSubset == 1) {
xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
return;
} else if (ctxt->inSubset == 2) {
xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret);
return;
}
if ((ctxt->myDoc->children == NULL) || (parent == NULL)) {
#ifdef DEBUG_SAX_TREE #ifdef DEBUG_SAX_TREE
fprintf(stderr, "Setting comment as root\n"); fprintf(stderr, "Setting comment as root\n");
#endif #endif
ctxt->myDoc->root = ret; xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
} else if (parent == NULL) { return;
parent = ctxt->myDoc->root;
} }
if (parent != NULL) {
if (parent->type == XML_ELEMENT_NODE) { if (parent->type == XML_ELEMENT_NODE) {
#ifdef DEBUG_SAX_TREE #ifdef DEBUG_SAX_TREE
fprintf(stderr, "adding comment child to %s\n", parent->name); fprintf(stderr, "adding comment child to %s\n", parent->name);
@ -1086,7 +1258,6 @@ comment(void *ctx, const xmlChar *value)
xmlAddSibling(parent, ret); xmlAddSibling(parent, ret);
} }
} }
}
/** /**
* cdataBlock: * cdataBlock:
@ -1148,6 +1319,7 @@ xmlSAXHandler xmlDefaultSAXHandler = {
xmlParserError, xmlParserError,
getParameterEntity, getParameterEntity,
cdataBlock, cdataBlock,
externalSubset,
}; };
/** /**
@ -1159,6 +1331,7 @@ void
xmlDefaultSAXHandlerInit(void) xmlDefaultSAXHandlerInit(void)
{ {
xmlDefaultSAXHandler.internalSubset = internalSubset; xmlDefaultSAXHandler.internalSubset = internalSubset;
xmlDefaultSAXHandler.externalSubset = externalSubset;
xmlDefaultSAXHandler.isStandalone = isStandalone; xmlDefaultSAXHandler.isStandalone = isStandalone;
xmlDefaultSAXHandler.hasInternalSubset = hasInternalSubset; xmlDefaultSAXHandler.hasInternalSubset = hasInternalSubset;
xmlDefaultSAXHandler.hasExternalSubset = hasExternalSubset; xmlDefaultSAXHandler.hasExternalSubset = hasExternalSubset;
@ -1181,6 +1354,9 @@ xmlDefaultSAXHandlerInit(void)
xmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace; xmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
xmlDefaultSAXHandler.processingInstruction = processingInstruction; xmlDefaultSAXHandler.processingInstruction = processingInstruction;
xmlDefaultSAXHandler.comment = comment; xmlDefaultSAXHandler.comment = comment;
if (xmlGetWarningsDefaultValue == 0)
xmlDefaultSAXHandler.warning = NULL;
else
xmlDefaultSAXHandler.warning = xmlParserWarning; xmlDefaultSAXHandler.warning = xmlParserWarning;
xmlDefaultSAXHandler.error = xmlParserError; xmlDefaultSAXHandler.error = xmlParserError;
xmlDefaultSAXHandler.fatalError = xmlParserError; xmlDefaultSAXHandler.fatalError = xmlParserError;
@ -1216,6 +1392,7 @@ xmlSAXHandler htmlDefaultSAXHandler = {
xmlParserError, xmlParserError,
getParameterEntity, getParameterEntity,
NULL, NULL,
NULL,
}; };
/** /**
@ -1227,6 +1404,7 @@ void
htmlDefaultSAXHandlerInit(void) htmlDefaultSAXHandlerInit(void)
{ {
htmlDefaultSAXHandler.internalSubset = NULL; htmlDefaultSAXHandler.internalSubset = NULL;
htmlDefaultSAXHandler.externalSubset = NULL;
htmlDefaultSAXHandler.isStandalone = NULL; htmlDefaultSAXHandler.isStandalone = NULL;
htmlDefaultSAXHandler.hasInternalSubset = NULL; htmlDefaultSAXHandler.hasInternalSubset = NULL;
htmlDefaultSAXHandler.hasExternalSubset = NULL; htmlDefaultSAXHandler.hasExternalSubset = NULL;

View File

@ -3,9 +3,9 @@ AC_PREREQ(2.2)
AC_INIT(entities.h) AC_INIT(entities.h)
AM_CONFIG_HEADER(config.h) AM_CONFIG_HEADER(config.h)
LIBXML_MAJOR_VERSION=1 LIBXML_MAJOR_VERSION=2
LIBXML_MINOR_VERSION=8 LIBXML_MINOR_VERSION=0
LIBXML_MICRO_VERSION=7 LIBXML_MICRO_VERSION=0
LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
@ -15,7 +15,7 @@ AC_SUBST(LIBXML_MICRO_VERSION)
AC_SUBST(LIBXML_VERSION) AC_SUBST(LIBXML_VERSION)
AC_SUBST(LIBXML_VERSION_INFO) AC_SUBST(LIBXML_VERSION_INFO)
VERSION=$LIBXML_VERSION VERSION=$LIBXML_VERSION-beta
AM_INIT_AUTOMAKE(libxml, $VERSION) AM_INIT_AUTOMAKE(libxml, $VERSION)

View File

@ -22,6 +22,7 @@
#include "xmlmemory.h" #include "xmlmemory.h"
#include "tree.h" #include "tree.h"
#include "parser.h" #include "parser.h"
#include "valid.h"
#include "debugXML.h" #include "debugXML.h"
#include "HTMLtree.h" #include "HTMLtree.h"
#include "HTMLparser.h" #include "HTMLparser.h"
@ -38,6 +39,315 @@ void xmlDebugDumpString(FILE *output, const xmlChar *str) {
fprintf(output, "..."); fprintf(output, "...");
} }
/**
 * xmlDebugDumpDtd:
 * @output:  the FILE * the dump is written to
 * @dtd:  the DTD node to describe
 * @depth:  nesting level; two spaces of indentation are printed per
 *          level, with at most 25 levels taken into account
 *
 * Print a one line summary of a DTD node (its name plus the PUBLIC and
 * SYSTEM identifiers when set), then a "PBM:" diagnostic line for each
 * inconsistency found in its parent, doc, prev and next links.
 * A NULL @dtd or a node whose type is not XML_DTD_NODE is reported and
 * nothing else is printed.
 */
void xmlDebugDumpDtd(FILE *output, xmlDtdPtr dtd, int depth) {
    int i;
    char shift[100];

    /* Build the indentation prefix: 2 spaces per level, capped at 25. */
    for (i = 0;((i < depth) && (i < 25));i++)
        shift[2 * i] = shift[2 * i + 1] = ' ';
    shift[2 * i] = shift[2 * i + 1] = 0;

    /* The prefix is data, so it must not be used as the format string. */
    fprintf(output, "%s", shift);

    if (dtd == NULL) {
        fprintf(output, "PBM: Dtd is NULL\n");
        return;
    }
    if (dtd->type != XML_DTD_NODE) {
        fprintf(output, "PBM: not a DTD\n");
        return;
    }

    if (dtd->name != NULL)
        fprintf(output, "DTD(%s)", dtd->name);
    else
        fprintf(output, "DTD");
    if (dtd->ExternalID != NULL)
        fprintf(output, ", PUBLIC %s", dtd->ExternalID);
    if (dtd->SystemID != NULL)
        fprintf(output, ", SYSTEM %s", dtd->SystemID);
    fprintf(output, "\n");

    /*
     * Do a bit of checking
     */
    if (dtd->parent == NULL)
        fprintf(output, "PBM: Dtd has no parent\n");
    if (dtd->doc == NULL)
        fprintf(output, "PBM: Dtd has no doc\n");
    if ((dtd->parent != NULL) && (dtd->doc != dtd->parent->doc))
        fprintf(output, "PBM: Dtd doc differs from parent's one\n");

    /* A node without prev must be the first child of its parent. */
    if (dtd->prev == NULL) {
        if ((dtd->parent != NULL) && (dtd->parent->children != (xmlNodePtr)dtd))
            fprintf(output, "PBM: Dtd has no prev and not first of list\n");
    } else {
        if (dtd->prev->next != (xmlNodePtr) dtd)
            fprintf(output, "PBM: Dtd prev->next : back link wrong\n");
    }

    /* A node without next must be the last child of its parent. */
    if (dtd->next == NULL) {
        if ((dtd->parent != NULL) && (dtd->parent->last != (xmlNodePtr) dtd))
            fprintf(output, "PBM: Dtd has no next and not last of list\n");
    } else {
        if (dtd->next->prev != (xmlNodePtr) dtd)
            fprintf(output, "PBM: Dtd next->prev : forward link wrong\n");
    }
}
/*
 * xmlDebugDumpAttrDecl:
 * @output:  the FILE * the dump is written to
 * @attr:  the attribute declaration to describe
 * @depth:  nesting level, two spaces of indentation per level (max 25)
 *
 * Prints a one-line description of an attribute declaration: its name,
 * the element it belongs to, the attribute type, up to five enumeration
 * values, the default kind and the default value. "PBM:" diagnostic
 * lines follow for every tree link that looks inconsistent.
 *
 * Everything, including the terminating newline of the description line,
 * is written to @output.
 */
void xmlDebugDumpAttrDecl(FILE *output, xmlAttributePtr attr, int depth) {
    int i;
    char shift[100];

    for (i = 0; ((i < depth) && (i < 25)); i++)
        shift[2 * i] = shift[2 * i + 1] = ' ';
    shift[2 * i] = shift[2 * i + 1] = 0;

    /* shift is data, not a format: never hand it to fprintf as one. */
    fputs(shift, output);

    if (attr->type != XML_ATTRIBUTE_DECL) {
        fprintf(output, "PBM: not a Attr\n");
        return;
    }
    if (attr->name != NULL)
        fprintf(output, "ATTRDECL(%s)", attr->name);
    else
        fprintf(output, "PBM ATTRDECL noname!!!");
    if (attr->elem != NULL)
        fprintf(output, " for %s", attr->elem);
    else
        fprintf(output, " PBM noelem!!!");
    switch (attr->atype) {
        case XML_ATTRIBUTE_CDATA:
            fprintf(output, " CDATA");
            break;
        case XML_ATTRIBUTE_ID:
            fprintf(output, " ID");
            break;
        case XML_ATTRIBUTE_IDREF:
            fprintf(output, " IDREF");
            break;
        case XML_ATTRIBUTE_IDREFS:
            fprintf(output, " IDREFS");
            break;
        case XML_ATTRIBUTE_ENTITY:
            fprintf(output, " ENTITY");
            break;
        case XML_ATTRIBUTE_ENTITIES:
            fprintf(output, " ENTITIES");
            break;
        case XML_ATTRIBUTE_NMTOKEN:
            fprintf(output, " NMTOKEN");
            break;
        case XML_ATTRIBUTE_NMTOKENS:
            fprintf(output, " NMTOKENS");
            break;
        case XML_ATTRIBUTE_ENUMERATION:
            fprintf(output, " ENUMERATION");
            break;
        case XML_ATTRIBUTE_NOTATION:
            fprintf(output, " NOTATION ");
            break;
    }
    if (attr->tree != NULL) {
        xmlEnumerationPtr cur = attr->tree;

        /* Show at most five values; "..." marks a truncated list. */
        for (i = 0; i < 5; i++) {
            if (i != 0)
                fprintf(output, "|%s", cur->name);
            else
                fprintf(output, " (%s", cur->name);
            cur = cur->next;
            if (cur == NULL) break;
        }
        if (cur == NULL)
            fprintf(output, ")");
        else
            fprintf(output, "...)");
    }
    switch (attr->def) {
        case XML_ATTRIBUTE_NONE:
            break;
        case XML_ATTRIBUTE_REQUIRED:
            fprintf(output, " REQUIRED");
            break;
        case XML_ATTRIBUTE_IMPLIED:
            fprintf(output, " IMPLIED");
            break;
        case XML_ATTRIBUTE_FIXED:
            fprintf(output, " FIXED");
            break;
    }
    if (attr->defaultValue != NULL) {
        fprintf(output, "\"");
        xmlDebugDumpString(output, attr->defaultValue);
        fprintf(output, "\"");
    }
    /* The newline belongs to the dump stream, not to stdout. */
    fprintf(output, "\n");

    /*
     * Do a bit of checking
     */
    if (attr->parent == NULL)
        fprintf(output, "PBM: Attr has no parent\n");
    if (attr->doc == NULL)
        fprintf(output, "PBM: Attr has no doc\n");
    if ((attr->parent != NULL) && (attr->doc != attr->parent->doc))
        fprintf(output, "PBM: Attr doc differs from parent's one\n");
    if (attr->prev == NULL) {
        if ((attr->parent != NULL) &&
            (attr->parent->children != (xmlNodePtr) attr))
            fprintf(output, "PBM: Attr has no prev and not first of list\n");
    } else {
        if (attr->prev->next != (xmlNodePtr) attr)
            fprintf(output, "PBM: Attr prev->next : back link wrong\n");
    }
    if (attr->next == NULL) {
        if ((attr->parent != NULL) &&
            (attr->parent->last != (xmlNodePtr) attr))
            fprintf(output, "PBM: Attr has no next and not last of list\n");
    } else {
        if (attr->next->prev != (xmlNodePtr) attr)
            fprintf(output, "PBM: Attr next->prev : forward link wrong\n");
    }
}
/*
 * xmlDebugDumpElemDecl:
 * @output:  the FILE * the dump is written to
 * @elem:  the element declaration to describe
 * @depth:  nesting level, two spaces of indentation per level (max 25)
 *
 * Prints a one-line description of an element declaration: its name,
 * its content type and the serialized content model. "PBM:" diagnostic
 * lines follow for every tree link that looks inconsistent.
 *
 * Everything, including the terminating newline of the description line,
 * is written to @output.
 */
void xmlDebugDumpElemDecl(FILE *output, xmlElementPtr elem, int depth) {
    int i;
    char shift[100];

    for (i = 0; ((i < depth) && (i < 25)); i++)
        shift[2 * i] = shift[2 * i + 1] = ' ';
    shift[2 * i] = shift[2 * i + 1] = 0;

    /* shift is data, not a format: never hand it to fprintf as one. */
    fputs(shift, output);

    if (elem->type != XML_ELEMENT_DECL) {
        fprintf(output, "PBM: not a Elem\n");
        return;
    }
    if (elem->name != NULL)
        fprintf(output, "ELEMDECL(%s)", elem->name);
    else
        fprintf(output, "PBM ELEMDECL noname!!!");
    switch (elem->etype) {
        case XML_ELEMENT_TYPE_EMPTY:
            fprintf(output, ", EMPTY");
            break;
        case XML_ELEMENT_TYPE_ANY:
            fprintf(output, ", ANY");
            break;
        case XML_ELEMENT_TYPE_MIXED:
            fprintf(output, ", MIXED ");
            break;
        case XML_ELEMENT_TYPE_ELEMENT:
            /*
             * NOTE(review): prints the same label as the MIXED case, which
             * looks like a copy/paste slip (", ELEMENT " expected?). Left
             * unchanged because recorded test output may depend on it;
             * confirm before altering.
             */
            fprintf(output, ", MIXED ");
            break;
    }
    if (elem->content != NULL) {
        char buf[1001];

        buf[0] = 0;
        /*
         * NOTE(review): xmlSprintfElementContent() receives no buffer size;
         * presumably a content model longer than 1000 bytes overruns buf
         * before the terminator below is stored -- verify against its
         * implementation.
         */
        xmlSprintfElementContent(buf, elem->content, 1);
        buf[1000] = 0;
        fprintf(output, "%s", buf);
    }
    /* The newline belongs to the dump stream, not to stdout. */
    fprintf(output, "\n");

    /*
     * Do a bit of checking
     */
    if (elem->parent == NULL)
        fprintf(output, "PBM: Elem has no parent\n");
    if (elem->doc == NULL)
        fprintf(output, "PBM: Elem has no doc\n");
    if ((elem->parent != NULL) && (elem->doc != elem->parent->doc))
        fprintf(output, "PBM: Elem doc differs from parent's one\n");
    if (elem->prev == NULL) {
        if ((elem->parent != NULL) &&
            (elem->parent->children != (xmlNodePtr) elem))
            fprintf(output, "PBM: Elem has no prev and not first of list\n");
    } else {
        if (elem->prev->next != (xmlNodePtr) elem)
            fprintf(output, "PBM: Elem prev->next : back link wrong\n");
    }
    if (elem->next == NULL) {
        if ((elem->parent != NULL) &&
            (elem->parent->last != (xmlNodePtr) elem))
            fprintf(output, "PBM: Elem has no next and not last of list\n");
    } else {
        if (elem->next->prev != (xmlNodePtr) elem)
            fprintf(output, "PBM: Elem next->prev : forward link wrong\n");
    }
}
/*
 * xmlDebugDumpEntityDecl:
 * @output:  the FILE * the dump is written to
 * @ent:  the entity declaration to describe
 * @depth:  nesting level, two spaces of indentation per level (max 25)
 *
 * Prints the name and kind of an entity declaration, followed by its
 * external identifier, system identifier and content when present, each
 * on its own indented line. "PBM:" diagnostic lines follow for every
 * tree link that looks inconsistent.
 */
void xmlDebugDumpEntityDecl(FILE *output, xmlEntityPtr ent, int depth) {
    char shift[100];
    int level = 0;

    /* Build the indentation prefix; 25 levels keeps us inside shift[]. */
    while ((level < depth) && (level < 25)) {
        shift[2 * level] = ' ';
        shift[2 * level + 1] = ' ';
        level++;
    }
    shift[2 * level] = 0;
    shift[2 * level + 1] = 0;

    fputs(shift, output);

    if (ent->type != XML_ENTITY_DECL) {
        fputs("PBM: not a Entity decl\n", output);
        return;
    }

    if (ent->name == NULL)
        fputs("PBM ENTITYDECL noname!!!", output);
    else
        fprintf(output, "ENTITYDECL(%s)", ent->name);

    /* Each known kind also terminates the header line. */
    switch (ent->etype) {
        case XML_INTERNAL_GENERAL_ENTITY:
            fputs(", internal\n", output);
            break;
        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
            fputs(", external parsed\n", output);
            break;
        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
            fputs(", unparsed\n", output);
            break;
        case XML_INTERNAL_PARAMETER_ENTITY:
            fputs(", parameter\n", output);
            break;
        case XML_EXTERNAL_PARAMETER_ENTITY:
            fputs(", external parameter\n", output);
            break;
        case XML_INTERNAL_PREDEFINED_ENTITY:
            fputs(", predefined\n", output);
            break;
    }

    if (ent->ExternalID != NULL)
        fprintf(output, "%sExternalID=%s\n", shift, ent->ExternalID);
    if (ent->SystemID != NULL)
        fprintf(output, "%sSystemID=%s\n", shift, ent->SystemID);
    if (ent->content != NULL) {
        fputs(shift, output);
        fputs("content=", output);
        xmlDebugDumpString(output, ent->content);
        fputs("\n", output);
    }

    /*
     * Sanity checks on the links of the node.
     */
    if (ent->parent == NULL)
        fputs("PBM: Ent has no parent\n", output);
    if (ent->doc == NULL)
        fputs("PBM: Ent has no doc\n", output);
    if ((ent->parent != NULL) && (ent->doc != ent->parent->doc))
        fputs("PBM: Ent doc differs from parent's one\n", output);

    if (ent->prev != NULL) {
        if (ent->prev->next != (xmlNodePtr) ent)
            fputs("PBM: Ent prev->next : back link wrong\n", output);
    } else if ((ent->parent != NULL) &&
               (ent->parent->children != (xmlNodePtr) ent)) {
        fputs("PBM: Ent has no prev and not first of list\n", output);
    }

    if (ent->next != NULL) {
        if (ent->next->prev != (xmlNodePtr) ent)
            fputs("PBM: Ent next->prev : forward link wrong\n", output);
    } else if ((ent->parent != NULL) &&
               (ent->parent->last != (xmlNodePtr) ent)) {
        fputs("PBM: Ent has no next and not last of list\n", output);
    }
}
void xmlDebugDumpNamespace(FILE *output, xmlNsPtr ns, int depth) { void xmlDebugDumpNamespace(FILE *output, xmlNsPtr ns, int depth) {
int i; int i;
char shift[100]; char shift[100];
@ -74,7 +384,7 @@ void xmlDebugDumpEntity(FILE *output, xmlEntityPtr ent, int depth) {
shift[2 * i] = shift[2 * i + 1] = 0; shift[2 * i] = shift[2 * i + 1] = 0;
fprintf(output, shift); fprintf(output, shift);
switch (ent->type) { switch (ent->etype) {
case XML_INTERNAL_GENERAL_ENTITY: case XML_INTERNAL_GENERAL_ENTITY:
fprintf(output, "INTERNAL_GENERAL_ENTITY "); fprintf(output, "INTERNAL_GENERAL_ENTITY ");
break; break;
@ -91,7 +401,7 @@ void xmlDebugDumpEntity(FILE *output, xmlEntityPtr ent, int depth) {
fprintf(output, "EXTERNAL_PARAMETER_ENTITY "); fprintf(output, "EXTERNAL_PARAMETER_ENTITY ");
break; break;
default: default:
fprintf(output, "ENTITY_%d ! ", ent->type); fprintf(output, "ENTITY_%d ! ", ent->etype);
} }
fprintf(output, "%s\n", ent->name); fprintf(output, "%s\n", ent->name);
if (ent->ExternalID) { if (ent->ExternalID) {
@ -119,9 +429,31 @@ void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth) {
shift[2 * i] = shift[2 * i + 1] = 0; shift[2 * i] = shift[2 * i + 1] = 0;
fprintf(output, shift); fprintf(output, shift);
fprintf(output, "ATTRIBUTE %s\n", attr->name); fprintf(output, "ATTRIBUTE %s\n", attr->name);
if (attr->val != NULL) if (attr->children != NULL)
xmlDebugDumpNodeList(output, attr->val, depth + 1); xmlDebugDumpNodeList(output, attr->children, depth + 1);
/*
* Do a bit of checking
*/
if (attr->parent == NULL)
fprintf(output, "PBM: Attr has no parent\n");
if (attr->doc == NULL)
fprintf(output, "PBM: Attr has no doc\n");
if ((attr->parent != NULL) && (attr->doc != attr->parent->doc))
fprintf(output, "PBM: Attr doc differs from parent's one\n");
if (attr->prev == NULL) {
if ((attr->parent != NULL) && (attr->parent->properties != attr))
fprintf(output, "PBM: Attr has no prev and not first of list\n");
} else {
if (attr->prev->next != attr)
fprintf(output, "PBM: Attr prev->next : back link wrong\n");
}
if (attr->next != NULL) {
if (attr->next->prev != attr)
fprintf(output, "PBM: Attr next->prev : forward link wrong\n");
}
} }
void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth) { void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth) {
@ -139,9 +471,9 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
shift[2 * i] = shift[2 * i + 1] = ' '; shift[2 * i] = shift[2 * i + 1] = ' ';
shift[2 * i] = shift[2 * i + 1] = 0; shift[2 * i] = shift[2 * i + 1] = 0;
fprintf(output, shift);
switch (node->type) { switch (node->type) {
case XML_ELEMENT_NODE: case XML_ELEMENT_NODE:
fprintf(output, shift);
fprintf(output, "ELEMENT "); fprintf(output, "ELEMENT ");
if (node->ns != NULL) if (node->ns != NULL)
fprintf(output, "%s:%s\n", node->ns->prefix, node->name); fprintf(output, "%s:%s\n", node->ns->prefix, node->name);
@ -149,40 +481,63 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
fprintf(output, "%s\n", node->name); fprintf(output, "%s\n", node->name);
break; break;
case XML_ATTRIBUTE_NODE: case XML_ATTRIBUTE_NODE:
fprintf(output, shift);
fprintf(output, "Error, ATTRIBUTE found here\n"); fprintf(output, "Error, ATTRIBUTE found here\n");
break; break;
case XML_TEXT_NODE: case XML_TEXT_NODE:
fprintf(output, shift);
fprintf(output, "TEXT\n"); fprintf(output, "TEXT\n");
break; break;
case XML_CDATA_SECTION_NODE: case XML_CDATA_SECTION_NODE:
fprintf(output, shift);
fprintf(output, "CDATA_SECTION\n"); fprintf(output, "CDATA_SECTION\n");
break; break;
case XML_ENTITY_REF_NODE: case XML_ENTITY_REF_NODE:
fprintf(output, "ENTITY_REF\n"); fprintf(output, shift);
fprintf(output, "ENTITY_REF(%s)\n", node->name);
break; break;
case XML_ENTITY_NODE: case XML_ENTITY_NODE:
fprintf(output, shift);
fprintf(output, "ENTITY\n"); fprintf(output, "ENTITY\n");
break; break;
case XML_PI_NODE: case XML_PI_NODE:
fprintf(output, shift);
fprintf(output, "PI %s\n", node->name); fprintf(output, "PI %s\n", node->name);
break; break;
case XML_COMMENT_NODE: case XML_COMMENT_NODE:
fprintf(output, shift);
fprintf(output, "COMMENT\n"); fprintf(output, "COMMENT\n");
break; break;
case XML_DOCUMENT_NODE: case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE: case XML_HTML_DOCUMENT_NODE:
fprintf(output, shift);
fprintf(output, "Error, DOCUMENT found here\n"); fprintf(output, "Error, DOCUMENT found here\n");
break; break;
case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_TYPE_NODE:
fprintf(output, shift);
fprintf(output, "DOCUMENT_TYPE\n"); fprintf(output, "DOCUMENT_TYPE\n");
break; break;
case XML_DOCUMENT_FRAG_NODE: case XML_DOCUMENT_FRAG_NODE:
fprintf(output, shift);
fprintf(output, "DOCUMENT_FRAG\n"); fprintf(output, "DOCUMENT_FRAG\n");
break; break;
case XML_NOTATION_NODE: case XML_NOTATION_NODE:
fprintf(output, "NOTATION\n"); fprintf(output, "NOTATION\n");
break; break;
case XML_DTD_NODE:
xmlDebugDumpDtd(output, (xmlDtdPtr) node, depth);
return;
case XML_ELEMENT_DECL:
xmlDebugDumpElemDecl(output, (xmlElementPtr) node, depth);
return;
case XML_ATTRIBUTE_DECL:
xmlDebugDumpAttrDecl(output, (xmlAttributePtr) node, depth);
return;
case XML_ENTITY_DECL:
xmlDebugDumpEntityDecl(output, (xmlEntityPtr) node, depth);
return;
default: default:
fprintf(output, shift);
fprintf(output, "NODE_%d\n", node->type); fprintf(output, "NODE_%d\n", node->type);
} }
if (node->doc == NULL) { if (node->doc == NULL) {
@ -210,12 +565,35 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
if (ent != NULL) if (ent != NULL)
xmlDebugDumpEntity(output, ent, depth + 1); xmlDebugDumpEntity(output, ent, depth + 1);
} }
/*
* Do a bit of checking
*/
if (node->parent == NULL)
fprintf(output, "PBM: Node has no parent\n");
if (node->doc == NULL)
fprintf(output, "PBM: Node has no doc\n");
if ((node->parent != NULL) && (node->doc != node->parent->doc))
fprintf(output, "PBM: Node doc differs from parent's one\n");
if (node->prev == NULL) {
if ((node->parent != NULL) && (node->parent->children != node))
fprintf(output, "PBM: Node has no prev and not first of list\n");
} else {
if (node->prev->next != node)
fprintf(output, "PBM: Node prev->next : back link wrong\n");
}
if (node->next == NULL) {
if ((node->parent != NULL) && (node->parent->last != node))
fprintf(output, "PBM: Node has no next and not last of list\n");
} else {
if (node->next->prev != node)
fprintf(output, "PBM: Node next->prev : forward link wrong\n");
}
} }
void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth) { void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth) {
xmlDebugDumpOneNode(output, node, depth); xmlDebugDumpOneNode(output, node, depth);
if (node->childs != NULL) if (node->children != NULL)
xmlDebugDumpNodeList(output, node->childs, depth + 1); xmlDebugDumpNodeList(output, node->children, depth + 1);
} }
void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth) { void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth) {
@ -306,8 +684,8 @@ void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) {
xmlDebugDumpDocumentHead(output, doc); xmlDebugDumpDocumentHead(output, doc);
if (((doc->type == XML_DOCUMENT_NODE) || if (((doc->type == XML_DOCUMENT_NODE) ||
(doc->type == XML_HTML_DOCUMENT_NODE)) && (doc->type == XML_HTML_DOCUMENT_NODE)) &&
(doc->root != NULL)) (doc->children != NULL))
xmlDebugDumpNodeList(output, doc->root, 1); xmlDebugDumpNodeList(output, doc->children, 1);
} }
void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) { void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
@ -368,27 +746,27 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
doc->intSubset->entities; doc->intSubset->entities;
fprintf(output, "Entities in internal subset\n"); fprintf(output, "Entities in internal subset\n");
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; cur = table->table[i];
fprintf(output, "%d : %s : ", i, cur->name); fprintf(output, "%d : %s : ", i, cur->name);
switch (cur->type) { switch (cur->etype) {
case XML_INTERNAL_GENERAL_ENTITY: case XML_INTERNAL_GENERAL_ENTITY:
fprintf(output, "INTERNAL GENERAL"); fprintf(output, "INTERNAL GENERAL, ");
break; break;
case XML_EXTERNAL_GENERAL_PARSED_ENTITY: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
fprintf(output, "EXTERNAL PARSED"); fprintf(output, "EXTERNAL PARSED, ");
break; break;
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
fprintf(output, "EXTERNAL UNPARSED"); fprintf(output, "EXTERNAL UNPARSED, ");
break; break;
case XML_INTERNAL_PARAMETER_ENTITY: case XML_INTERNAL_PARAMETER_ENTITY:
fprintf(output, "INTERNAL PARAMETER"); fprintf(output, "INTERNAL PARAMETER, ");
break; break;
case XML_EXTERNAL_PARAMETER_ENTITY: case XML_EXTERNAL_PARAMETER_ENTITY:
fprintf(output, "EXTERNAL PARAMETER"); fprintf(output, "EXTERNAL PARAMETER, ");
break; break;
default: default:
fprintf(output, "UNKNOWN TYPE %d", fprintf(output, "UNKNOWN TYPE %d",
cur->type); cur->etype);
} }
if (cur->ExternalID != NULL) if (cur->ExternalID != NULL)
fprintf(output, "ID \"%s\"", cur->ExternalID); fprintf(output, "ID \"%s\"", cur->ExternalID);
@ -407,27 +785,27 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
doc->extSubset->entities; doc->extSubset->entities;
fprintf(output, "Entities in external subset\n"); fprintf(output, "Entities in external subset\n");
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; cur = table->table[i];
fprintf(output, "%d : %s : ", i, cur->name); fprintf(output, "%d : %s : ", i, cur->name);
switch (cur->type) { switch (cur->etype) {
case XML_INTERNAL_GENERAL_ENTITY: case XML_INTERNAL_GENERAL_ENTITY:
fprintf(output, "INTERNAL GENERAL"); fprintf(output, "INTERNAL GENERAL, ");
break; break;
case XML_EXTERNAL_GENERAL_PARSED_ENTITY: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
fprintf(output, "EXTERNAL PARSED"); fprintf(output, "EXTERNAL PARSED, ");
break; break;
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
fprintf(output, "EXTERNAL UNPARSED"); fprintf(output, "EXTERNAL UNPARSED, ");
break; break;
case XML_INTERNAL_PARAMETER_ENTITY: case XML_INTERNAL_PARAMETER_ENTITY:
fprintf(output, "INTERNAL PARAMETER"); fprintf(output, "INTERNAL PARAMETER, ");
break; break;
case XML_EXTERNAL_PARAMETER_ENTITY: case XML_EXTERNAL_PARAMETER_ENTITY:
fprintf(output, "EXTERNAL PARAMETER"); fprintf(output, "EXTERNAL PARAMETER, ");
break; break;
default: default:
fprintf(output, "UNKNOWN TYPE %d", fprintf(output, "UNKNOWN TYPE %d",
cur->type); cur->etype);
} }
if (cur->ExternalID != NULL) if (cur->ExternalID != NULL)
fprintf(output, "ID \"%s\"", cur->ExternalID); fprintf(output, "ID \"%s\"", cur->ExternalID);
@ -449,14 +827,14 @@ static int xmlLsCountNode(xmlNodePtr node) {
switch (node->type) { switch (node->type) {
case XML_ELEMENT_NODE: case XML_ELEMENT_NODE:
list = node->childs; list = node->children;
break; break;
case XML_DOCUMENT_NODE: case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE: case XML_HTML_DOCUMENT_NODE:
list = ((xmlDocPtr) node)->root; list = ((xmlDocPtr) node)->children;
break; break;
case XML_ATTRIBUTE_NODE: case XML_ATTRIBUTE_NODE:
list = ((xmlAttrPtr) node)->val; list = ((xmlAttrPtr) node)->children;
break; break;
case XML_TEXT_NODE: case XML_TEXT_NODE:
case XML_CDATA_SECTION_NODE: case XML_CDATA_SECTION_NODE:
@ -475,6 +853,10 @@ static int xmlLsCountNode(xmlNodePtr node) {
case XML_ENTITY_NODE: case XML_ENTITY_NODE:
case XML_DOCUMENT_FRAG_NODE: case XML_DOCUMENT_FRAG_NODE:
case XML_NOTATION_NODE: case XML_NOTATION_NODE:
case XML_DTD_NODE:
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
ret = 1; ret = 1;
break; break;
} }
@ -621,9 +1003,9 @@ xmlShellList(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node,
if ((node->type == XML_DOCUMENT_NODE) || if ((node->type == XML_DOCUMENT_NODE) ||
(node->type == XML_HTML_DOCUMENT_NODE)) { (node->type == XML_HTML_DOCUMENT_NODE)) {
cur = ((xmlDocPtr) node)->root; cur = ((xmlDocPtr) node)->children;
} else if (node->childs != NULL) { } else if (node->children != NULL) {
cur = node->childs; cur = node->children;
} else { } else {
xmlLsOneNode(stdout, node); xmlLsOneNode(stdout, node);
return(0); return(0);
@ -910,10 +1292,10 @@ xmlShellDu(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr tree,
if ((node->type == XML_DOCUMENT_NODE) || if ((node->type == XML_DOCUMENT_NODE) ||
(node->type == XML_HTML_DOCUMENT_NODE)) { (node->type == XML_HTML_DOCUMENT_NODE)) {
node = ((xmlDocPtr) node)->root; node = ((xmlDocPtr) node)->children;
} else if (node->childs != NULL) { } else if (node->children != NULL) {
/* deep first */ /* deep first */
node = node->childs; node = node->children;
indent++; indent++;
} else if ((node != tree) && (node->next != NULL)) { } else if ((node != tree) && (node->next != NULL)) {
/* then siblings */ /* then siblings */
@ -1008,7 +1390,7 @@ xmlShellPwd(xmlShellCtxtPtr ctxt, char *buffer, xmlNodePtr node,
} else if (cur->type == XML_ATTRIBUTE_NODE) { } else if (cur->type == XML_ATTRIBUTE_NODE) {
sep = '@'; sep = '@';
name = (const char *) (((xmlAttrPtr) cur)->name); name = (const char *) (((xmlAttrPtr) cur)->name);
next = ((xmlAttrPtr) cur)->node; next = ((xmlAttrPtr) cur)->parent;
} else { } else {
next = cur->parent; next = cur->parent;
} }

View File

@ -35,14 +35,11 @@
#include <stdlib.h> #include <stdlib.h>
#endif #endif
#include "encoding.h" #include "encoding.h"
#ifdef HAVE_UNICODE_H
#include <unicode.h>
#endif
#include "xmlmemory.h" #include "xmlmemory.h"
#ifdef HAVE_UNICODE_H xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
#else /* ! HAVE_UNICODE_H */
/* /*
* From rfc2044: encoding of the Unicode values on UTF-8: * From rfc2044: encoding of the Unicode values on UTF-8:
* *
@ -54,6 +51,50 @@
* I hope we won't use values > 0xFFFF anytime soon ! * I hope we won't use values > 0xFFFF anytime soon !
*/ */
/**
 * xmlCheckUTF8: Check utf-8 string for legality.
 * @utf: Pointer to putative utf-8 encoded string.
 *
 * Checks @utf for being valid utf-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer utf-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Accepted byte patterns:
 *    0xxxxxxx                                1-byte code
 *    110xxxxx 10xxxxxx                       2-byte code
 *    1110xxxx 10xxxxxx 10xxxxxx              3-byte code
 *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx     4-byte code
 * Any other lead byte, in particular a stray continuation byte
 * (10xxxxxx), makes the string invalid.
 *
 * Return value: true if @utf is valid, false otherwise (including
 * when @utf is NULL).
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    unsigned char c;

    if (utf == NULL)
        return(0);

    /*
     * The continuation tests are evaluated left to right and stop at the
     * first failure, so the terminating 0 is never read past: a 0 byte
     * is not of the form 10xxxxxx.
     */
    while ((c = utf[0]) != 0) {
        if ((c & 0x80) == 0x00) {
            /* 1-byte code */
            utf += 1;
        } else if ((c & 0xe0) == 0xc0) {
            /* 2-byte code */
            if ((utf[1] & 0xc0) != 0x80)
                return(0);
            utf += 2;
        } else if ((c & 0xf0) == 0xe0) {
            /* 3-byte code */
            if (((utf[1] & 0xc0) != 0x80) ||
                ((utf[2] & 0xc0) != 0x80))
                return(0);
            utf += 3;
        } else if ((c & 0xf8) == 0xf0) {
            /* 4-byte code */
            if (((utf[1] & 0xc0) != 0x80) ||
                ((utf[2] & 0xc0) != 0x80) ||
                ((utf[3] & 0xc0) != 0x80))
                return(0);
            utf += 4;
        } else {
            /* continuation byte in lead position, or 0xf8..0xff */
            return(0);
        }
    }
    return(1);
}
/** /**
* isolat1ToUTF8: * isolat1ToUTF8:
* @out: a pointer to an array of bytes to store the result * @out: a pointer to an array of bytes to store the result
@ -66,27 +107,27 @@
* Returns the number of byte written, or -1 by lack of space. * Returns the number of byte written, or -1 by lack of space.
*/ */
int int
isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen) isolat1ToUTF8(unsigned char* out, int outlen,
{ const unsigned char* in, int *inlen) {
unsigned char* outstart= out; unsigned char* outstart= out;
unsigned char* outend= out+outlen; unsigned char* outend= out+outlen;
unsigned char* inend= in+inlen; const unsigned char* inend= in+*inlen;
unsigned char c; unsigned char c;
while (in < inend) { while (in < inend) {
c= *in++; c= *in++;
if (c < 0x80) { if (c < 0x80) {
if (out >= outend) return -1; if (out >= outend) return(-1);
*out++ = c; *out++ = c;
} }
else { else {
if (out >= outend) return -1; if (out >= outend) return(-1);
*out++ = 0xC0 | (c >> 6); *out++ = 0xC0 | (c >> 6);
if (out >= outend) return -1; if (out >= outend) return(-1);
*out++ = 0x80 | (0x3F & c); *out++ = 0x80 | (0x3F & c);
} }
} }
return out-outstart; return(out-outstart);
} }
/** /**
@ -101,138 +142,398 @@ isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen)
* TODO: UTF8Toisolat1 need a fallback mechanism ... * TODO: UTF8Toisolat1 need a fallback mechanism ...
* *
* Returns the number of byte written, or -1 by lack of space, or -2 * Returns the number of byte written, or -1 by lack of space, or -2
* if the transcoding failed. * if the transcoding fails (for *in is not valid utf8 string or
* the result of transformation can't fit into the encoding we want)
* The value of @inlen after return is the number of octets consumed
* as the return value is positive, else unpredictiable.
*/ */
int int
UTF8Toisolat1(unsigned char* out, int outlen, unsigned char* in, int inlen) UTF8Toisolat1(unsigned char* out, int outlen,
{ const unsigned char* in, int *inlen) {
unsigned char* outstart= out; unsigned char* outstart= out;
unsigned char* outend= out+outlen; unsigned char* outend= out+outlen;
unsigned char* inend= in+inlen; const unsigned char* inend= in+*inlen;
unsigned char c; unsigned char c;
while (in < inend) { while (in < inend) {
c= *in++; c= *in++;
if (c < 0x80) { if (c < 0x80) {
if (out >= outend) return -1; if (out >= outend) return(-1);
*out++= c; *out++= c;
} }
else if (((c & 0xFE) == 0xC2) && in<inend) { else if (in == inend) {
if (out >= outend) return -1; *inlen -= 1;
break;
}
else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) {
/* a two byte utf-8 and can be encoding as isolate1 */
*out++= ((c & 0x03) << 6) | (*in++ & 0x3F); *out++= ((c & 0x03) << 6) | (*in++ & 0x3F);
} }
else return -2; else
return(-2);
/* TODO : some should be represent as "&#x____;" */
} }
return out-outstart; return(out-outstart);
} }
/** /**
* UTF16ToUTF8: * UTF16LEToUTF8:
* @out: a pointer to an array of bytes to store the result * @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out * @outlen: the length of @out
* @in: a pointer to an array of UTF-16 chars (array of unsigned shorts) * @inb: a pointer to an array of UTF-16LE passwd as a byte array
* @inlen: the length of @in * @inlenb: the length of @in in UTF-16LE chars
* *
* Take a block of UTF-16 ushorts in and try to convert it to an UTF-8 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
* block of chars out. * block of chars out. This function assume the endian properity
* Returns the number of byte written, or -1 by lack of space. * is the same between the native type of this machine and the
* inputed one.
*
* Returns the number of byte written, or -1 by lack of space, or -2
* if the transcoding fails (for *in is not valid utf16 string)
* The value of *inlen after return is the number of octets consumed
* as the return value is positive, else unpredictiable.
*/ */
int int
UTF16ToUTF8(unsigned char* out, int outlen, unsigned short* in, int inlen) UTF16LEToUTF8(unsigned char* out, int outlen,
const unsigned char* inb, int *inlenb)
{ {
unsigned char* outstart= out; unsigned char* outstart= out;
unsigned char* outend= out+outlen; unsigned char* outend= out+outlen;
unsigned short* inend= in+inlen; unsigned short* in = (unsigned short*) inb;
unsigned int c, d; unsigned short* inend;
unsigned int c, d, inlen;
unsigned char *tmp;
int bits; int bits;
if ((*inlenb % 2) == 1)
(*inlenb)--;
inlen = *inlenb / 2;
inend= in + inlen;
while (in < inend) { while (in < inend) {
#ifdef BIG_ENDIAN
tmp = (unsigned char *) in;
c = *tmp++;
c = c | (((unsigned int)*tmp) << 8);
in++;
#else /* BIG_ENDIAN */
c= *in++; c= *in++;
#endif /* BIG_ENDIAN */
if ((c & 0xFC00) == 0xD800) { /* surrogates */ if ((c & 0xFC00) == 0xD800) { /* surrogates */
if ((in<inend) && (((d=*in++) & 0xFC00) == 0xDC00)) { if (in >= inend) { /* (in > inend) shouldn't happens */
(*inlenb) -= 2;
break;
}
#ifdef BIG_ENDIAN
tmp = (unsigned char *) in;
d = *tmp++;
d = d | (((unsigned int)*tmp) << 8);
in++;
#else /* BIG_ENDIAN */
d = *in++;
#endif /* BIG_ENDIAN */
if ((d & 0xFC00) == 0xDC00) {
c &= 0x03FF; c &= 0x03FF;
c <<= 10; c <<= 10;
c |= d & 0x03FF; c |= d & 0x03FF;
c += 0x10000; c += 0x10000;
} }
else return -1; else
return(-2);
} }
/* assertion: c is a single UTF-4 value */ /* assertion: c is a single UTF-4 value */
if (out >= outend)
if (out >= outend) return -1; return(-1);
if (c < 0x80) { *out++= c; bits= -6; } if (c < 0x80) { *out++= c; bits= -6; }
else if (c < 0x800) { *out++= (c >> 6) | 0xC0; bits= 0; } else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
else if (c < 0x10000) { *out++= (c >> 12) | 0xE0; bits= 6; } else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
else { *out++= (c >> 18) | 0xF0; bits= 12; } else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
for ( ; bits > 0; bits-= 6) { for ( ; bits >= 0; bits-= 6) {
if (out >= outend) return -1; if (out >= outend)
*out++= (c >> bits) & 0x3F; return(-1);
*out++= ((c >> bits) & 0x3F) | 0x80;
} }
} }
return out-outstart; return(out-outstart);
} }
/** /**
* UTF8ToUTF16: * UTF8ToUTF16LE:
* @out: a pointer to an array of shorts to store the result * @outb: a pointer to an array of bytes to store the result
* @outlen: the length of @out (number of shorts) * @outlen: the length of @outb
* @in: a pointer to an array of UTF-8 chars * @in: a pointer to an array of UTF-8 chars
* @inlen: the length of @in * @inlen: the length of @in
* *
* Take a block of UTF-8 chars in and try to convert it to an UTF-16 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
* block of chars out. * block of chars out.
* TODO: UTF8ToUTF16 need a fallback mechanism ... * TODO: UTF8ToUTF16LE need a fallback mechanism ...
* *
* Returns the number of byte written, or -1 by lack of space, or -2 * Returns the number of byte written, or -1 by lack of space, or -2
* if the transcoding failed. * if the transcoding failed.
*/ */
int int
UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int inlen) UTF8ToUTF16LE(unsigned char* outb, int outlen,
const unsigned char* in, int *inlen)
{ {
unsigned short* out = (unsigned short*) outb;
unsigned short* outstart= out; unsigned short* outstart= out;
unsigned short* outend= out+outlen; unsigned short* outend;
unsigned char* inend= in+inlen; const unsigned char* inend= in+*inlen;
unsigned int c, d, trailing; unsigned int c, d, trailing;
#ifdef BIG_ENDIAN
unsigned char *tmp;
unsigned short tmp1, tmp2;
#endif /* BIG_ENDIAN */
outlen /= 2; /* convert in short length */
outend = out + outlen;
while (in < inend) { while (in < inend) {
d= *in++; d= *in++;
if (d < 0x80) { c= d; trailing= 0; } if (d < 0x80) { c= d; trailing= 0; }
else if (d < 0xC0) return -2; /* trailing byte in leading position */ else if (d < 0xC0)
return(-2); /* trailing byte in leading position */
else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
else if (d < 0xF8) { c= d & 0x07; trailing= 3; } else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
else return -2; /* no chance for this in UTF-16 */ else
return(-2); /* no chance for this in UTF-16 */
if (inend - in < trailing) {
*inlen -= (inend - in);
break;
}
for ( ; trailing; trailing--) { for ( ; trailing; trailing--) {
if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) return -1; if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
return(-1);
c <<= 6; c <<= 6;
c |= d & 0x3F; c |= d & 0x3F;
} }
/* assertion: c is a single UTF-4 value */ /* assertion: c is a single UTF-4 value */
if (c < 0x10000) { if (c < 0x10000) {
if (out >= outend) return -1; if (out >= outend)
return(-1);
#ifdef BIG_ENDIAN
tmp = (unsigned char *) out;
*tmp = c ;
*(tmp + 1) = c >> 8 ;
out++;
#else /* BIG_ENDIAN */
*out++ = c; *out++ = c;
#endif /* BIG_ENDIAN */
} }
else if (c < 0x110000) { else if (c < 0x110000) {
if (out+1 >= outend) return -1; if (out+1 >= outend)
return(-1);
c -= 0x10000; c -= 0x10000;
#ifdef BIG_ENDIAN
tmp1 = 0xD800 | (c >> 10);
tmp = (unsigned char *) out;
*tmp = tmp1;
*(tmp + 1) = tmp1 >> 8;
out++;
tmp2 = 0xDC00 | (c & 0x03FF);
tmp = (unsigned char *) out;
*tmp = tmp2;
*(tmp + 1) = tmp2 >> 8;
out++;
#else /* BIG_ENDIAN */
*out++ = 0xD800 | (c >> 10); *out++ = 0xD800 | (c >> 10);
*out++ = 0xDC00 | (c & 0x03FF); *out++ = 0xDC00 | (c & 0x03FF);
#endif /* BIG_ENDIAN */
} }
else return -1; else
return(-1);
} }
return out-outstart; return(out-outstart);
} }
#endif /* ! HAVE_UNICODE_H */ /**
* UTF16BEToUTF8:
* @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out
* @inb: a pointer to an array of UTF-16BE passed as a byte array
* @inlenb: the length of @inb in bytes
*
* Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
* block of chars out. The input is expected to be big-endian UTF-16.
*
* Returns the number of bytes written, or -1 by lack of space, or -2
* if the transcoding fails (if *inb is not a valid utf16 string)
* The value of *inlenb after return is the number of octets consumed
* if the return value is positive, else unpredictable.
*/
/*
 * UTF16BEToUTF8 converts big-endian UTF-16 (passed as bytes) to UTF-8.
 *
 * out/outlen:  destination buffer and its size in bytes
 * inb/inlenb:  source bytes and their count; an odd count is rounded
 *              down, and 2 more is subtracted when the input stops right
 *              after a high surrogate, so that the caller can resubmit
 *              the unconsumed tail
 *
 * Returns the number of bytes stored in out, -1 when out is too small,
 * -2 when a high surrogate is not followed by a low surrogate.
 *
 * Each 16-bit unit is assembled from two bytes explicitly, so the result
 * depends neither on the host byte order nor on a BIG_ENDIAN macro, and
 * inb needs no particular alignment.
 */
int
UTF16BEToUTF8(unsigned char* out, int outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while (in < inend) {
        c = (((unsigned int) in[0]) << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* the low surrogate is not available yet */
                (*inlenb) -= 2;
                break;
            }
            d = (((unsigned int) in[0]) << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) == 0xDC00) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
            else
                return(-2);
        }

        /* assertion: c is a single UCS-4 value */
        if (out >= outend)
            return(-1);
        /* lead byte; bits is the shift of the first continuation byte */
        if      (c <    0x80) {  *out++=  c;                        bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0; bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0; bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }

        for ( ; bits >= 0; bits-= 6) {
            if (out >= outend)
                return(-1);
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
    }
    return(out-outstart);
}
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in in bytes
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is always written big-endian, byte by
 * byte, whatever the byte order of the host machine is.
 * TODO: UTF8ToUTF16BE need a fallback mechanism ...
 *
 * If @in ends in the middle of a multi-byte sequence, the conversion
 * stops before that sequence and *inlen is set to the number of bytes
 * consumed so far, so the caller can resubmit the incomplete sequence
 * once the missing bytes are available.
 *
 * Returns the number of bytes written, or -1 by lack of space, or -2
 * if the transcoding failed (malformed UTF-8, or a value that cannot
 * be represented in UTF-16).
 */
int
UTF8ToUTF16BE(unsigned char* outb, int outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* seq;
    unsigned int c, d, hi, lo;
    int trailing;

    if ((inlen == NULL) || (*inlen < 0) || (outlen < 0))
        return(-1);

    /* Only whole 16-bit units can be stored. */
    outend = outb + (outlen / 2) * 2;
    inend = in + *inlen;

    while (in < inend) {
        seq = in;                   /* start of the current sequence */
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)
            return(-2);    /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else
            return(-2);    /* no chance for this in UTF-16 */

        if (inend - in < trailing) {
            /*
             * Incomplete sequence at the end of the input: report only
             * the bytes before its lead byte as consumed, since nothing
             * is emitted for it.
             */
            *inlen = (int) (seq - instart);
            break;
        }

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                return(-2);    /* not a continuation byte */
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                return(-1);
            *out++ = (unsigned char) ((c >> 8) & 0xFF);
            *out++ = (unsigned char) (c & 0xFF);
        }
        else if (c < 0x110000) {
            /* Needs a surrogate pair: two 16-bit units. */
            if (outend - out < 4)
                return(-1);
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) ((hi >> 8) & 0xFF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) ((lo >> 8) & 0xFF);
            *out++ = (unsigned char) (lo & 0xFF);
        }
        else
            return(-2);    /* beyond the UTF-16 range */
    }
    return((int) (out - outb));
}
/** /**
* xmlDetectCharEncoding: * xmlDetectCharEncoding:
* @in: a pointer to the first bytes of the XML entity, must be at least * @in: a pointer to the first bytes of the XML entity, must be at least
* 4 bytes long. * 4 bytes long.
* @len: pointer to the length of the buffer
* *
* Guess the encoding of the entity using the first bytes of the entity content * Guess the encoding of the entity using the first bytes of the entity content
* accordingly of the non-normative appendix F of the XML-1.0 recommendation. * accordingly of the non-normative appendix F of the XML-1.0 recommendation.
@ -240,8 +541,9 @@ UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int inlen)
* Returns one of the XML_CHAR_ENCODING_... values. * Returns one of the XML_CHAR_ENCODING_... values.
*/ */
xmlCharEncoding xmlCharEncoding
xmlDetectCharEncoding(const unsigned char* in) xmlDetectCharEncoding(const unsigned char* in, int len)
{ {
if (len >= 4) {
if ((in[0] == 0x00) && (in[1] == 0x00) && if ((in[0] == 0x00) && (in[1] == 0x00) &&
(in[2] == 0x00) && (in[3] == 0x3C)) (in[2] == 0x00) && (in[3] == 0x3C))
return(XML_CHAR_ENCODING_UCS4BE); return(XML_CHAR_ENCODING_UCS4BE);
@ -254,16 +556,19 @@ xmlDetectCharEncoding(const unsigned char* in)
if ((in[0] == 0x00) && (in[1] == 0x3C) && if ((in[0] == 0x00) && (in[1] == 0x3C) &&
(in[2] == 0x00) && (in[3] == 0x00)) (in[2] == 0x00) && (in[3] == 0x00))
return(XML_CHAR_ENCODING_UCS4_3412); return(XML_CHAR_ENCODING_UCS4_3412);
if ((in[0] == 0xFE) && (in[1] == 0xFF))
return(XML_CHAR_ENCODING_UTF16BE);
if ((in[0] == 0xFF) && (in[1] == 0xFE))
return(XML_CHAR_ENCODING_UTF16LE);
if ((in[0] == 0x4C) && (in[1] == 0x6F) && if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
(in[2] == 0xA7) && (in[3] == 0x94)) (in[2] == 0xA7) && (in[3] == 0x94))
return(XML_CHAR_ENCODING_EBCDIC); return(XML_CHAR_ENCODING_EBCDIC);
if ((in[0] == 0x3C) && (in[1] == 0x3F) && if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
(in[2] == 0x78) && (in[3] == 0x6D)) (in[2] == 0x78) && (in[3] == 0x6D))
return(XML_CHAR_ENCODING_UTF8); return(XML_CHAR_ENCODING_UTF8);
}
if (len >= 2) {
if ((in[0] == 0xFE) && (in[1] == 0xFF))
return(XML_CHAR_ENCODING_UTF16BE);
if ((in[0] == 0xFF) && (in[1] == 0xFE))
return(XML_CHAR_ENCODING_UTF16LE);
}
return(XML_CHAR_ENCODING_NONE); return(XML_CHAR_ENCODING_NONE);
} }
@ -364,7 +669,8 @@ static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
* Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
*/ */
xmlCharEncodingHandlerPtr xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input, xmlNewCharEncodingHandler(const char *name,
xmlCharEncodingInputFunc input,
xmlCharEncodingOutputFunc output) { xmlCharEncodingOutputFunc output) {
xmlCharEncodingHandlerPtr handler; xmlCharEncodingHandlerPtr handler;
char upper[500]; char upper[500];
@ -429,11 +735,11 @@ xmlInitCharEncodingHandlers(void) {
return; return;
} }
xmlNewCharEncodingHandler("UTF-8", NULL, NULL); xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
#ifdef HAVE_UNICODE_H xmlUTF16LEHandler =
#else xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
/* xmlNewCharEncodingHandler("UTF-16", UTF16ToUTF8, UTF8ToUTF16); */ xmlUTF16BEHandler =
xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
#endif
} }
/** /**
@ -493,7 +799,52 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
xmlCharEncodingHandlerPtr xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc) { xmlGetCharEncodingHandler(xmlCharEncoding enc) {
if (handlers == NULL) xmlInitCharEncodingHandlers(); if (handlers == NULL) xmlInitCharEncodingHandlers();
/* TODO xmlGetCharEncodingHandler !!!!!!! */ switch (enc) {
case XML_CHAR_ENCODING_ERROR:
return(NULL);
case XML_CHAR_ENCODING_NONE:
return(NULL);
case XML_CHAR_ENCODING_UTF8:
return(NULL);
case XML_CHAR_ENCODING_UTF16LE:
return(xmlUTF16LEHandler);
case XML_CHAR_ENCODING_UTF16BE:
return(xmlUTF16BEHandler);
case XML_CHAR_ENCODING_EBCDIC:
return(NULL);
case XML_CHAR_ENCODING_UCS4LE:
return(NULL);
case XML_CHAR_ENCODING_UCS4BE:
return(NULL);
case XML_CHAR_ENCODING_UCS4_2143:
return(NULL);
case XML_CHAR_ENCODING_UCS4_3412:
return(NULL);
case XML_CHAR_ENCODING_UCS2:
return(NULL);
case XML_CHAR_ENCODING_8859_1:
return(NULL);
case XML_CHAR_ENCODING_8859_2:
return(NULL);
case XML_CHAR_ENCODING_8859_3:
return(NULL);
case XML_CHAR_ENCODING_8859_4:
return(NULL);
case XML_CHAR_ENCODING_8859_5:
return(NULL);
case XML_CHAR_ENCODING_8859_6:
return(NULL);
case XML_CHAR_ENCODING_8859_7:
return(NULL);
case XML_CHAR_ENCODING_8859_8:
return(NULL);
case XML_CHAR_ENCODING_8859_9:
return(NULL);
case XML_CHAR_ENCODING_2022_JP:
case XML_CHAR_ENCODING_SHIFT_JIS:
case XML_CHAR_ENCODING_EUC_JP:
return(NULL);
}
return(NULL); return(NULL);
} }

View File

@ -67,11 +67,11 @@ typedef enum {
* Returns the number of byte written, or -1 by lack of space. * Returns the number of byte written, or -1 by lack of space.
*/ */
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen, typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
unsigned char* in, int inlen); const unsigned char* in, int *inlen);
/** /**
* xmlCharEncodingInputFunc: * xmlCharEncodingOutputFunc:
* @out: a pointer ot an array of bytes to store the result * @out: a pointer ot an array of bytes to store the result
* @outlen: the lenght of @out * @outlen: the lenght of @out
* @in: a pointer ot an array of UTF-8 chars * @in: a pointer ot an array of UTF-8 chars
@ -84,7 +84,7 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
* if the transcoding failed. * if the transcoding failed.
*/ */
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen, typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
unsigned char* in, int inlen); const unsigned char* in, int *inlen);
/* /*
* Block defining the handlers for non UTF-8 encodings. * Block defining the handlers for non UTF-8 encodings.
@ -101,10 +101,12 @@ struct _xmlCharEncodingHandler {
void xmlInitCharEncodingHandlers (void); void xmlInitCharEncodingHandlers (void);
void xmlCleanupCharEncodingHandlers (void); void xmlCleanupCharEncodingHandlers (void);
void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);
xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in); xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in,
int len);
xmlCharEncoding xmlParseCharEncoding (const char* name); xmlCharEncoding xmlParseCharEncoding (const char* name);
xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc); xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name); xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
int xmlCheckUTF8 (const unsigned char *utf);
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -21,6 +21,8 @@
#include "entities.h" #include "entities.h"
#include "parser.h" #include "parser.h"
#define DEBUG_ENT_REF /* debugging of cross entities dependancies */
/* /*
* The XML predefined entities. * The XML predefined entities.
*/ */
@ -45,6 +47,8 @@ xmlEntitiesTablePtr xmlPredefinedEntities = NULL;
void xmlFreeEntity(xmlEntityPtr entity) { void xmlFreeEntity(xmlEntityPtr entity) {
if (entity == NULL) return; if (entity == NULL) return;
if (entity->children)
xmlFreeNodeList(entity->children);
if (entity->name != NULL) if (entity->name != NULL)
xmlFree((char *) entity->name); xmlFree((char *) entity->name);
if (entity->ExternalID != NULL) if (entity->ExternalID != NULL)
@ -55,22 +59,31 @@ void xmlFreeEntity(xmlEntityPtr entity) {
xmlFree((char *) entity->content); xmlFree((char *) entity->content);
if (entity->orig != NULL) if (entity->orig != NULL)
xmlFree((char *) entity->orig); xmlFree((char *) entity->orig);
#ifdef WITH_EXTRA_ENT_DETECT
if (entity->entTab != NULL) {
int i;
for (i = 0; i < entity->entNr; i++)
xmlFree(entity->entTab[i]);
xmlFree(entity->entTab);
}
#endif
memset(entity, -1, sizeof(xmlEntity)); memset(entity, -1, sizeof(xmlEntity));
xmlFree(entity);
} }
/* /*
* xmlAddEntity : register a new entity for an entities table. * xmlAddEntity : register a new entity for an entities table.
*/ */
static void static xmlEntityPtr
xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type, xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) { const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) {
int i; int i;
xmlEntityPtr cur; xmlEntityPtr ret;
int len;
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; ret = table->table[i];
if (!xmlStrcmp(cur->name, name)) { if (!xmlStrcmp(ret->name, name)) {
/* /*
* The entity is already defined in this Dtd, the spec says to NOT * The entity is already defined in this Dtd, the spec says to NOT
* override it ... Is it worth a Warning ??? !!! * override it ... Is it worth a Warning ??? !!!
@ -78,15 +91,15 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
*/ */
if (((type == XML_INTERNAL_PARAMETER_ENTITY) || if (((type == XML_INTERNAL_PARAMETER_ENTITY) ||
(type == XML_EXTERNAL_PARAMETER_ENTITY)) && (type == XML_EXTERNAL_PARAMETER_ENTITY)) &&
((cur->type == XML_INTERNAL_PARAMETER_ENTITY) || ((ret->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->type == XML_EXTERNAL_PARAMETER_ENTITY))) (ret->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
return; return(NULL);
else else
if (((type != XML_INTERNAL_PARAMETER_ENTITY) && if (((type != XML_INTERNAL_PARAMETER_ENTITY) &&
(type != XML_EXTERNAL_PARAMETER_ENTITY)) && (type != XML_EXTERNAL_PARAMETER_ENTITY)) &&
((cur->type != XML_INTERNAL_PARAMETER_ENTITY) && ((ret->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->type != XML_EXTERNAL_PARAMETER_ENTITY))) (ret->etype != XML_EXTERNAL_PARAMETER_ENTITY)))
return; return(NULL);
} }
} }
if (table->nb_entities >= table->max_entities) { if (table->nb_entities >= table->max_entities) {
@ -94,35 +107,43 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
* need more elements. * need more elements.
*/ */
table->max_entities *= 2; table->max_entities *= 2;
table->table = (xmlEntityPtr) table->table = (xmlEntityPtr *)
xmlRealloc(table->table, table->max_entities * sizeof(xmlEntity)); xmlRealloc(table->table,
table->max_entities * sizeof(xmlEntityPtr));
if (table->table == NULL) { if (table->table == NULL) {
perror("realloc failed"); perror("realloc failed");
return; return(NULL);
} }
} }
cur = &table->table[table->nb_entities]; ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
cur->name = xmlStrdup(name); if (ret == NULL) {
for (len = 0;name[0] != 0;name++)len++; fprintf(stderr, "xmlAddEntity: out of memory\n");
cur->len = len; return(NULL);
cur->type = type; }
memset(ret, 0, sizeof(xmlEntity));
ret->type = XML_ENTITY_DECL;
table->table[table->nb_entities] = ret;
/*
* fill the structure.
*/
ret->name = xmlStrdup(name);
ret->etype = type;
if (ExternalID != NULL) if (ExternalID != NULL)
cur->ExternalID = xmlStrdup(ExternalID); ret->ExternalID = xmlStrdup(ExternalID);
else
cur->ExternalID = NULL;
if (SystemID != NULL) if (SystemID != NULL)
cur->SystemID = xmlStrdup(SystemID); ret->SystemID = xmlStrdup(SystemID);
else
cur->SystemID = NULL;
if (content != NULL) { if (content != NULL) {
cur->length = xmlStrlen(content); ret->length = xmlStrlen(content);
cur->content = xmlStrndup(content, cur->length); ret->content = xmlStrndup(content, ret->length);
} else { } else {
cur->length = 0; ret->length = 0;
cur->content = NULL; ret->content = NULL;
} }
cur->orig = NULL; ret->orig = NULL;
table->nb_entities++; table->nb_entities++;
return(ret);
} }
/** /**
@ -182,7 +203,7 @@ xmlGetPredefinedEntity(const xmlChar *name) {
if (xmlPredefinedEntities == NULL) if (xmlPredefinedEntities == NULL)
xmlInitializePredefinedEntities(); xmlInitializePredefinedEntities();
for (i = 0;i < xmlPredefinedEntities->nb_entities;i++) { for (i = 0;i < xmlPredefinedEntities->nb_entities;i++) {
cur = &xmlPredefinedEntities->table[i]; cur = xmlPredefinedEntities->table[i];
if (!xmlStrcmp(cur->name, name)) return(cur); if (!xmlStrcmp(cur->name, name)) return(cur);
} }
return(NULL); return(NULL);
@ -197,24 +218,50 @@ xmlGetPredefinedEntity(const xmlChar *name) {
* @SystemID: the entity system ID if available * @SystemID: the entity system ID if available
* @content: the entity content * @content: the entity content
* *
* Register a new entity for this document DTD. * Register a new entity for this document DTD external subset.
*
* Returns a pointer to the entity or NULL in case of error
*/ */
void xmlEntityPtr
xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type, xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) { const xmlChar *ExternalID, const xmlChar *SystemID,
const xmlChar *content) {
xmlEntitiesTablePtr table; xmlEntitiesTablePtr table;
xmlEntityPtr ret;
xmlDtdPtr dtd;
if (doc == NULL) {
fprintf(stderr,
"xmlAddDtdEntity: doc == NULL !\n");
return(NULL);
}
if (doc->extSubset == NULL) { if (doc->extSubset == NULL) {
fprintf(stderr, fprintf(stderr,
"xmlAddDtdEntity: document without external subset !\n"); "xmlAddDtdEntity: document without external subset !\n");
return; return(NULL);
} }
table = (xmlEntitiesTablePtr) doc->extSubset->entities; dtd = doc->extSubset;
table = (xmlEntitiesTablePtr) dtd->entities;
if (table == NULL) { if (table == NULL) {
table = xmlCreateEntitiesTable(); table = xmlCreateEntitiesTable();
doc->extSubset->entities = table; dtd->entities = table;
} }
xmlAddEntity(table, name, type, ExternalID, SystemID, content); ret = xmlAddEntity(table, name, type, ExternalID, SystemID, content);
if (ret == NULL) return(NULL);
/*
* Link it to the Dtd
*/
ret->parent = dtd;
ret->doc = dtd->doc;
if (dtd->last == NULL) {
dtd->children = dtd->last = (xmlNodePtr) ret;
} else {
dtd->last->next = (xmlNodePtr) ret;
ret->prev = dtd->last;
dtd->last = (xmlNodePtr) ret;
}
return(ret);
} }
/** /**
@ -227,29 +274,186 @@ xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
* @content: the entity content * @content: the entity content
* *
* Register a new entity for this document. * Register a new entity for this document.
*
* Returns a pointer to the entity or NULL in case of error
*/ */
void xmlEntityPtr
xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type, xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) { const xmlChar *ExternalID, const xmlChar *SystemID,
const xmlChar *content) {
xmlEntitiesTablePtr table; xmlEntitiesTablePtr table;
xmlEntityPtr ret;
xmlDtdPtr dtd;
if (doc == NULL) { if (doc == NULL) {
fprintf(stderr, fprintf(stderr,
"xmlAddDocEntity: document is NULL !\n"); "xmlAddDocEntity: document is NULL !\n");
return; return(NULL);
} }
if (doc->intSubset == NULL) { if (doc->intSubset == NULL) {
fprintf(stderr, fprintf(stderr,
"xmlAddDtdEntity: document without internal subset !\n"); "xmlAddDtdEntity: document without internal subset !\n");
return; return(NULL);
} }
dtd = doc->intSubset;
table = (xmlEntitiesTablePtr) doc->intSubset->entities; table = (xmlEntitiesTablePtr) doc->intSubset->entities;
if (table == NULL) { if (table == NULL) {
table = xmlCreateEntitiesTable(); table = xmlCreateEntitiesTable();
doc->intSubset->entities = table; doc->intSubset->entities = table;
} }
xmlAddEntity(table, name, type, ExternalID, SystemID, content); ret = xmlAddEntity(table, name, type, ExternalID, SystemID, content);
if (ret == NULL) return(NULL);
/*
* Link it to the Dtd
*/
ret->parent = dtd;
ret->doc = dtd->doc;
if (dtd->last == NULL) {
dtd->children = dtd->last = (xmlNodePtr) ret;
} else {
dtd->last->next = (xmlNodePtr) ret;
ret->prev = dtd->last;
dtd->last = (xmlNodePtr) ret;
} }
return(ret);
}
#ifdef WITH_EXTRA_ENT_DETECT
/**
 * xmlEntityCheckReference:
 * @ent:  an existing entity
 * @to:  the entity name it's referencing
 *
 * Walk the references recorded for @ent, and recursively those of the
 * entities they name, looking for @to. Used to keep track of references
 * and detect cycles (well formedness errors !).
 *
 * Returns: 0 if Okay, -1 in case of general error, 1 in case of loop
 *          detection.
 */
int
xmlEntityCheckReference(xmlEntityPtr ent, const xmlChar *to) {
    xmlDocPtr doc;
    int idx;

    if ((ent == NULL) || (to == NULL))
        return(-1);
    doc = ent->doc;
    if (doc == NULL)
        return(-1);

#ifdef DEBUG_ENT_REF
    printf("xmlEntityCheckReference(%s to %s)\n", ent->name, to);
#endif

    for (idx = 0; idx < ent->entNr; idx++) {
        const xmlChar *ref = ent->entTab[idx];
        xmlEntityPtr target = NULL;

        /* Direct hit: @ent already references @to. */
        if (xmlStrcmp(to, ref) == 0)
            return(1);

        /*
         * Resolve the recorded name with the lookup matching the kind
         * of @ent; predefined and unparsed entities are not followed.
         */
        if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
            (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
            target = xmlGetDocEntity(doc, ref);
        } else if ((ent->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
                   (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
            target = xmlGetDtdEntity(doc, ref);
        }

        /* A NULL target makes the recursive call return -1, not 1. */
        if (xmlEntityCheckReference(target, to) == 1)
            return(1);
    }
    return(0);
}
/**
 * xmlEntityAddReference:
 * @ent:  an existing entity
 * @to:  the entity name it's referencing
 *
 * Function to register reuse of an existing entity from a (new) one
 * Used to keep track of references and detect cycles (well formedness
 * errors !).
 *
 * A reference from an entity to itself is reported as a loop and is not
 * recorded. A name that is already recorded is accepted without being
 * added a second time. On allocation failure the list of references
 * already recorded is left untouched.
 *
 * Returns: 0 if Okay, -1 in case of general error, 1 in case of loop
 *          detection.
 */
int
xmlEntityAddReference(xmlEntityPtr ent, const xmlChar *to) {
    int i;
    xmlDocPtr doc;
    xmlEntityPtr indir = NULL;
    xmlChar *copy;

    if (ent == NULL) return(-1);
    if (to == NULL) return(-1);

    doc = ent->doc;
    if (doc == NULL) return(-1);

#ifdef DEBUG_ENT_REF
    printf("xmlEntityAddReference(%s to %s)\n", ent->name, to);
#endif

    /*
     * An entity referencing itself is the shortest possible loop. It must
     * be caught here: the recursive check below only walks references
     * that are already recorded, and recording a self reference would
     * make later xmlEntityCheckReference() calls recurse without end.
     */
    if ((ent->name != NULL) && (!xmlStrcmp(to, ent->name)))
        return(1);

    if (ent->entTab == NULL) {
        ent->entNr = 0;
        ent->entMax = 5;
        ent->entTab = (xmlChar **) xmlMalloc(ent->entMax * sizeof(xmlChar *));
        if (ent->entTab == NULL) {
            fprintf(stderr, "xmlEntityAddReference: out of memory !\n");
            return(-1);
        }
    }

    /* Already registered: nothing to do. */
    for (i = 0;i < ent->entNr;i++) {
        if (!xmlStrcmp(to, ent->entTab[i]))
            return(0);
    }

    /*
     * Do a recursive checking
     */
    switch (ent->etype) {
        case XML_INTERNAL_GENERAL_ENTITY:
        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
            indir = xmlGetDocEntity(doc, to);
            break;
        case XML_INTERNAL_PARAMETER_ENTITY:
        case XML_EXTERNAL_PARAMETER_ENTITY:
            indir = xmlGetDtdEntity(doc, to);
            break;
        case XML_INTERNAL_PREDEFINED_ENTITY:
        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
            break;
    }
    if ((indir != NULL) &&
        (xmlEntityCheckReference(indir, ent->name) == 1))
        return(1);

    /*
     * Add this to the list
     */
    if (ent->entMax <= ent->entNr) {
        /*
         * Grow through a temporary so that a failed reallocation neither
         * leaks the current table nor leaves entTab NULL with entNr > 0.
         */
        int newMax = (ent->entMax > 0) ? ent->entMax * 2 : 5;
        xmlChar **grown = (xmlChar **) xmlRealloc(ent->entTab,
                                             newMax * sizeof(xmlChar *));

        if (grown == NULL) {
            fprintf(stderr, "xmlEntityAddReference: out of memory !\n");
            return(-1);
        }
        ent->entTab = grown;
        ent->entMax = newMax;
    }

    /* Never store a NULL name: the lookups above compare every slot. */
    copy = xmlStrdup(to);
    if (copy == NULL) {
        fprintf(stderr, "xmlEntityAddReference: out of memory !\n");
        return(-1);
    }
    ent->entTab[ent->entNr++] = copy;
    return(0);
}
#endif
/** /**
* xmlGetParameterEntity: * xmlGetParameterEntity:
@ -270,27 +474,27 @@ xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) { if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->intSubset->entities; table = (xmlEntitiesTablePtr) doc->intSubset->entities;
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; cur = table->table[i];
if (((cur->type == XML_INTERNAL_PARAMETER_ENTITY) || if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->type == XML_EXTERNAL_PARAMETER_ENTITY)) && (cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) &&
(!xmlStrcmp(cur->name, name))) return(cur); (!xmlStrcmp(cur->name, name))) return(cur);
} }
} }
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities; table = (xmlEntitiesTablePtr) doc->extSubset->entities;
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; cur = table->table[i];
if (((cur->type == XML_INTERNAL_PARAMETER_ENTITY) || if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->type == XML_EXTERNAL_PARAMETER_ENTITY)) && (cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) &&
(!xmlStrcmp(cur->name, name))) return(cur); (!xmlStrcmp(cur->name, name))) return(cur);
} }
} }
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities; table = (xmlEntitiesTablePtr) doc->extSubset->entities;
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; cur = table->table[i];
if (((cur->type == XML_INTERNAL_PARAMETER_ENTITY) || if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->type == XML_EXTERNAL_PARAMETER_ENTITY)) && (cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) &&
(!xmlStrcmp(cur->name, name))) return(cur); (!xmlStrcmp(cur->name, name))) return(cur);
} }
} }
@ -316,9 +520,9 @@ xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities; table = (xmlEntitiesTablePtr) doc->extSubset->entities;
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; cur = table->table[i];
if ((cur->type != XML_INTERNAL_PARAMETER_ENTITY) && if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->type != XML_EXTERNAL_PARAMETER_ENTITY) && (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
(!xmlStrcmp(cur->name, name))) return(cur); (!xmlStrcmp(cur->name, name))) return(cur);
} }
} }
@ -345,18 +549,18 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) { if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->intSubset->entities; table = (xmlEntitiesTablePtr) doc->intSubset->entities;
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; cur = table->table[i];
if ((cur->type != XML_INTERNAL_PARAMETER_ENTITY) && if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->type != XML_EXTERNAL_PARAMETER_ENTITY) && (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
(!xmlStrcmp(cur->name, name))) return(cur); (!xmlStrcmp(cur->name, name))) return(cur);
} }
} }
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities; table = (xmlEntitiesTablePtr) doc->extSubset->entities;
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; cur = table->table[i];
if ((cur->type != XML_INTERNAL_PARAMETER_ENTITY) && if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->type != XML_EXTERNAL_PARAMETER_ENTITY) && (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
(!xmlStrcmp(cur->name, name))) return(cur); (!xmlStrcmp(cur->name, name))) return(cur);
} }
} }
@ -364,9 +568,9 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
xmlInitializePredefinedEntities(); xmlInitializePredefinedEntities();
table = xmlPredefinedEntities; table = xmlPredefinedEntities;
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; cur = table->table[i];
if ((cur->type != XML_INTERNAL_PARAMETER_ENTITY) && if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->type != XML_EXTERNAL_PARAMETER_ENTITY) && (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
(!xmlStrcmp(cur->name, name))) return(cur); (!xmlStrcmp(cur->name, name))) return(cur);
} }
@ -612,6 +816,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
*out++ = 'o'; *out++ = 'o';
*out++ = 't'; *out++ = 't';
*out++ = ';'; *out++ = ';';
#if 0
} else if ((*cur == '\'') && (!html)) { } else if ((*cur == '\'') && (!html)) {
*out++ = '&'; *out++ = '&';
*out++ = 'a'; *out++ = 'a';
@ -619,15 +824,20 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
*out++ = 'o'; *out++ = 'o';
*out++ = 's'; *out++ = 's';
*out++ = ';'; *out++ = ';';
#endif
} else if (((*cur >= 0x20) && (*cur < 0x80)) || } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
(*cur == '\n') || (*cur == '\r') || (*cur == '\t')) { (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
/* /*
* default case, just copy ! * default case, just copy !
*/ */
*out++ = *cur; *out++ = *cur;
#ifndef USE_UTF_8 } else if (*cur >= 0x80) {
} else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) { if (html) {
char buf[10], *ptr; char buf[15], *ptr;
/*
* TODO: improve by searching in html40EntitiesTable
*/
#ifdef HAVE_SNPRINTF #ifdef HAVE_SNPRINTF
snprintf(buf, 9, "&#%d;", *cur); snprintf(buf, 9, "&#%d;", *cur);
#else #else
@ -635,7 +845,80 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
#endif #endif
ptr = buf; ptr = buf;
while (*ptr != 0) *out++ = *ptr++; while (*ptr != 0) *out++ = *ptr++;
} else if (doc->encoding != NULL) {
/*
* TODO !!!
*/
*out++ = *cur;
} else {
/*
* We assume we have UTF-8 input.
*/
char buf[10], *ptr;
int val = 0, l = 1;
if (*cur < 0xC0) {
fprintf(stderr,
"xmlEncodeEntitiesReentrant : input not UTF-8\n");
doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
#ifdef HAVE_SNPRINTF
snprintf(buf, 9, "&#%d;", *cur);
#else
sprintf(buf, "&#%d;", *cur);
#endif #endif
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
continue;
} else if (*cur < 0xE0) {
val = (cur[0]) & 0x1F;
val <<= 6;
val |= (cur[1]) & 0x3F;
l = 2;
} else if (*cur < 0xF0) {
val = (cur[0]) & 0x0F;
val <<= 6;
val |= (cur[1]) & 0x3F;
val <<= 6;
val |= (cur[2]) & 0x3F;
l = 3;
} else if (*cur < 0xF8) {
val = (cur[0]) & 0x07;
val <<= 6;
val |= (cur[1]) & 0x3F;
val <<= 6;
val |= (cur[2]) & 0x3F;
val <<= 6;
val |= (cur[3]) & 0x3F;
l = 4;
}
if ((l == 1) || (!IS_CHAR(val))) {
fprintf(stderr,
"xmlEncodeEntitiesReentrant : char out of range\n");
doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
#ifdef HAVE_SNPRINTF
snprintf(buf, 9, "&#%d;", *cur);
#else
sprintf(buf, "&#%d;", *cur);
#endif
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
cur++;
continue;
}
/*
* We could do multiple things here. Just save as a char ref
*/
#ifdef HAVE_SNPRINTF
snprintf(buf, 14, "&#x%X;", val);
#else
sprintf(buf, "&#x%X;", val);
#endif
buf[14] = 0;
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
cur += l;
continue;
}
} else if (IS_CHAR(*cur)) { } else if (IS_CHAR(*cur)) {
char buf[10], *ptr; char buf[10], *ptr;
@ -682,11 +965,11 @@ xmlCreateEntitiesTable(void) {
} }
ret->max_entities = XML_MIN_ENTITIES_TABLE; ret->max_entities = XML_MIN_ENTITIES_TABLE;
ret->nb_entities = 0; ret->nb_entities = 0;
ret->table = (xmlEntityPtr ) ret->table = (xmlEntityPtr *)
xmlMalloc(ret->max_entities * sizeof(xmlEntity)); xmlMalloc(ret->max_entities * sizeof(xmlEntityPtr));
if (ret == NULL) { if (ret == NULL) {
fprintf(stderr, "xmlCreateEntitiesTable : xmlMalloc(%ld) failed\n", fprintf(stderr, "xmlCreateEntitiesTable : xmlMalloc(%ld) failed\n",
ret->max_entities * (long)sizeof(xmlEntity)); ret->max_entities * (long)sizeof(xmlEntityPtr));
xmlFree(ret); xmlFree(ret);
return(NULL); return(NULL);
} }
@ -706,7 +989,7 @@ xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
if (table == NULL) return; if (table == NULL) return;
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
xmlFreeEntity(&table->table[i]); xmlFreeEntity(table->table[i]);
} }
xmlFree(table->table); xmlFree(table->table);
xmlFree(table); xmlFree(table);
@ -731,8 +1014,8 @@ xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n"); fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n");
return(NULL); return(NULL);
} }
ret->table = (xmlEntityPtr) xmlMalloc(table->max_entities * ret->table = (xmlEntityPtr *) xmlMalloc(table->max_entities *
sizeof(xmlEntity)); sizeof(xmlEntityPtr));
if (ret->table == NULL) { if (ret->table == NULL) {
fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n"); fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n");
xmlFree(ret); xmlFree(ret);
@ -741,34 +1024,119 @@ xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
ret->max_entities = table->max_entities; ret->max_entities = table->max_entities;
ret->nb_entities = table->nb_entities; ret->nb_entities = table->nb_entities;
for (i = 0;i < ret->nb_entities;i++) { for (i = 0;i < ret->nb_entities;i++) {
cur = &ret->table[i]; cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
ent = &table->table[i]; if (cur == NULL) {
cur->len = ent->len; fprintf(stderr, "xmlCopyEntityTable: out of memory !\n");
cur->type = ent->type; xmlFree(ret);
xmlFree(ret->table);
return(NULL);
}
memset(cur, 0, sizeof(xmlEntity));
cur->type = XML_ELEMENT_DECL;
ret->table[i] = cur;
ent = table->table[i];
cur->etype = ent->etype;
if (ent->name != NULL) if (ent->name != NULL)
cur->name = xmlStrdup(ent->name); cur->name = xmlStrdup(ent->name);
else
cur->name = NULL;
if (ent->ExternalID != NULL) if (ent->ExternalID != NULL)
cur->ExternalID = xmlStrdup(ent->ExternalID); cur->ExternalID = xmlStrdup(ent->ExternalID);
else
cur->ExternalID = NULL;
if (ent->SystemID != NULL) if (ent->SystemID != NULL)
cur->SystemID = xmlStrdup(ent->SystemID); cur->SystemID = xmlStrdup(ent->SystemID);
else
cur->SystemID = NULL;
if (ent->content != NULL) if (ent->content != NULL)
cur->content = xmlStrdup(ent->content); cur->content = xmlStrdup(ent->content);
else
cur->content = NULL;
if (ent->orig != NULL) if (ent->orig != NULL)
cur->orig = xmlStrdup(ent->orig); cur->orig = xmlStrdup(ent->orig);
else
cur->orig = NULL;
} }
return(ret); return(ret);
} }
/**
 * xmlDumpEntityDecl:
 * @buf:  An XML buffer.
 * @ent:  An entity declaration
 *
 * This will dump a single entity declaration into @buf as an XML DTD
 * definition, i.e. one "<!ENTITY ...>" line terminated by a newline.
 * General entities are written as "<!ENTITY name ...>", parameter
 * entities as "<!ENTITY % name ...>". External entities are written
 * with a PUBLIC identifier when ent->ExternalID is set and with a
 * SYSTEM identifier otherwise.
 *
 * Nothing is written when @buf or @ent is NULL. An unknown entity type
 * is reported on stderr and produces no output in @buf.
 */
void
xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
    /* Guard against NULL arguments before ent->etype is read below. */
    if ((buf == NULL) || (ent == NULL))
        return;

    switch (ent->etype) {
        case XML_INTERNAL_GENERAL_ENTITY:
            xmlBufferWriteChar(buf, "<!ENTITY ");
            xmlBufferWriteCHAR(buf, ent->name);
            xmlBufferWriteChar(buf, " ");
            /* Prefer the original text (before reference substitution). */
            if (ent->orig != NULL)
                xmlBufferWriteQuotedString(buf, ent->orig);
            else
                xmlBufferWriteQuotedString(buf, ent->content);
            xmlBufferWriteChar(buf, ">\n");
            break;
        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
            xmlBufferWriteChar(buf, "<!ENTITY ");
            xmlBufferWriteCHAR(buf, ent->name);
            if (ent->ExternalID != NULL) {
                xmlBufferWriteChar(buf, " PUBLIC ");
                xmlBufferWriteQuotedString(buf, ent->ExternalID);
                xmlBufferWriteChar(buf, " ");
                xmlBufferWriteQuotedString(buf, ent->SystemID);
            } else {
                xmlBufferWriteChar(buf, " SYSTEM ");
                xmlBufferWriteQuotedString(buf, ent->SystemID);
            }
            xmlBufferWriteChar(buf, ">\n");
            break;
        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
            xmlBufferWriteChar(buf, "<!ENTITY ");
            xmlBufferWriteCHAR(buf, ent->name);
            if (ent->ExternalID != NULL) {
                xmlBufferWriteChar(buf, " PUBLIC ");
                xmlBufferWriteQuotedString(buf, ent->ExternalID);
                xmlBufferWriteChar(buf, " ");
                xmlBufferWriteQuotedString(buf, ent->SystemID);
            } else {
                xmlBufferWriteChar(buf, " SYSTEM ");
                xmlBufferWriteQuotedString(buf, ent->SystemID);
            }
            /* For unparsed entities the content holds the notation name. */
            if (ent->content != NULL) { /* Should be true ! */
                xmlBufferWriteChar(buf, " NDATA ");
                if (ent->orig != NULL)
                    xmlBufferWriteCHAR(buf, ent->orig);
                else
                    xmlBufferWriteCHAR(buf, ent->content);
            }
            xmlBufferWriteChar(buf, ">\n");
            break;
        case XML_INTERNAL_PARAMETER_ENTITY:
            xmlBufferWriteChar(buf, "<!ENTITY % ");
            xmlBufferWriteCHAR(buf, ent->name);
            xmlBufferWriteChar(buf, " ");
            if (ent->orig == NULL)
                xmlBufferWriteQuotedString(buf, ent->content);
            else
                xmlBufferWriteQuotedString(buf, ent->orig);
            xmlBufferWriteChar(buf, ">\n");
            break;
        case XML_EXTERNAL_PARAMETER_ENTITY:
            xmlBufferWriteChar(buf, "<!ENTITY % ");
            xmlBufferWriteCHAR(buf, ent->name);
            if (ent->ExternalID != NULL) {
                xmlBufferWriteChar(buf, " PUBLIC ");
                xmlBufferWriteQuotedString(buf, ent->ExternalID);
                xmlBufferWriteChar(buf, " ");
                xmlBufferWriteQuotedString(buf, ent->SystemID);
            } else {
                xmlBufferWriteChar(buf, " SYSTEM ");
                xmlBufferWriteQuotedString(buf, ent->SystemID);
            }
            xmlBufferWriteChar(buf, ">\n");
            break;
        default:
            /* Report under this function's own name, not the table dumper's. */
            fprintf(stderr,
                    "xmlDumpEntityDecl: internal: unknown type %d\n",
                    (int) ent->etype);
            break;
    }
}
/** /**
* xmlDumpEntitiesTable: * xmlDumpEntitiesTable:
* @buf: An XML buffer. * @buf: An XML buffer.
@ -784,81 +1152,7 @@ xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
if (table == NULL) return; if (table == NULL) return;
for (i = 0;i < table->nb_entities;i++) { for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i]; cur = table->table[i];
switch (cur->type) { xmlDumpEntityDecl(buf, cur);
case XML_INTERNAL_GENERAL_ENTITY:
xmlBufferWriteChar(buf, "<!ENTITY ");
xmlBufferWriteCHAR(buf, cur->name);
xmlBufferWriteChar(buf, " ");
if (cur->orig != NULL)
xmlBufferWriteQuotedString(buf, cur->orig);
else
xmlBufferWriteQuotedString(buf, cur->content);
xmlBufferWriteChar(buf, ">\n");
break;
case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
xmlBufferWriteChar(buf, "<!ENTITY ");
xmlBufferWriteCHAR(buf, cur->name);
if (cur->ExternalID != NULL) {
xmlBufferWriteChar(buf, " PUBLIC ");
xmlBufferWriteQuotedString(buf, cur->ExternalID);
xmlBufferWriteChar(buf, " ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
} else {
xmlBufferWriteChar(buf, " SYSTEM ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
}
xmlBufferWriteChar(buf, ">\n");
break;
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
xmlBufferWriteChar(buf, "<!ENTITY ");
xmlBufferWriteCHAR(buf, cur->name);
if (cur->ExternalID != NULL) {
xmlBufferWriteChar(buf, " PUBLIC ");
xmlBufferWriteQuotedString(buf, cur->ExternalID);
xmlBufferWriteChar(buf, " ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
} else {
xmlBufferWriteChar(buf, " SYSTEM ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
}
if (cur->content != NULL) { /* Should be true ! */
xmlBufferWriteChar(buf, " NDATA ");
if (cur->orig != NULL)
xmlBufferWriteCHAR(buf, cur->orig);
else
xmlBufferWriteCHAR(buf, cur->content);
}
xmlBufferWriteChar(buf, ">\n");
break;
case XML_INTERNAL_PARAMETER_ENTITY:
xmlBufferWriteChar(buf, "<!ENTITY % ");
xmlBufferWriteCHAR(buf, cur->name);
xmlBufferWriteChar(buf, " ");
if (cur->orig == NULL)
xmlBufferWriteQuotedString(buf, cur->content);
else
xmlBufferWriteQuotedString(buf, cur->orig);
xmlBufferWriteChar(buf, ">\n");
break;
case XML_EXTERNAL_PARAMETER_ENTITY:
xmlBufferWriteChar(buf, "<!ENTITY % ");
xmlBufferWriteCHAR(buf, cur->name);
if (cur->ExternalID != NULL) {
xmlBufferWriteChar(buf, " PUBLIC ");
xmlBufferWriteQuotedString(buf, cur->ExternalID);
xmlBufferWriteChar(buf, " ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
} else {
xmlBufferWriteChar(buf, " SYSTEM ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
}
xmlBufferWriteChar(buf, ">\n");
break;
default:
fprintf(stderr,
"xmlDumpEntitiesTable: internal: unknown type %d\n",
cur->type);
}
} }
} }

View File

@ -15,12 +15,17 @@
extern "C" { extern "C" {
#endif #endif
#define XML_INTERNAL_GENERAL_ENTITY 1 /*
#define XML_EXTERNAL_GENERAL_PARSED_ENTITY 2 * The different valid entity types
#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3 */
#define XML_INTERNAL_PARAMETER_ENTITY 4 typedef enum {
#define XML_EXTERNAL_PARAMETER_ENTITY 5 XML_INTERNAL_GENERAL_ENTITY = 1,
#define XML_INTERNAL_PREDEFINED_ENTITY 6 XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2,
XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3,
XML_INTERNAL_PARAMETER_ENTITY = 4,
XML_EXTERNAL_PARAMETER_ENTITY = 5,
XML_INTERNAL_PREDEFINED_ENTITY = 6
} xmlEntityType;
/* /*
* An unit of storage for an entity, contains the string, the value * An unit of storage for an entity, contains the string, the value
@ -30,14 +35,32 @@ extern "C" {
typedef struct _xmlEntity xmlEntity; typedef struct _xmlEntity xmlEntity;
typedef xmlEntity *xmlEntityPtr; typedef xmlEntity *xmlEntityPtr;
struct _xmlEntity { struct _xmlEntity {
int type; /* The entity type */ #ifndef XML_WITHOUT_CORBA
int len; /* The lenght of the name */ void *_private; /* for Corba, must be first ! */
const xmlChar *name; /* Name of the entity */ #endif
const xmlChar *ExternalID; /* External identifier for PUBLIC Entity */ xmlElementType type; /* XML_ENTITY_DECL, must be second ! */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ const xmlChar *name; /* Attribute name */
xmlChar *content; /* The entity content or ndata if unparsed */ struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlChar *orig; /* content without ref substitution */
xmlChar *content; /* content or ndata if unparsed */
int length; /* the content length */ int length; /* the content length */
xmlChar *orig; /* The entity cont without ref substitution */ xmlEntityType etype; /* The entity type */
const xmlChar *ExternalID; /* External identifier for PUBLIC */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
#ifdef WITH_EXTRA_ENT_DETECT
/* Referenced entities name stack */
xmlChar *ent; /* Current parsed Node */
int entNr; /* Depth of the parsing stack */
int entMax; /* Max depth of the parsing stack */
xmlChar * *entTab; /* array of nodes */
#endif
}; };
/* /*
@ -52,7 +75,7 @@ typedef xmlEntitiesTable *xmlEntitiesTablePtr;
struct _xmlEntitiesTable { struct _xmlEntitiesTable {
int nb_entities; /* number of elements stored */ int nb_entities; /* number of elements stored */
int max_entities; /* maximum number of elements */ int max_entities; /* maximum number of elements */
xmlEntityPtr table; /* the table of entities */ xmlEntityPtr *table; /* the table of entities */
}; };
@ -60,13 +83,13 @@ struct _xmlEntitiesTable {
* External functions : * External functions :
*/ */
void xmlAddDocEntity (xmlDocPtr doc, xmlEntityPtr xmlAddDocEntity (xmlDocPtr doc,
const xmlChar *name, const xmlChar *name,
int type, int type,
const xmlChar *ExternalID, const xmlChar *ExternalID,
const xmlChar *SystemID, const xmlChar *SystemID,
const xmlChar *content); const xmlChar *content);
void xmlAddDtdEntity (xmlDocPtr doc, xmlEntityPtr xmlAddDtdEntity (xmlDocPtr doc,
const xmlChar *name, const xmlChar *name,
int type, int type,
const xmlChar *ExternalID, const xmlChar *ExternalID,
@ -88,9 +111,16 @@ xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table);
void xmlFreeEntitiesTable (xmlEntitiesTablePtr table); void xmlFreeEntitiesTable (xmlEntitiesTablePtr table);
void xmlDumpEntitiesTable (xmlBufferPtr buf, void xmlDumpEntitiesTable (xmlBufferPtr buf,
xmlEntitiesTablePtr table); xmlEntitiesTablePtr table);
void xmlDumpEntityDecl (xmlBufferPtr buf,
xmlEntityPtr ent);
xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table); xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table);
void xmlCleanupPredefinedEntities(void); void xmlCleanupPredefinedEntities(void);
#ifdef WITH_EXTRA_ENT_DETECT
int xmlEntityAddReference (xmlEntityPtr ent,
const xmlChar *to);
#endif
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -67,11 +67,11 @@ typedef enum {
* Returns the number of byte written, or -1 by lack of space. * Returns the number of byte written, or -1 by lack of space.
*/ */
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen, typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
unsigned char* in, int inlen); const unsigned char* in, int *inlen);
/** /**
* xmlCharEncodingInputFunc: * xmlCharEncodingOutputFunc:
* @out: a pointer to an array of bytes to store the result * @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out * @outlen: the length of @out
* @in: a pointer to an array of UTF-8 chars * @in: a pointer to an array of UTF-8 chars
@ -84,7 +84,7 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
* if the transcoding failed. * if the transcoding failed.
*/ */
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen, typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
unsigned char* in, int inlen); const unsigned char* in, int *inlen);
/* /*
* Block defining the handlers for non UTF-8 encodings. * Block defining the handlers for non UTF-8 encodings.
@ -101,10 +101,12 @@ struct _xmlCharEncodingHandler {
void xmlInitCharEncodingHandlers (void); void xmlInitCharEncodingHandlers (void);
void xmlCleanupCharEncodingHandlers (void); void xmlCleanupCharEncodingHandlers (void);
void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);
xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in); xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in,
int len);
xmlCharEncoding xmlParseCharEncoding (const char* name); xmlCharEncoding xmlParseCharEncoding (const char* name);
xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc); xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name); xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
int xmlCheckUTF8 (const unsigned char *utf);
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -15,12 +15,17 @@
extern "C" { extern "C" {
#endif #endif
#define XML_INTERNAL_GENERAL_ENTITY 1 /*
#define XML_EXTERNAL_GENERAL_PARSED_ENTITY 2 * The different valid entity types
#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3 */
#define XML_INTERNAL_PARAMETER_ENTITY 4 typedef enum {
#define XML_EXTERNAL_PARAMETER_ENTITY 5 XML_INTERNAL_GENERAL_ENTITY = 1,
#define XML_INTERNAL_PREDEFINED_ENTITY 6 XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2,
XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3,
XML_INTERNAL_PARAMETER_ENTITY = 4,
XML_EXTERNAL_PARAMETER_ENTITY = 5,
XML_INTERNAL_PREDEFINED_ENTITY = 6
} xmlEntityType;
/* /*
* An unit of storage for an entity, contains the string, the value * An unit of storage for an entity, contains the string, the value
@ -30,14 +35,32 @@ extern "C" {
typedef struct _xmlEntity xmlEntity; typedef struct _xmlEntity xmlEntity;
typedef xmlEntity *xmlEntityPtr; typedef xmlEntity *xmlEntityPtr;
struct _xmlEntity { struct _xmlEntity {
int type; /* The entity type */ #ifndef XML_WITHOUT_CORBA
int len; /* The lenght of the name */ void *_private; /* for Corba, must be first ! */
const xmlChar *name; /* Name of the entity */ #endif
const xmlChar *ExternalID; /* External identifier for PUBLIC Entity */ xmlElementType type; /* XML_ENTITY_DECL, must be second ! */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ const xmlChar *name; /* Attribute name */
xmlChar *content; /* The entity content or ndata if unparsed */ struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlChar *orig; /* content without ref substitution */
xmlChar *content; /* content or ndata if unparsed */
int length; /* the content length */ int length; /* the content length */
xmlChar *orig; /* The entity cont without ref substitution */ xmlEntityType etype; /* The entity type */
const xmlChar *ExternalID; /* External identifier for PUBLIC */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
#ifdef WITH_EXTRA_ENT_DETECT
/* Referenced entities name stack */
xmlChar *ent; /* Current parsed Node */
int entNr; /* Depth of the parsing stack */
int entMax; /* Max depth of the parsing stack */
xmlChar * *entTab; /* array of nodes */
#endif
}; };
/* /*
@ -52,7 +75,7 @@ typedef xmlEntitiesTable *xmlEntitiesTablePtr;
struct _xmlEntitiesTable { struct _xmlEntitiesTable {
int nb_entities; /* number of elements stored */ int nb_entities; /* number of elements stored */
int max_entities; /* maximum number of elements */ int max_entities; /* maximum number of elements */
xmlEntityPtr table; /* the table of entities */ xmlEntityPtr *table; /* the table of entities */
}; };
@ -60,13 +83,13 @@ struct _xmlEntitiesTable {
* External functions : * External functions :
*/ */
void xmlAddDocEntity (xmlDocPtr doc, xmlEntityPtr xmlAddDocEntity (xmlDocPtr doc,
const xmlChar *name, const xmlChar *name,
int type, int type,
const xmlChar *ExternalID, const xmlChar *ExternalID,
const xmlChar *SystemID, const xmlChar *SystemID,
const xmlChar *content); const xmlChar *content);
void xmlAddDtdEntity (xmlDocPtr doc, xmlEntityPtr xmlAddDtdEntity (xmlDocPtr doc,
const xmlChar *name, const xmlChar *name,
int type, int type,
const xmlChar *ExternalID, const xmlChar *ExternalID,
@ -88,9 +111,16 @@ xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table);
void xmlFreeEntitiesTable (xmlEntitiesTablePtr table); void xmlFreeEntitiesTable (xmlEntitiesTablePtr table);
void xmlDumpEntitiesTable (xmlBufferPtr buf, void xmlDumpEntitiesTable (xmlBufferPtr buf,
xmlEntitiesTablePtr table); xmlEntitiesTablePtr table);
void xmlDumpEntityDecl (xmlBufferPtr buf,
xmlEntityPtr ent);
xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table); xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table);
void xmlCleanupPredefinedEntities(void); void xmlCleanupPredefinedEntities(void);
#ifdef WITH_EXTRA_ENT_DETECT
int xmlEntityAddReference (xmlEntityPtr ent,
const xmlChar *to);
#endif
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -49,6 +49,9 @@ struct _xmlParserInput {
int col; /* Current column */ int col; /* Current column */
int consumed; /* How many xmlChars already consumed */ int consumed; /* How many xmlChars already consumed */
xmlParserInputDeallocate free; /* function to deallocate the base */ xmlParserInputDeallocate free; /* function to deallocate the base */
const xmlChar *encoding; /* the encoding string for entity */
const xmlChar *version; /* the version string for entity */
int standalone; /* Was that entity marked standalone */
}; };
/** /**
@ -95,6 +98,7 @@ typedef enum {
XML_PARSER_ENTITY_DECL, /* within an entity declaration */ XML_PARSER_ENTITY_DECL, /* within an entity declaration */
XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */
XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */
XML_PARSER_EPILOG /* the Misc* after the last end tag */ XML_PARSER_EPILOG /* the Misc* after the last end tag */
} xmlParserInputState; } xmlParserInputState;
@ -151,7 +155,7 @@ struct _xmlParserCtxt {
char *directory; /* the data directory */ char *directory; /* the data directory */
/* Node name stack only used for HTML parsing */ /* Node name stack */
xmlChar *name; /* Current parsed Node */ xmlChar *name; /* Current parsed Node */
int nameNr; /* Depth of the parsing stack */ int nameNr; /* Depth of the parsing stack */
int nameMax; /* Max depth of the parsing stack */ int nameMax; /* Max depth of the parsing stack */
@ -160,6 +164,20 @@ struct _xmlParserCtxt {
long nbChars; /* number of xmlChar processed */ long nbChars; /* number of xmlChar processed */
long checkIndex; /* used by progressive parsing lookup */ long checkIndex; /* used by progressive parsing lookup */
int keepBlanks; /* ugly but ... */ int keepBlanks; /* ugly but ... */
int disableSAX; /* SAX callbacks are disabled */
int inSubset; /* Parsing is in int 1/ext 2 subset */
xmlChar * intSubName; /* name of subset */
xmlChar * extSubURI; /* URI of external subset */
xmlChar * extSubSystem; /* SYSTEM ID of external subset */
/* xml:space values */
int * space; /* Should the parser preserve spaces */
int spaceNr; /* Depth of the parsing stack */
int spaceMax; /* Max depth of the parsing stack */
int * spaceTab; /* array of space infos */
int depth; /* to prevent entity substitution loops */
xmlParserInputPtr entity; /* used to check entities boundaries */
}; };
/** /**
@ -183,6 +201,8 @@ typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
const xmlChar *publicId, const xmlChar *systemId); const xmlChar *publicId, const xmlChar *systemId);
typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name, typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID); const xmlChar *ExternalID, const xmlChar *SystemID);
typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID);
typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
const xmlChar *name); const xmlChar *name);
typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
@ -254,6 +274,7 @@ struct _xmlSAXHandler {
fatalErrorSAXFunc fatalError; fatalErrorSAXFunc fatalError;
getParameterEntitySAXFunc getParameterEntity; getParameterEntitySAXFunc getParameterEntity;
cdataBlockSAXFunc cdataBlock; cdataBlockSAXFunc cdataBlock;
externalSubsetSAXFunc externalSubset;
}; };
/** /**
@ -278,7 +299,7 @@ extern xmlSAXHandler htmlDefaultSAXHandler;
*/ */
extern int xmlSubstituteEntitiesDefaultValue; extern int xmlSubstituteEntitiesDefaultValue;
extern int xmlGetWarningsDefaultValue;
/** /**
@ -363,6 +384,20 @@ xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
const xmlChar *ExternalID, const xmlChar *ExternalID,
const xmlChar *SystemID); const xmlChar *SystemID);
int xmlParseBalancedChunkMemory(xmlDocPtr doc,
xmlSAXHandlerPtr sax,
void *user_data,
int depth,
const xmlChar *string,
xmlNodePtr *list);
int xmlParseExternalEntity (xmlDocPtr doc,
xmlSAXHandlerPtr sax,
void *user_data,
int depth,
const xmlChar *URL,
const xmlChar *ID,
xmlNodePtr *list);
/** /**
* SAX initialization routines * SAX initialization routines
*/ */

View File

@ -17,31 +17,6 @@ extern "C" {
#define XML_MAX_NAMELEN 1000 #define XML_MAX_NAMELEN 1000
/**
* A few macros needed to help building the parser.
*/
/* #define UNICODE */
#ifdef UNICODE
typedef unsigned long CHARVAL;
#define NEXTCHARVAL(p) (unsigned long) \
((*(p) == 0) ? (unsigned long) 0 : \
((*(p) < 0x80) ? (unsigned long) (*(p)++) : \
(*(p) < 0xC0) ? (unsigned long) 0 : \
(*(p) < 0xE0) ? ((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) : \
(*(p) < 0xF0) ? (((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 + \
(*(p)++ & 0x3F)) : \
(*(p) < 0xF8) ? ((((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 + \
(*(p)++ & 0x3F)) << 6 + (*(p)++ & 0x3F)) : 0))
#else
typedef unsigned char CHARVAL;
#define NEXTCHARVAL(p) (unsigned long) *(p);
#define SKIPCHARVAL(p) (p)++;
#endif
#ifdef UNICODE
/************************************************************************ /************************************************************************
* * * *
* UNICODE version of the macros. * * UNICODE version of the macros. *
@ -404,7 +379,7 @@ typedef unsigned char CHARVAL;
#define IS_EXTENDER(c) \ #define IS_EXTENDER(c) \
(((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \ (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \ ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
((c) == 0xec6) || ((c) == 0x3005) \ ((c) == 0xec6) || ((c) == 0x3005) || \
(((c) >= 0x3031) && ((c) <= 0x3035)) || \ (((c) >= 0x3031) && ((c) <= 0x3035)) || \
(((c) >= 0x309b) && ((c) <= 0x309e)) || \ (((c) >= 0x309b) && ((c) <= 0x309e)) || \
(((c) >= 0x30fc) && ((c) <= 0x30fe))) (((c) >= 0x30fc) && ((c) <= 0x30fe)))
@ -423,65 +398,6 @@ typedef unsigned char CHARVAL;
*/ */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
#else
/************************************************************************
* *
* 8bits / ISO-Latin version of the macros. *
* *
************************************************************************/
/*
* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
* | [#x10000-#x10FFFF]
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/
#define IS_CHAR(c) \
((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
(((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
(((c) >= 0x10000) && ((c) <= 0x10FFFF)))
/*
* [85] BaseChar ::= ... long list see REC ...
*/
#define IS_BASECHAR(c) \
((((c) >= 0x0041) && ((c) <= 0x005A)) || \
(((c) >= 0x0061) && ((c) <= 0x007A)) || \
(((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
(((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
(((c) >= 0x00F8) && ((c) <= 0x00FF)))
/*
* [88] Digit ::= ... long list see REC ...
*/
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
/*
* [84] Letter ::= BaseChar | Ideographic
*/
#define IS_LETTER(c) IS_BASECHAR(c)
/*
* [87] CombiningChar ::= ... long list see REC ...
*/
#define IS_COMBINING(c) 0
/*
* [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
* #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
* [#x309D-#x309E] | [#x30FC-#x30FE]
*/
#define IS_EXTENDER(c) ((c) == 0xb7)
#endif /* !UNICODE */
/*
* Blank chars.
*
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
*/
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
((c) == 0x0D))
/* /*
* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
@ -502,10 +418,10 @@ typedef unsigned char CHARVAL;
if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; } if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }
#define MOVETO_ENDTAG(p) \ #define MOVETO_ENDTAG(p) \
while (IS_CHAR(*p) && (*(p) != '>')) (p)++ while ((*p) && (*(p) != '>')) (p)++
#define MOVETO_STARTTAG(p) \ #define MOVETO_STARTTAG(p) \
while (IS_CHAR(*p) && (*(p) != '<')) (p)++ while ((*p) && (*(p) != '<')) (p)++
/** /**
* Parser context * Parser context
@ -514,10 +430,13 @@ xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename); xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename);
xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer, xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer,
int size); int size);
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
xmlParserCtxtPtr xmlNewParserCtxt (void); xmlParserCtxtPtr xmlNewParserCtxt (void);
xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
const xmlChar *ID,
const xmlChar *base);
void xmlSwitchEncoding (xmlParserCtxtPtr ctxt, void xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
xmlCharEncoding enc); xmlCharEncoding enc);
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
/** /**
* Entities * Entities
@ -540,7 +459,8 @@ xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
/** /**
* Namespaces. * Namespaces.
*/ */
xmlChar * xmlSplitQName (const xmlChar *name, xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt,
const xmlChar *name,
xmlChar **prefix); xmlChar **prefix);
xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt); xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt, xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
@ -606,6 +526,7 @@ xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
int xmlParseSDDecl (xmlParserCtxtPtr ctxt); int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
void xmlParseXMLDecl (xmlParserCtxtPtr ctxt); void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
void xmlParseMisc (xmlParserCtxtPtr ctxt); void xmlParseMisc (xmlParserCtxtPtr ctxt);
void xmlParseExternalSubset (xmlParserCtxtPtr ctxt, void xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
const xmlChar *ExternalID, const xmlChar *ExternalID,
@ -624,6 +545,12 @@ xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt,
xmlChar end, xmlChar end,
xmlChar end2, xmlChar end2,
xmlChar end3); xmlChar end3);
xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
const xmlChar *str,
int what,
xmlChar end,
xmlChar end2,
xmlChar end3);
/* /*
* Generated by MACROS on top of parser.c c.f. PUSH_AND_POP * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP

View File

@ -36,24 +36,22 @@ typedef enum {
XML_DOCUMENT_TYPE_NODE= 10, XML_DOCUMENT_TYPE_NODE= 10,
XML_DOCUMENT_FRAG_NODE= 11, XML_DOCUMENT_FRAG_NODE= 11,
XML_NOTATION_NODE= 12, XML_NOTATION_NODE= 12,
XML_HTML_DOCUMENT_NODE= 13 XML_HTML_DOCUMENT_NODE= 13,
XML_DTD_NODE= 14,
XML_ELEMENT_DECL= 15,
XML_ATTRIBUTE_DECL= 16,
XML_ENTITY_DECL= 17
} xmlElementType; } xmlElementType;
/* /*
* Size of an internal character representation. * Size of an internal character representation.
* *
* Currently we use 8bit chars internal representation for memory efficiency, * We use 8bit chars internal representation for memory efficiency,
* but the parser is not tied to that, just define UNICODE to switch to * Note that with 8 bits wide xmlChars one can still use UTF-8 to handle
* a 16 bits internal representation. Note that with 8 bits wide * correctly non ISO-Latin input.
* xmlChars one can still use UTF-8 to handle correctly non ISO-Latin
* input.
*/ */
#ifdef UNICODE
typedef unsigned short xmlChar;
#else
typedef unsigned char xmlChar; typedef unsigned char xmlChar;
#endif
#ifndef WIN32 #ifndef WIN32
#ifndef CHAR #ifndef CHAR
@ -109,14 +107,25 @@ struct _xmlEnumeration {
typedef struct _xmlAttribute xmlAttribute; typedef struct _xmlAttribute xmlAttribute;
typedef xmlAttribute *xmlAttributePtr; typedef xmlAttribute *xmlAttributePtr;
struct _xmlAttribute { struct _xmlAttribute {
const xmlChar *elem; /* Element holding the attribute */ #ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_ATTRIBUTE_DECL, must be second ! */
const xmlChar *name; /* Attribute name */ const xmlChar *name; /* Attribute name */
struct _xmlAttribute *next; /* list of attributes of an element */ struct _xmlNode *children; /* NULL */
xmlAttributeType type; /* The type */ struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
struct _xmlAttribute *nexth; /* next in hash table */
xmlAttributeType atype; /* The attribute type */
xmlAttributeDefault def; /* the default */ xmlAttributeDefault def; /* the default */
const xmlChar *defaultValue; /* or the default value */ const xmlChar *defaultValue; /* or the default value */
xmlEnumerationPtr tree; /* or the enumeration tree if any */ xmlEnumerationPtr tree; /* or the enumeration tree if any */
const xmlChar *prefix; /* the namespace prefix if any */ const xmlChar *prefix; /* the namespace prefix if any */
const xmlChar *elem; /* Element holding the attribute */
}; };
/* /*
@ -156,8 +165,19 @@ typedef enum {
typedef struct _xmlElement xmlElement; typedef struct _xmlElement xmlElement;
typedef xmlElement *xmlElementPtr; typedef xmlElement *xmlElementPtr;
struct _xmlElement { struct _xmlElement {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_ELEMENT_DECL, must be second ! */
const xmlChar *name; /* Element name */ const xmlChar *name; /* Element name */
xmlElementTypeVal type; /* The type */ struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlElementTypeVal etype; /* The type */
xmlElementContentPtr content; /* the allowed element content */ xmlElementContentPtr content; /* the allowed element content */
xmlAttributePtr attributes; /* List of the declared attributes */ xmlAttributePtr attributes; /* List of the declared attributes */
}; };
@ -188,14 +208,25 @@ struct _xmlNs {
typedef struct _xmlDtd xmlDtd; typedef struct _xmlDtd xmlDtd;
typedef xmlDtd *xmlDtdPtr; typedef xmlDtd *xmlDtdPtr;
struct _xmlDtd { struct _xmlDtd {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_DTD_NODE, must be second ! */
const xmlChar *name; /* Name of the DTD */ const xmlChar *name; /* Name of the DTD */
const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */ struct _xmlNode *children; /* the value of the property link */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */ struct _xmlNode *last; /* last child link */
struct _xmlDoc *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
/* End of common part */
void *notations; /* Hash table for notations if any */ void *notations; /* Hash table for notations if any */
void *elements; /* Hash table for elements if any */ void *elements; /* Hash table for elements if any */
void *attributes; /* Hash table for attributes if any */ void *attributes; /* Hash table for attributes if any */
void *entities; /* Hash table for entities if any */ void *entities; /* Hash table for entities if any */
/* struct xmlDtd *next; * next link for this document */ const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */
}; };
/* /*
@ -206,14 +237,17 @@ typedef xmlAttr *xmlAttrPtr;
struct _xmlAttr { struct _xmlAttr {
#ifndef XML_WITHOUT_CORBA #ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */ void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif #endif
xmlElementType type; /* XML_ATTRIBUTE_NODE, must be third ! */ xmlElementType type; /* XML_ATTRIBUTE_NODE, must be second ! */
struct _xmlNode *node; /* attr->node link */
struct _xmlAttr *next; /* attribute list link */
const xmlChar *name; /* the name of the property */ const xmlChar *name; /* the name of the property */
struct _xmlNode *val; /* the value of the property */ struct _xmlNode *children; /* the value of the property */
struct _xmlNode *last; /* NULL */
struct _xmlNode *parent; /* child->parent link */
struct _xmlAttr *next; /* next sibling link */
struct _xmlAttr *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlNs *ns; /* pointer to the associated namespace */ xmlNs *ns; /* pointer to the associated namespace */
xmlAttributeType atype; /* the attribute type if validating */
}; };
/* /*
@ -266,24 +300,25 @@ typedef xmlNode *xmlNodePtr;
struct _xmlNode { struct _xmlNode {
#ifndef XML_WITHOUT_CORBA #ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */ void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif #endif
xmlElementType type; /* type number in the DTD, must be third ! */ xmlElementType type; /* type number, must be second ! */
struct _xmlDoc *doc; /* the containing document */ const xmlChar *name; /* the name of the node, or the entity */
struct _xmlNode *children; /* parent->childs link */
struct _xmlNode *last; /* last child link */
struct _xmlNode *parent; /* child->parent link */ struct _xmlNode *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */ struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */ struct _xmlNode *prev; /* previous sibling link */
struct _xmlNode *childs; /* parent->childs link */ struct _xmlDoc *doc; /* the containing document */
struct _xmlNode *last; /* last child link */
struct _xmlAttr *properties;/* properties list */
const xmlChar *name; /* the name of the node, or the entity */
xmlNs *ns; /* pointer to the associated namespace */ xmlNs *ns; /* pointer to the associated namespace */
xmlNs *nsDef; /* namespace definitions on this node */
#ifndef XML_USE_BUFFER_CONTENT #ifndef XML_USE_BUFFER_CONTENT
xmlChar *content; /* the content */ xmlChar *content; /* the content */
#else #else
xmlBufferPtr content; /* the content in a buffer */ xmlBufferPtr content; /* the content in a buffer */
#endif #endif
/* End of common part */
struct _xmlAttr *properties;/* properties list */
xmlNs *nsDef; /* namespace definitions on this node */
}; };
/* /*
@ -294,20 +329,27 @@ typedef xmlDoc *xmlDocPtr;
struct _xmlDoc { struct _xmlDoc {
#ifndef XML_WITHOUT_CORBA #ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */ void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif #endif
xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! */ xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! */
char *name; /* name/filename/URI of the document */ char *name; /* name/filename/URI of the document */
const xmlChar *version; /* the XML version string */ struct _xmlNode *children; /* the document tree */
const xmlChar *encoding; /* encoding, if any */ struct _xmlNode *last; /* last child link */
struct _xmlNode *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* autoreference to itself */
/* End of common part */
int compression;/* level of zlib compression */ int compression;/* level of zlib compression */
int standalone; /* standalone document (no external refs) */ int standalone; /* standalone document (no external refs) */
struct _xmlDtd *intSubset; /* the document internal subset */ struct _xmlDtd *intSubset; /* the document internal subset */
struct _xmlDtd *extSubset; /* the document external subset */ struct _xmlDtd *extSubset; /* the document external subset */
struct _xmlNs *oldNs; /* Global namespace, the old way */ struct _xmlNs *oldNs; /* Global namespace, the old way */
struct _xmlNode *root; /* the document tree */ const xmlChar *version; /* the XML version string */
const xmlChar *encoding; /* encoding, if any */
void *ids; /* Hash table for ID attributes if any */ void *ids; /* Hash table for ID attributes if any */
void *refs; /* Hash table for IDREFs attributes if any */ void *refs; /* Hash table for IDREFs attributes if any */
const xmlChar *URL; /* The URI for that document */
}; };
/* /*
@ -422,6 +464,8 @@ xmlNodePtr xmlNewComment (const xmlChar *content);
xmlNodePtr xmlNewCDataBlock (xmlDocPtr doc, xmlNodePtr xmlNewCDataBlock (xmlDocPtr doc,
const xmlChar *content, const xmlChar *content,
int len); int len);
xmlNodePtr xmlNewCharRef (xmlDocPtr doc,
const xmlChar *name);
xmlNodePtr xmlNewReference (xmlDocPtr doc, xmlNodePtr xmlNewReference (xmlDocPtr doc,
const xmlChar *name); const xmlChar *name);
xmlNodePtr xmlCopyNode (xmlNodePtr node, xmlNodePtr xmlCopyNode (xmlNodePtr node,
@ -513,13 +557,14 @@ xmlChar * xmlNodeGetContent (xmlNodePtr cur);
xmlChar * xmlNodeGetLang (xmlNodePtr cur); xmlChar * xmlNodeGetLang (xmlNodePtr cur);
void xmlNodeSetLang (xmlNodePtr cur, void xmlNodeSetLang (xmlNodePtr cur,
const xmlChar *lang); const xmlChar *lang);
int xmlNodeGetSpacePreserve (xmlNodePtr cur);
xmlChar * xmlNodeGetBase (xmlDocPtr doc, xmlChar * xmlNodeGetBase (xmlDocPtr doc,
xmlNodePtr cur); xmlNodePtr cur);
/* /*
* Removing content. * Removing content.
*/ */
int xmlRemoveProp (xmlAttrPtr attr); /* TODO */ int xmlRemoveProp (xmlAttrPtr attr);
int xmlRemoveNode (xmlNodePtr node); /* TODO */ int xmlRemoveNode (xmlNodePtr node); /* TODO */
/* /*
@ -532,6 +577,12 @@ void xmlBufferWriteChar (xmlBufferPtr buf,
void xmlBufferWriteQuotedString(xmlBufferPtr buf, void xmlBufferWriteQuotedString(xmlBufferPtr buf,
const xmlChar *string); const xmlChar *string);
/*
* Namespace handling
*/
int xmlReconciliateNs (xmlDocPtr doc,
xmlNodePtr tree);
/* /*
* Saving * Saving
*/ */

View File

@ -29,6 +29,14 @@ struct _xmlValidCtxt {
void *userData; /* user specific data block */ void *userData; /* user specific data block */
xmlValidityErrorFunc error; /* the callback in case of errors */ xmlValidityErrorFunc error; /* the callback in case of errors */
xmlValidityWarningFunc warning; /* the callback in case of warning */ xmlValidityWarningFunc warning; /* the callback in case of warning */
/* Node analysis stack used when validating within entities */
xmlNodePtr node; /* Current parsed Node */
int nodeNr; /* Depth of the parsing stack */
int nodeMax; /* Max depth of the parsing stack */
xmlNodePtr *nodeTab; /* array of nodes */
int finishDtd; /* finished validating the Dtd ? */
}; };
/* /*
@ -114,6 +122,8 @@ xmlNotationPtr xmlAddNotationDecl (xmlValidCtxtPtr ctxt,
const xmlChar *SystemID); const xmlChar *SystemID);
xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table); xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table);
void xmlFreeNotationTable(xmlNotationTablePtr table); void xmlFreeNotationTable(xmlNotationTablePtr table);
void xmlDumpNotationDecl (xmlBufferPtr buf,
xmlNotationPtr nota);
void xmlDumpNotationTable(xmlBufferPtr buf, void xmlDumpNotationTable(xmlBufferPtr buf,
xmlNotationTablePtr table); xmlNotationTablePtr table);
@ -122,6 +132,9 @@ xmlElementContentPtr xmlNewElementContent (xmlChar *name,
xmlElementContentType type); xmlElementContentType type);
xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content); xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content);
void xmlFreeElementContent(xmlElementContentPtr cur); void xmlFreeElementContent(xmlElementContentPtr cur);
void xmlSprintfElementContent(char *buf,
xmlElementContentPtr content,
int glob);
/* Element */ /* Element */
xmlElementPtr xmlAddElementDecl (xmlValidCtxtPtr ctxt, xmlElementPtr xmlAddElementDecl (xmlValidCtxtPtr ctxt,
@ -133,6 +146,8 @@ xmlElementTablePtr xmlCopyElementTable (xmlElementTablePtr table);
void xmlFreeElementTable (xmlElementTablePtr table); void xmlFreeElementTable (xmlElementTablePtr table);
void xmlDumpElementTable (xmlBufferPtr buf, void xmlDumpElementTable (xmlBufferPtr buf,
xmlElementTablePtr table); xmlElementTablePtr table);
void xmlDumpElementDecl (xmlBufferPtr buf,
xmlElementPtr elem);
/* Enumeration */ /* Enumeration */
xmlEnumerationPtr xmlCreateEnumeration (xmlChar *name); xmlEnumerationPtr xmlCreateEnumeration (xmlChar *name);
@ -144,6 +159,7 @@ xmlAttributePtr xmlAddAttributeDecl (xmlValidCtxtPtr ctxt,
xmlDtdPtr dtd, xmlDtdPtr dtd,
const xmlChar *elem, const xmlChar *elem,
const xmlChar *name, const xmlChar *name,
const xmlChar *prefix,
xmlAttributeType type, xmlAttributeType type,
xmlAttributeDefault def, xmlAttributeDefault def,
const xmlChar *defaultValue, const xmlChar *defaultValue,
@ -152,6 +168,8 @@ xmlAttributeTablePtr xmlCopyAttributeTable (xmlAttributeTablePtr table);
void xmlFreeAttributeTable (xmlAttributeTablePtr table); void xmlFreeAttributeTable (xmlAttributeTablePtr table);
void xmlDumpAttributeTable (xmlBufferPtr buf, void xmlDumpAttributeTable (xmlBufferPtr buf,
xmlAttributeTablePtr table); xmlAttributeTablePtr table);
void xmlDumpAttributeDecl (xmlBufferPtr buf,
xmlAttributePtr attr);
/* IDs */ /* IDs */
xmlIDPtr xmlAddID (xmlValidCtxtPtr ctxt, xmlIDPtr xmlAddID (xmlValidCtxtPtr ctxt,
@ -188,6 +206,10 @@ int xmlValidateRoot (xmlValidCtxtPtr ctxt,
int xmlValidateElementDecl (xmlValidCtxtPtr ctxt, int xmlValidateElementDecl (xmlValidCtxtPtr ctxt,
xmlDocPtr doc, xmlDocPtr doc,
xmlElementPtr elem); xmlElementPtr elem);
xmlChar * xmlValidNormalizeAttributeValue(xmlDocPtr doc,
xmlNodePtr elem,
const xmlChar *name,
const xmlChar *value);
int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt,
xmlDocPtr doc, xmlDocPtr doc,
xmlAttributePtr attr); xmlAttributePtr attr);
@ -199,6 +221,8 @@ int xmlValidateNotationDecl (xmlValidCtxtPtr ctxt,
int xmlValidateDtd (xmlValidCtxtPtr ctxt, int xmlValidateDtd (xmlValidCtxtPtr ctxt,
xmlDocPtr doc, xmlDocPtr doc,
xmlDtdPtr dtd); xmlDtdPtr dtd);
int xmlValidateDtdFinal (xmlValidCtxtPtr ctxt,
xmlDocPtr doc);
int xmlValidateDocument (xmlValidCtxtPtr ctxt, int xmlValidateDocument (xmlValidCtxtPtr ctxt,
xmlDocPtr doc); xmlDocPtr doc);
int xmlValidateElement (xmlValidCtxtPtr ctxt, int xmlValidateElement (xmlValidCtxtPtr ctxt,

View File

@ -8,7 +8,7 @@
#ifndef _DEBUG_MEMORY_ALLOC_ #ifndef _DEBUG_MEMORY_ALLOC_
#define _DEBUG_MEMORY_ALLOC_ #define _DEBUG_MEMORY_ALLOC_
#define NO_DEBUG_MEMORY /* #define NO_DEBUG_MEMORY */
#ifdef NO_DEBUG_MEMORY #ifdef NO_DEBUG_MEMORY
#ifdef HAVE_MALLOC_H #ifdef HAVE_MALLOC_H

View File

@ -3,14 +3,13 @@
%define prefix /usr %define prefix /usr
Summary: libXML library Summary: libXML library
Name: libxml Name: libxml2
Version: %ver Version: %ver
Release: 1 Release: 1
Copyright: LGPL Copyright: LGPL
Group: X11/Libraries Group: X11/Libraries
Source: ftp://ftp.gnome.org/pub/GNOME/sources/libxml/libxml-%{ver}.tar.gz Source: ftp://ftp.gnome.org/pub/GNOME/sources/libxml/libxml-%{ver}.tar.gz
BuildRoot: /var/tmp/libxml-%{PACKAGE_VERSION}-root BuildRoot: /var/tmp/libxml-%{PACKAGE_VERSION}-root
Provides: libxml.so.0
URL: http://rpmfind.net/veillard/XML/ URL: http://rpmfind.net/veillard/XML/
Prereq: /sbin/install-info Prereq: /sbin/install-info

View File

@ -869,10 +869,11 @@ xmlNanoFTPConnect(void *ctx) {
else else
#ifndef HAVE_SNPRINTF #ifndef HAVE_SNPRINTF
len = sprintf(buf, "PASS libxml@%s\r\n", len = sprintf(buf, "PASS libxml@%s\r\n",
hostname);
#else /* HAVE_SNPRINTF */ #else /* HAVE_SNPRINTF */
len = snprintf(buf, sizeof(buf), "PASS libxml@%s\r\n", len = snprintf(buf, sizeof(buf), "PASS libxml@%s\r\n",
#endif /* HAVE_SNPRINTF */
hostname); hostname);
#endif /* HAVE_SNPRINTF */
#ifdef DEBUG_FTP #ifdef DEBUG_FTP
printf(buf); printf(buf);
#endif #endif
@ -1226,11 +1227,13 @@ xmlNanoFTPGetConnection(void *ctx) {
portp = (unsigned char *) &dataAddr.sin_port; portp = (unsigned char *) &dataAddr.sin_port;
#ifndef HAVE_SNPRINTF #ifndef HAVE_SNPRINTF
len = sprintf(buf, "PORT %d,%d,%d,%d,%d,%d\r\n", len = sprintf(buf, "PORT %d,%d,%d,%d,%d,%d\r\n",
#else /* HAVE_SNPRINTF */
len = snprintf(buf, sizeof(buf), "PORT %d,%d,%d,%d,%d,%d\r\n",
#endif /* HAVE_SNPRINTF */
adp[0] & 0xff, adp[1] & 0xff, adp[2] & 0xff, adp[3] & 0xff, adp[0] & 0xff, adp[1] & 0xff, adp[2] & 0xff, adp[3] & 0xff,
portp[0] & 0xff, portp[1] & 0xff); portp[0] & 0xff, portp[1] & 0xff);
#else /* HAVE_SNPRINTF */
len = snprintf(buf, sizeof(buf), "PORT %d,%d,%d,%d,%d,%d\r\n",
adp[0] & 0xff, adp[1] & 0xff, adp[2] & 0xff, adp[3] & 0xff,
portp[0] & 0xff, portp[1] & 0xff);
#endif /* HAVE_SNPRINTF */
buf[sizeof(buf) - 1] = 0; buf[sizeof(buf) - 1] = 0;
#ifdef DEBUG_FTP #ifdef DEBUG_FTP
printf(buf); printf(buf);
@ -1264,13 +1267,34 @@ int
xmlNanoFTPCloseConnection(void *ctx) { xmlNanoFTPCloseConnection(void *ctx) {
xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx;
int res; int res;
fd_set rfd, efd;
struct timeval tv;
close(ctxt->dataFd); ctxt->dataFd = -1; close(ctxt->dataFd); ctxt->dataFd = -1;
tv.tv_sec = 15;
tv.tv_usec = 0;
FD_ZERO(&rfd);
FD_SET(ctxt->controlFd, &rfd);
FD_ZERO(&efd);
FD_SET(ctxt->controlFd, &efd);
res = select(ctxt->controlFd + 1, &rfd, NULL, &efd, &tv);
if (res < 0) {
#ifdef DEBUG_FTP
perror("select");
#endif
close(ctxt->controlFd); ctxt->controlFd = -1;
return(-1);
}
if (res == 0) {
fprintf(stderr, "xmlNanoFTPCloseConnection: timeout\n");
close(ctxt->controlFd); ctxt->controlFd = -1;
} else {
res = xmlNanoFTPGetResponse(ctxt); res = xmlNanoFTPGetResponse(ctxt);
if (res != 2) { if (res != 2) {
close(ctxt->controlFd); ctxt->controlFd = -1; close(ctxt->controlFd); ctxt->controlFd = -1;
return(-1); return(-1);
} }
}
return(0); return(0);
} }

View File

@ -753,7 +753,7 @@ retry:
} }
ctxt->fd = ret; ctxt->fd = ret;
if (proxy) { if (proxy) {
#ifdef have_snprintf #ifdef HAVE_SNPRINTF
if (ctxt->port != 80) if (ctxt->port != 80)
snprintf(buf, sizeof(buf), snprintf(buf, sizeof(buf),
"GET http://%s:%d%s HTTP/1.0\r\nHost: %s\r\n\r\n", "GET http://%s:%d%s HTTP/1.0\r\nHost: %s\r\n\r\n",

3399
parser.c

File diff suppressed because it is too large Load Diff

View File

@ -49,6 +49,9 @@ struct _xmlParserInput {
int col; /* Current column */ int col; /* Current column */
int consumed; /* How many xmlChars already consumed */ int consumed; /* How many xmlChars already consumed */
xmlParserInputDeallocate free; /* function to deallocate the base */ xmlParserInputDeallocate free; /* function to deallocate the base */
const xmlChar *encoding; /* the encoding string for entity */
const xmlChar *version; /* the version string for entity */
int standalone; /* Was that entity marked standalone */
}; };
/** /**
@ -95,6 +98,7 @@ typedef enum {
XML_PARSER_ENTITY_DECL, /* within an entity declaration */ XML_PARSER_ENTITY_DECL, /* within an entity declaration */
XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */
XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */
XML_PARSER_EPILOG /* the Misc* after the last end tag */ XML_PARSER_EPILOG /* the Misc* after the last end tag */
} xmlParserInputState; } xmlParserInputState;
@ -151,7 +155,7 @@ struct _xmlParserCtxt {
char *directory; /* the data directory */ char *directory; /* the data directory */
/* Node name stack only used for HTML parsing */ /* Node name stack */
xmlChar *name; /* Current parsed Node */ xmlChar *name; /* Current parsed Node */
int nameNr; /* Depth of the parsing stack */ int nameNr; /* Depth of the parsing stack */
int nameMax; /* Max depth of the parsing stack */ int nameMax; /* Max depth of the parsing stack */
@ -160,6 +164,20 @@ struct _xmlParserCtxt {
long nbChars; /* number of xmlChar processed */ long nbChars; /* number of xmlChar processed */
long checkIndex; /* used by progressive parsing lookup */ long checkIndex; /* used by progressive parsing lookup */
int keepBlanks; /* ugly but ... */ int keepBlanks; /* ugly but ... */
int disableSAX; /* SAX callbacks are disabled */
int inSubset; /* Parsing is in int 1/ext 2 subset */
xmlChar * intSubName; /* name of subset */
xmlChar * extSubURI; /* URI of external subset */
xmlChar * extSubSystem; /* SYSTEM ID of external subset */
/* xml:space values */
int * space; /* Should the parser preserve spaces */
int spaceNr; /* Depth of the parsing stack */
int spaceMax; /* Max depth of the parsing stack */
int * spaceTab; /* array of space infos */
int depth; /* to prevent entity substitution loops */
xmlParserInputPtr entity; /* used to check entities boundaries */
}; };
/** /**
@ -183,6 +201,8 @@ typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
const xmlChar *publicId, const xmlChar *systemId); const xmlChar *publicId, const xmlChar *systemId);
typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name, typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID); const xmlChar *ExternalID, const xmlChar *SystemID);
typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID);
typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
const xmlChar *name); const xmlChar *name);
typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
@ -254,6 +274,7 @@ struct _xmlSAXHandler {
fatalErrorSAXFunc fatalError; fatalErrorSAXFunc fatalError;
getParameterEntitySAXFunc getParameterEntity; getParameterEntitySAXFunc getParameterEntity;
cdataBlockSAXFunc cdataBlock; cdataBlockSAXFunc cdataBlock;
externalSubsetSAXFunc externalSubset;
}; };
/** /**
@ -278,7 +299,7 @@ extern xmlSAXHandler htmlDefaultSAXHandler;
*/ */
extern int xmlSubstituteEntitiesDefaultValue; extern int xmlSubstituteEntitiesDefaultValue;
extern int xmlGetWarningsDefaultValue;
/** /**
@ -363,6 +384,20 @@ xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
const xmlChar *ExternalID, const xmlChar *ExternalID,
const xmlChar *SystemID); const xmlChar *SystemID);
int xmlParseBalancedChunkMemory(xmlDocPtr doc,
xmlSAXHandlerPtr sax,
void *user_data,
int depth,
const xmlChar *string,
xmlNodePtr *list);
int xmlParseExternalEntity (xmlDocPtr doc,
xmlSAXHandlerPtr sax,
void *user_data,
int depth,
const xmlChar *URL,
const xmlChar *ID,
xmlNodePtr *list);
/** /**
* SAX initialization routines * SAX initialization routines
*/ */

View File

@ -17,31 +17,6 @@ extern "C" {
#define XML_MAX_NAMELEN 1000 #define XML_MAX_NAMELEN 1000
/**
* A few macros needed to help building the parser.
*/
/* #define UNICODE */
#ifdef UNICODE
typedef unsigned long CHARVAL;
#define NEXTCHARVAL(p) (unsigned long) \
((*(p) == 0) ? (unsigned long) 0 : \
((*(p) < 0x80) ? (unsigned long) (*(p)++) : \
(*(p) < 0xC0) ? (unsigned long) 0 : \
(*(p) < 0xE0) ? ((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) : \
(*(p) < 0xF0) ? (((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 + \
(*(p)++ & 0x3F)) : \
(*(p) < 0xF8) ? ((((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 + \
(*(p)++ & 0x3F)) << 6 + (*(p)++ & 0x3F)) : 0))
#else
typedef unsigned char CHARVAL;
#define NEXTCHARVAL(p) (unsigned long) *(p);
#define SKIPCHARVAL(p) (p)++;
#endif
#ifdef UNICODE
/************************************************************************ /************************************************************************
* * * *
* UNICODE version of the macros. * * UNICODE version of the macros. *
@ -404,7 +379,7 @@ typedef unsigned char CHARVAL;
#define IS_EXTENDER(c) \ #define IS_EXTENDER(c) \
(((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \ (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \ ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
((c) == 0xec6) || ((c) == 0x3005) \ ((c) == 0xec6) || ((c) == 0x3005) || \
(((c) >= 0x3031) && ((c) <= 0x3035)) || \ (((c) >= 0x3031) && ((c) <= 0x3035)) || \
(((c) >= 0x309b) && ((c) <= 0x309e)) || \ (((c) >= 0x309b) && ((c) <= 0x309e)) || \
(((c) >= 0x30fc) && ((c) <= 0x30fe))) (((c) >= 0x30fc) && ((c) <= 0x30fe)))
@ -423,65 +398,6 @@ typedef unsigned char CHARVAL;
*/ */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
#else
/************************************************************************
* *
* 8bits / ISO-Latin version of the macros. *
* *
************************************************************************/
/*
* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
* | [#x10000-#x10FFFF]
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/
#define IS_CHAR(c) \
((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
(((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
(((c) >= 0x10000) && ((c) <= 0x10FFFF)))
/*
* [85] BaseChar ::= ... long list see REC ...
*/
#define IS_BASECHAR(c) \
((((c) >= 0x0041) && ((c) <= 0x005A)) || \
(((c) >= 0x0061) && ((c) <= 0x007A)) || \
(((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
(((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
(((c) >= 0x00F8) && ((c) <= 0x00FF)))
/*
* [88] Digit ::= ... long list see REC ...
*/
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
/*
* [84] Letter ::= BaseChar | Ideographic
*/
#define IS_LETTER(c) IS_BASECHAR(c)
/*
* [87] CombiningChar ::= ... long list see REC ...
*/
#define IS_COMBINING(c) 0
/*
* [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
* #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
* [#x309D-#x309E] | [#x30FC-#x30FE]
*/
#define IS_EXTENDER(c) ((c) == 0xb7)
#endif /* !UNICODE */
/*
* Blank chars.
*
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
*/
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
((c) == 0x0D))
/* /*
* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
@ -502,10 +418,10 @@ typedef unsigned char CHARVAL;
if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; } if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }
#define MOVETO_ENDTAG(p) \ #define MOVETO_ENDTAG(p) \
while (IS_CHAR(*p) && (*(p) != '>')) (p)++ while ((*p) && (*(p) != '>')) (p)++
#define MOVETO_STARTTAG(p) \ #define MOVETO_STARTTAG(p) \
while (IS_CHAR(*p) && (*(p) != '<')) (p)++ while ((*p) && (*(p) != '<')) (p)++
/** /**
* Parser context * Parser context
@ -514,10 +430,13 @@ xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename); xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename);
xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer, xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer,
int size); int size);
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
xmlParserCtxtPtr xmlNewParserCtxt (void); xmlParserCtxtPtr xmlNewParserCtxt (void);
xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
const xmlChar *ID,
const xmlChar *base);
void xmlSwitchEncoding (xmlParserCtxtPtr ctxt, void xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
xmlCharEncoding enc); xmlCharEncoding enc);
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
/** /**
* Entities * Entities
@ -540,7 +459,8 @@ xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
/** /**
* Namespaces. * Namespaces.
*/ */
xmlChar * xmlSplitQName (const xmlChar *name, xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt,
const xmlChar *name,
xmlChar **prefix); xmlChar **prefix);
xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt); xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt, xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
@ -606,6 +526,7 @@ xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
int xmlParseSDDecl (xmlParserCtxtPtr ctxt); int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
void xmlParseXMLDecl (xmlParserCtxtPtr ctxt); void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
void xmlParseMisc (xmlParserCtxtPtr ctxt); void xmlParseMisc (xmlParserCtxtPtr ctxt);
void xmlParseExternalSubset (xmlParserCtxtPtr ctxt, void xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
const xmlChar *ExternalID, const xmlChar *ExternalID,
@ -624,6 +545,12 @@ xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt,
xmlChar end, xmlChar end,
xmlChar end2, xmlChar end2,
xmlChar end3); xmlChar end3);
xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
const xmlChar *str,
int what,
xmlChar end,
xmlChar end2,
xmlChar end3);
/* /*
* Generated by MACROS on top of parser.c c.f. PUSH_AND_POP * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP

View File

@ -1,6 +1,5 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd"> <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd">
<!--DOCTYPE svg SYSTEM "svg-19990412.dtd"-->
<svg width="4in" height="3in"> <svg width="4in" height="3in">
<title>Kona Lavadome mountain bike <title>Kona Lavadome mountain bike
</title> </title>

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@ -1,3 +1,6 @@
./test/VC/OneID:4: validity error: Element doc has too may ID attributes defined : id ./test/VC/OneID:4: validity error: Element doc has too may ID attributes defined : id
<!ATTLIST doc id ID #IMPLIED> <!ATTLIST doc id ID #IMPLIED>
^ ^
./test/VC/OneID:4: validity error: Element doc has 2 ID attribute defined in the internal subset : id
<!ATTLIST doc id ID #IMPLIED>
^

View File

@ -1,3 +1,6 @@
./test/VC/OneID2:3: validity error: Element doc has 2 ID attribute defined in the internal subset : id
<!ATTLIST doc id ID #IMPLIED>
^
./test/VC/OneID2:4: validity error: Element doc has too may ID attributes defined : val ./test/VC/OneID2:4: validity error: Element doc has too may ID attributes defined : val
<!ELEMENT doc (#PCDATA)> <!ELEMENT doc (#PCDATA)>
^ ^

View File

@ -1,3 +1,3 @@
./test/VC/OneID3:2: validity error: Element doc has ID attribute defined in the external subset : id dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val
<!ATTLIST doc id ID #IMPLIED> <!ATTLIST doc val ID #IMPLIED>
^ ^

View File

@ -1,3 +1,3 @@
./test/VC/UniqueElementTypeDeclaration:3: validity error: Redefinition of element a dtds/a.dtd:1: validity error: Redefinition of element a
<!ELEMENT a (#PCDATA | b | c)*> <!ELEMENT a (#PCDATA | b | c)*>
^ ^

View File

@ -1,3 +1,4 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<!DOCTYPE MEMO PUBLIC "-//SGMLSOURCE//DTD MEMO//EN" "http://www.sgmlsource.com/dtds/memo.dtd"> <!DOCTYPE MEMO PUBLIC "-//SGMLSOURCE//DTD MEMO//EN" "http://www.sgmlsource.com/dtds/memo.dtd">
<MEMO/> <MEMO>
</MEMO>

View File

@ -1,6 +1,6 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<!DOCTYPE doc [ <!DOCTYPE doc [
<!ENTITY % YN '"Yes"'> <!ENTITY YN '"Yes"'>
<!ENTITY WhatHeSaid "He said %YN;"> <!ENTITY WhatHeSaid "He said &YN;">
]> ]>
<doc>&WhatHeSaid;</doc> <doc>&WhatHeSaid;</doc>

View File

@ -0,0 +1,7 @@
<?xml version="1.0"?>
<!-- comment before the DTD -->
<!DOCTYPE doc [
<!ELEMENT doc ANY>
]>
<!-- comment after the DTD -->
<doc/>

View File

@ -1,5 +1,5 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<EXAMPLE> <EXAMPLE>
This is an inverted exclamation sign &#161; This is an inverted exclamation sign &#xA1;
This is a space This is a space
</EXAMPLE> </EXAMPLE>

View File

@ -6,5 +6,5 @@
<!ELEMENT para (#PCDATA)> <!ELEMENT para (#PCDATA)>
]> ]>
<item> <item>
<para>&apos;they called me &sampleEnt;&apos;</para> <para>'they called me &sampleEnt;'</para>
</item> </item>

View File

@ -4,7 +4,7 @@
<!ENTITY test2 "test 2"> <!ENTITY test2 "test 2">
]> ]>
<doc> <doc>
<Content>Reten&#231;&#227;o</Content> <Content>Reten&#xE7;&#xE3;o</Content>
<Content>&lt;&gt;</Content> <Content>&lt;&gt;</Content>
<Content>&test1;&test2;</Content> <Content>&test1;&test2;</Content>
</doc> </doc>

View File

@ -2,4 +2,5 @@
<!DOCTYPE spec PUBLIC "-//testspec//" "dtds/eve.dtd" [ <!DOCTYPE spec PUBLIC "-//testspec//" "dtds/eve.dtd" [
<!ENTITY iso6.doc.date "29-May-1999"> <!ENTITY iso6.doc.date "29-May-1999">
]> ]>
<spec/> <spec>
</spec>

View File

@ -1,3 +1,4 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<!DOCTYPE MEMO PUBLIC "-//SGMLSOURCE//DTD MEMO//EN" "http://www.sgmlsource.com/dtds/memo.dtd"> <!DOCTYPE MEMO PUBLIC "-//SGMLSOURCE//DTD MEMO//EN" "http://www.sgmlsource.com/dtds/memo.dtd">
<MEMO/> <MEMO>
</MEMO>

View File

@ -1,6 +1,6 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<!DOCTYPE doc [ <!DOCTYPE doc [
<!ENTITY % YN '"Yes"'> <!ENTITY YN '"Yes"'>
<!ENTITY WhatHeSaid "He said %YN;"> <!ENTITY WhatHeSaid "He said &YN;">
]> ]>
<doc>He said &quot;Yes&quot;</doc> <doc>He said &amp;YN;</doc>

View File

@ -1,5 +1,5 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<EXAMPLE> <EXAMPLE>
This is an inverted exclamation sign &#161; This is an inverted exclamation sign &#xA1;
This is a space This is a space
</EXAMPLE> </EXAMPLE>

View File

@ -6,5 +6,5 @@
<!ELEMENT para (#PCDATA)> <!ELEMENT para (#PCDATA)>
]> ]>
<item> <item>
<para>&apos;they called me the hyacinth girl&apos;</para> <para>'they called me the hyacinth girl'</para>
</item> </item>

View File

@ -4,7 +4,7 @@
<!ENTITY test2 "test 2"> <!ENTITY test2 "test 2">
]> ]>
<doc> <doc>
<Content>Reten&#231;&#227;o</Content> <Content>Reten&#xE7;&#xE3;o</Content>
<Content>&lt;&gt;</Content> <Content>&lt;&gt;</Content>
<Content>test 1test 2</Content> <Content>test 1test 2</Content>
</doc> </doc>

View File

@ -2,4 +2,5 @@
<!DOCTYPE spec PUBLIC "-//testspec//" "dtds/eve.dtd" [ <!DOCTYPE spec PUBLIC "-//testspec//" "dtds/eve.dtd" [
<!ENTITY iso6.doc.date "29-May-1999"> <!ENTITY iso6.doc.date "29-May-1999">
]> ]>
<spec/> <spec>
</spec>

View File

@ -2,14 +2,12 @@
<RDF:RDF xmlns:RDF="http://www.w3.org/TR/WD-rdf-syntax#" p3p="http//www.w3.org/TR/1998/WD-P3P10-syntax#proposal.DTD"> <RDF:RDF xmlns:RDF="http://www.w3.org/TR/WD-rdf-syntax#" p3p="http//www.w3.org/TR/1998/WD-P3P10-syntax#proposal.DTD">
<PROP realm="http://www.CoolCatalog.com/catalogue/" entity="CoolCatalog" agreeID="94df1293a3e519bb" assurance="http://www.TrustUs.org"> <PROP realm="http://www.CoolCatalog.com/catalogue/" entity="CoolCatalog" agreeID="94df1293a3e519bb" assurance="http://www.TrustUs.org">
<USES> <USES>
<STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you&apos;d appreciate."> <STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you'd appreciate.">
<WITH> <WITH><PREFIX name="User.">
<PREFIX name="User.">
<REF name="Name.First"/> <REF name="Name.First"/>
<REF name="Bdate.Year" optional="1"/> <REF name="Bdate.Year" optional="1"/>
<REF name="Gender"/> <REF name="Gender"/>
</PREFIX> </PREFIX></WITH>
</WITH>
</STATEMENT> </STATEMENT>
</USES> </USES>
<USES> <USES>
@ -18,5 +16,4 @@
</STATEMENT> </STATEMENT>
</USES> </USES>
<DISCLOSURE discURI="http://www.CoolCatalog.com/PrivacyPractice.html" access="3" other="0,1"/> <DISCLOSURE discURI="http://www.CoolCatalog.com/PrivacyPractice.html" access="3" other="0,1"/>
</PROP> </PROP></RDF:RDF>
</RDF:RDF>

View File

@ -11,11 +11,11 @@
<RPM:Packager>Till Bubeck &lt;bubeck@delix.de&gt;, Ngo Than &lt;than@delix.de&gt;</RPM:Packager> <RPM:Packager>Till Bubeck &lt;bubeck@delix.de&gt;, Ngo Than &lt;than@delix.de&gt;</RPM:Packager>
<RPM:Group>Libraries</RPM:Group> <RPM:Group>Libraries</RPM:Group>
<RPM:Summary>Bibliothek zur Ansteuerung von Terminals</RPM:Summary> <RPM:Summary>Bibliothek zur Ansteuerung von Terminals</RPM:Summary>
<RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#228;ngige <RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#xE4;ngige
Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#252;gung, die Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#xFC;gung, die
speziell optimiert sind. speziell optimiert sind.
Diese Version ist die &apos;new curses&apos; (ncurses) Variante und ist der Diese Version ist die 'new curses' (ncurses) Variante und ist der
anerkannte Ersatz f&#252;r die klassische Curses-Library, die nicht mehr anerkannte Ersatz f&#xFC;r die klassische Curses-Library, die nicht mehr
weiterentwickelt wird.</RPM:Description> weiterentwickelt wird.</RPM:Description>
<RPM:Copyright>GPL</RPM:Copyright> <RPM:Copyright>GPL</RPM:Copyright>
<RPM:Sources>ncurses4-4.2-3.src.rpm</RPM:Sources> <RPM:Sources>ncurses4-4.2-3.src.rpm</RPM:Sources>

View File

@ -1,51 +1,63 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/"> <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/">
<channel> <channel>
<title>Slashdot:News for Nerds. Stuff that Matters.</title> <title>Slashdot:News for Nerds. Stuff that Matters.</title>
<link>http://slashdot.org/</link> <link>http://slashdot.org/</link>
<description>News for Nerds. Stuff that Matters</description> <description>News for Nerds. Stuff that Matters</description>
</channel> </channel>
<image> <image>
<title>Slashdot</title> <title>Slashdot</title>
<url>http://slashdot.org/images/slashdotlg.gif</url> <url>http://slashdot.org/images/slashdotlg.gif</url>
<link>http://slashdot.org</link> <link>http://slashdot.org</link>
</image> </image>
<item> <item>
<title>100 Mbit/s on Fibre to the home</title> <title>100 Mbit/s on Fibre to the home</title>
<link>http://slashdot.org/articles/99/06/06/1440211.shtml</link> <link>http://slashdot.org/articles/99/06/06/1440211.shtml</link>
</item> </item>
<item> <item>
<title>Gimp 1.2 Preview</title> <title>Gimp 1.2 Preview</title>
<link>http://slashdot.org/articles/99/06/06/1438246.shtml</link> <link>http://slashdot.org/articles/99/06/06/1438246.shtml</link>
</item> </item>
<item> <item>
<title>Sony&apos;s AIBO robot Sold Out</title> <title>Sony's AIBO robot Sold Out</title>
<link>http://slashdot.org/articles/99/06/06/1432256.shtml</link> <link>http://slashdot.org/articles/99/06/06/1432256.shtml</link>
</item> </item>
<item> <item>
<title>Ask Slashdot: Another Word for &quot;Hacker&quot;?</title> <title>Ask Slashdot: Another Word for &quot;Hacker&quot;?</title>
<link>http://slashdot.org/askslashdot/99/06/05/1815225.shtml</link> <link>http://slashdot.org/askslashdot/99/06/05/1815225.shtml</link>
</item> </item>
<item> <item>
<title>Corel Linux FAQ</title> <title>Corel Linux FAQ</title>
<link>http://slashdot.org/articles/99/06/05/1842218.shtml</link> <link>http://slashdot.org/articles/99/06/05/1842218.shtml</link>
</item> </item>
<item> <item>
<title>Upside downsides MP3.COM.</title> <title>Upside downsides MP3.COM.</title>
<link>http://slashdot.org/articles/99/06/05/1558210.shtml</link> <link>http://slashdot.org/articles/99/06/05/1558210.shtml</link>
</item> </item>
<item> <item>
<title>2 Terabits of Bandwidth</title> <title>2 Terabits of Bandwidth</title>
<link>http://slashdot.org/articles/99/06/05/1554258.shtml</link> <link>http://slashdot.org/articles/99/06/05/1554258.shtml</link>
</item> </item>
<item> <item>
<title>Suppression of cold fusion research?</title> <title>Suppression of cold fusion research?</title>
<link>http://slashdot.org/articles/99/06/04/2313200.shtml</link> <link>http://slashdot.org/articles/99/06/04/2313200.shtml</link>
</item> </item>
<item> <item>
<title>California Gov. Halts Wage Info Sale</title> <title>California Gov. Halts Wage Info Sale</title>
<link>http://slashdot.org/articles/99/06/04/235256.shtml</link> <link>http://slashdot.org/articles/99/06/04/235256.shtml</link>
</item> </item>
<item> <item>
<title>Red Hat Announces IPO</title> <title>Red Hat Announces IPO</title>
<link>http://slashdot.org/articles/99/06/04/0849207.shtml</link> <link>http://slashdot.org/articles/99/06/04/0849207.shtml</link>

View File

@ -5,7 +5,7 @@
<url>http://slashdot.org/articles/99/06/06/1440211.shtml</url> <url>http://slashdot.org/articles/99/06/06/1440211.shtml</url>
<time>1999-06-06 14:39:59</time> <time>1999-06-06 14:39:59</time>
<author>CmdrTaco</author> <author>CmdrTaco</author>
<department>wouldn&apos;t-it-be-nice</department> <department>wouldn't-it-be-nice</department>
<topic>internet</topic> <topic>internet</topic>
<comments>20</comments> <comments>20</comments>
<section>articles</section> <section>articles</section>
@ -23,7 +23,7 @@
<image>topicgimp.gif</image> <image>topicgimp.gif</image>
</story> </story>
<story> <story>
<title>Sony&apos;s AIBO robot Sold Out</title> <title>Sony's AIBO robot Sold Out</title>
<url>http://slashdot.org/articles/99/06/06/1432256.shtml</url> <url>http://slashdot.org/articles/99/06/06/1432256.shtml</url>
<time>1999-06-06 14:32:51</time> <time>1999-06-06 14:32:51</time>
<author>CmdrTaco</author> <author>CmdrTaco</author>

View File

@ -1,7 +1,8 @@
<?xml version="1.0" standalone="yes"?> <?xml version="1.0" standalone="yes"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd"> <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd">
<svg width="242px" height="383px"> <svg width="242px" height="383px">
<g style="stroke: #000000"/> <g style="stroke: #000000">
</g>
<g style="fill: #f2cc99"> <g style="fill: #f2cc99">
<polyline verts=" 69,18 82,8 99,3 118,5 135,12 149,21 156,13 165,9 177,13 183,28 180,50 164,91 155,107 154,114 151,121 141,127 139,136 155,206 157,251 126,342 133,357 128,376 83,376 75,368 67,350 61,350 53,369 4,369 2,361 5,354 12,342 16,321 4,257 4,244 7,218 9,179 26,127 43,93 32,77 30,70 24,67 16,49 17,35 18,23 30,12 40,7 53,7 62,12 69,18 69,18 69,18"/> <polyline verts=" 69,18 82,8 99,3 118,5 135,12 149,21 156,13 165,9 177,13 183,28 180,50 164,91 155,107 154,114 151,121 141,127 139,136 155,206 157,251 126,342 133,357 128,376 83,376 75,368 67,350 61,350 53,369 4,369 2,361 5,354 12,342 16,321 4,257 4,244 7,218 9,179 26,127 43,93 32,77 30,70 24,67 16,49 17,35 18,23 30,12 40,7 53,7 62,12 69,18 69,18 69,18"/>
</g> </g>
@ -157,5 +158,4 @@
<polyline verts=" 147,338 142,341 143,345 141,354 147,343 147,338 147,338 147,338"/> <polyline verts=" 147,338 142,341 143,345 141,354 147,343 147,338 147,338 147,338"/>
<polyline verts=" 157,342 156,349 150,356 157,353 163,346 162,342 157,342 157,342 157,342"/> <polyline verts=" 157,342 156,349 150,356 157,353 163,346 162,342 157,342 157,342 157,342"/>
<polyline verts=" 99,265 96,284 92,299 73,339 73,333 87,300 99,265 99,265 99,265"/> <polyline verts=" 99,265 96,284 92,299 73,339 73,333 87,300 99,265 99,265 99,265"/>
</g> </g></svg>
</svg>

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -8,7 +8,8 @@
<g style="stroke: #800040"> <g style="stroke: #800040">
<polyline verts=" 32,100 72,50 90,82 73,16 120,64 152,9 177,107"/> <polyline verts=" 32,100 72,50 90,82 73,16 120,64 152,9 177,107"/>
</g> </g>
<g style="stroke: #000000"/> <g style="stroke: #000000">
</g>
<g style="stroke: #0000ff"> <g style="stroke: #0000ff">
<rect x="30" y="101" width="51" height="33"/> <rect x="30" y="101" width="51" height="33"/>
</g> </g>
@ -38,11 +39,13 @@
<g style="stroke: #008080"> <g style="stroke: #008080">
<text x="176" y="85">sadfsadfsad</text> <text x="176" y="85">sadfsadfsad</text>
</g> </g>
<g style="stroke: #000000"/> <g style="stroke: #000000">
</g>
<g style="fill: #800040"> <g style="fill: #800040">
<ellipse cx="208" cy="180" major="45" minor="31" angle="0"/> <ellipse cx="208" cy="180" major="45" minor="31" angle="0"/>
</g> </g>
<g style="stroke: #000000"/> <g style="stroke: #000000">
</g>
<g style="fill: #ffffff"> <g style="fill: #ffffff">
<g> <g>
<desc> Java Font definition:Dialog 700</desc> <desc> Java Font definition:Dialog 700</desc>
@ -50,5 +53,4 @@
<g> <g>
<desc> Java Font definition:Dialog 700</desc> <desc> Java Font definition:Dialog 700</desc>
</g> </g>
</g> </g></svg>
</svg>

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

View File

@ -1,8 +1,8 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<!DOCTYPE test [ <!DOCTYPE test [
<!ELEMENT test (#PCDATA)>
<!ENTITY % xx "&#37;zz;"> <!ENTITY % xx "&#37;zz;">
<!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >'> <!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >'>
<!ENTITY tricky "error-prone"> <!ENTITY tricky "error-prone">
<!ELEMENT test (#PCDATA)>
]> ]>
<test>This sample shows a error-prone method.</test> <test>This sample shows a error-prone method.</test>

View File

@ -2,14 +2,12 @@
<RDF:RDF xmlns:RDF="http://www.w3.org/TR/WD-rdf-syntax#" p3p="http//www.w3.org/TR/1998/WD-P3P10-syntax#proposal.DTD"> <RDF:RDF xmlns:RDF="http://www.w3.org/TR/WD-rdf-syntax#" p3p="http//www.w3.org/TR/1998/WD-P3P10-syntax#proposal.DTD">
<PROP realm="http://www.CoolCatalog.com/catalogue/" entity="CoolCatalog" agreeID="94df1293a3e519bb" assurance="http://www.TrustUs.org"> <PROP realm="http://www.CoolCatalog.com/catalogue/" entity="CoolCatalog" agreeID="94df1293a3e519bb" assurance="http://www.TrustUs.org">
<USES> <USES>
<STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you&apos;d appreciate."> <STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you'd appreciate.">
<WITH> <WITH><PREFIX name="User.">
<PREFIX name="User.">
<REF name="Name.First"/> <REF name="Name.First"/>
<REF name="Bdate.Year" optional="1"/> <REF name="Bdate.Year" optional="1"/>
<REF name="Gender"/> <REF name="Gender"/>
</PREFIX> </PREFIX></WITH>
</WITH>
</STATEMENT> </STATEMENT>
</USES> </USES>
<USES> <USES>
@ -18,5 +16,4 @@
</STATEMENT> </STATEMENT>
</USES> </USES>
<DISCLOSURE discURI="http://www.CoolCatalog.com/PrivacyPractice.html" access="3" other="0,1"/> <DISCLOSURE discURI="http://www.CoolCatalog.com/PrivacyPractice.html" access="3" other="0,1"/>
</PROP> </PROP></RDF:RDF>
</RDF:RDF>

View File

@ -11,11 +11,11 @@
<RPM:Packager>Till Bubeck &lt;bubeck@delix.de&gt;, Ngo Than &lt;than@delix.de&gt;</RPM:Packager> <RPM:Packager>Till Bubeck &lt;bubeck@delix.de&gt;, Ngo Than &lt;than@delix.de&gt;</RPM:Packager>
<RPM:Group>Libraries</RPM:Group> <RPM:Group>Libraries</RPM:Group>
<RPM:Summary>Bibliothek zur Ansteuerung von Terminals</RPM:Summary> <RPM:Summary>Bibliothek zur Ansteuerung von Terminals</RPM:Summary>
<RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#228;ngige <RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#xE4;ngige
Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#252;gung, die Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#xFC;gung, die
speziell optimiert sind. speziell optimiert sind.
Diese Version ist die &apos;new curses&apos; (ncurses) Variante und ist der Diese Version ist die 'new curses' (ncurses) Variante und ist der
anerkannte Ersatz f&#252;r die klassische Curses-Library, die nicht mehr anerkannte Ersatz f&#xFC;r die klassische Curses-Library, die nicht mehr
weiterentwickelt wird.</RPM:Description> weiterentwickelt wird.</RPM:Description>
<RPM:Copyright>GPL</RPM:Copyright> <RPM:Copyright>GPL</RPM:Copyright>
<RPM:Sources>ncurses4-4.2-3.src.rpm</RPM:Sources> <RPM:Sources>ncurses4-4.2-3.src.rpm</RPM:Sources>

View File

@ -1,51 +1,63 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/"> <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/">
<channel> <channel>
<title>Slashdot:News for Nerds. Stuff that Matters.</title> <title>Slashdot:News for Nerds. Stuff that Matters.</title>
<link>http://slashdot.org/</link> <link>http://slashdot.org/</link>
<description>News for Nerds. Stuff that Matters</description> <description>News for Nerds. Stuff that Matters</description>
</channel> </channel>
<image> <image>
<title>Slashdot</title> <title>Slashdot</title>
<url>http://slashdot.org/images/slashdotlg.gif</url> <url>http://slashdot.org/images/slashdotlg.gif</url>
<link>http://slashdot.org</link> <link>http://slashdot.org</link>
</image> </image>
<item> <item>
<title>100 Mbit/s on Fibre to the home</title> <title>100 Mbit/s on Fibre to the home</title>
<link>http://slashdot.org/articles/99/06/06/1440211.shtml</link> <link>http://slashdot.org/articles/99/06/06/1440211.shtml</link>
</item> </item>
<item> <item>
<title>Gimp 1.2 Preview</title> <title>Gimp 1.2 Preview</title>
<link>http://slashdot.org/articles/99/06/06/1438246.shtml</link> <link>http://slashdot.org/articles/99/06/06/1438246.shtml</link>
</item> </item>
<item> <item>
<title>Sony&apos;s AIBO robot Sold Out</title> <title>Sony's AIBO robot Sold Out</title>
<link>http://slashdot.org/articles/99/06/06/1432256.shtml</link> <link>http://slashdot.org/articles/99/06/06/1432256.shtml</link>
</item> </item>
<item> <item>
<title>Ask Slashdot: Another Word for &quot;Hacker&quot;?</title> <title>Ask Slashdot: Another Word for &quot;Hacker&quot;?</title>
<link>http://slashdot.org/askslashdot/99/06/05/1815225.shtml</link> <link>http://slashdot.org/askslashdot/99/06/05/1815225.shtml</link>
</item> </item>
<item> <item>
<title>Corel Linux FAQ</title> <title>Corel Linux FAQ</title>
<link>http://slashdot.org/articles/99/06/05/1842218.shtml</link> <link>http://slashdot.org/articles/99/06/05/1842218.shtml</link>
</item> </item>
<item> <item>
<title>Upside downsides MP3.COM.</title> <title>Upside downsides MP3.COM.</title>
<link>http://slashdot.org/articles/99/06/05/1558210.shtml</link> <link>http://slashdot.org/articles/99/06/05/1558210.shtml</link>
</item> </item>
<item> <item>
<title>2 Terabits of Bandwidth</title> <title>2 Terabits of Bandwidth</title>
<link>http://slashdot.org/articles/99/06/05/1554258.shtml</link> <link>http://slashdot.org/articles/99/06/05/1554258.shtml</link>
</item> </item>
<item> <item>
<title>Suppression of cold fusion research?</title> <title>Suppression of cold fusion research?</title>
<link>http://slashdot.org/articles/99/06/04/2313200.shtml</link> <link>http://slashdot.org/articles/99/06/04/2313200.shtml</link>
</item> </item>
<item> <item>
<title>California Gov. Halts Wage Info Sale</title> <title>California Gov. Halts Wage Info Sale</title>
<link>http://slashdot.org/articles/99/06/04/235256.shtml</link> <link>http://slashdot.org/articles/99/06/04/235256.shtml</link>
</item> </item>
<item> <item>
<title>Red Hat Announces IPO</title> <title>Red Hat Announces IPO</title>
<link>http://slashdot.org/articles/99/06/04/0849207.shtml</link> <link>http://slashdot.org/articles/99/06/04/0849207.shtml</link>

View File

@ -5,7 +5,7 @@
<url>http://slashdot.org/articles/99/06/06/1440211.shtml</url> <url>http://slashdot.org/articles/99/06/06/1440211.shtml</url>
<time>1999-06-06 14:39:59</time> <time>1999-06-06 14:39:59</time>
<author>CmdrTaco</author> <author>CmdrTaco</author>
<department>wouldn&apos;t-it-be-nice</department> <department>wouldn't-it-be-nice</department>
<topic>internet</topic> <topic>internet</topic>
<comments>20</comments> <comments>20</comments>
<section>articles</section> <section>articles</section>
@ -23,7 +23,7 @@
<image>topicgimp.gif</image> <image>topicgimp.gif</image>
</story> </story>
<story> <story>
<title>Sony&apos;s AIBO robot Sold Out</title> <title>Sony's AIBO robot Sold Out</title>
<url>http://slashdot.org/articles/99/06/06/1432256.shtml</url> <url>http://slashdot.org/articles/99/06/06/1432256.shtml</url>
<time>1999-06-06 14:32:51</time> <time>1999-06-06 14:32:51</time>
<author>CmdrTaco</author> <author>CmdrTaco</author>

View File

@ -1,7 +1,8 @@
<?xml version="1.0" standalone="yes"?> <?xml version="1.0" standalone="yes"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd"> <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd">
<svg width="242px" height="383px"> <svg width="242px" height="383px">
<g style="stroke: #000000"/> <g style="stroke: #000000">
</g>
<g style="fill: #f2cc99"> <g style="fill: #f2cc99">
<polyline verts=" 69,18 82,8 99,3 118,5 135,12 149,21 156,13 165,9 177,13 183,28 180,50 164,91 155,107 154,114 151,121 141,127 139,136 155,206 157,251 126,342 133,357 128,376 83,376 75,368 67,350 61,350 53,369 4,369 2,361 5,354 12,342 16,321 4,257 4,244 7,218 9,179 26,127 43,93 32,77 30,70 24,67 16,49 17,35 18,23 30,12 40,7 53,7 62,12 69,18 69,18 69,18"/> <polyline verts=" 69,18 82,8 99,3 118,5 135,12 149,21 156,13 165,9 177,13 183,28 180,50 164,91 155,107 154,114 151,121 141,127 139,136 155,206 157,251 126,342 133,357 128,376 83,376 75,368 67,350 61,350 53,369 4,369 2,361 5,354 12,342 16,321 4,257 4,244 7,218 9,179 26,127 43,93 32,77 30,70 24,67 16,49 17,35 18,23 30,12 40,7 53,7 62,12 69,18 69,18 69,18"/>
</g> </g>
@ -157,5 +158,4 @@
<polyline verts=" 147,338 142,341 143,345 141,354 147,343 147,338 147,338 147,338"/> <polyline verts=" 147,338 142,341 143,345 141,354 147,343 147,338 147,338 147,338"/>
<polyline verts=" 157,342 156,349 150,356 157,353 163,346 162,342 157,342 157,342 157,342"/> <polyline verts=" 157,342 156,349 150,356 157,353 163,346 162,342 157,342 157,342 157,342"/>
<polyline verts=" 99,265 96,284 92,299 73,339 73,333 87,300 99,265 99,265 99,265"/> <polyline verts=" 99,265 96,284 92,299 73,339 73,333 87,300 99,265 99,265 99,265"/>
</g> </g></svg>
</svg>

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -8,7 +8,8 @@
<g style="stroke: #800040"> <g style="stroke: #800040">
<polyline verts=" 32,100 72,50 90,82 73,16 120,64 152,9 177,107"/> <polyline verts=" 32,100 72,50 90,82 73,16 120,64 152,9 177,107"/>
</g> </g>
<g style="stroke: #000000"/> <g style="stroke: #000000">
</g>
<g style="stroke: #0000ff"> <g style="stroke: #0000ff">
<rect x="30" y="101" width="51" height="33"/> <rect x="30" y="101" width="51" height="33"/>
</g> </g>
@ -38,11 +39,13 @@
<g style="stroke: #008080"> <g style="stroke: #008080">
<text x="176" y="85">sadfsadfsad</text> <text x="176" y="85">sadfsadfsad</text>
</g> </g>
<g style="stroke: #000000"/> <g style="stroke: #000000">
</g>
<g style="fill: #800040"> <g style="fill: #800040">
<ellipse cx="208" cy="180" major="45" minor="31" angle="0"/> <ellipse cx="208" cy="180" major="45" minor="31" angle="0"/>
</g> </g>
<g style="stroke: #000000"/> <g style="stroke: #000000">
</g>
<g style="fill: #ffffff"> <g style="fill: #ffffff">
<g> <g>
<desc> Java Font definition:Dialog 700</desc> <desc> Java Font definition:Dialog 700</desc>
@ -50,5 +53,4 @@
<g> <g>
<desc> Java Font definition:Dialog 700</desc> <desc> Java Font definition:Dialog 700</desc>
</g> </g>
</g> </g></svg>
</svg>

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

View File

@ -1,6 +1,8 @@
<?xml version="1.0" encoding="ISO-8859-1" standalone="no"?> <?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>
<!DOCTYPE spec SYSTEM "dtds/spec.dtd" [ <!DOCTYPE spec SYSTEM "dtds/spec.dtd" [
<!ENTITY XML.version "1.0"> <!-- LAST TOUCHED BY: Tim Bray, 8 February 1997 --><!-- The words 'FINAL EDIT' in comments mark places where changes
need to be made after approval of the document by the ERB, before
publication. --><!ENTITY XML.version "1.0">
<!ENTITY doc.date "10 February 1998"> <!ENTITY doc.date "10 February 1998">
<!ENTITY iso6.doc.date "19980210"> <!ENTITY iso6.doc.date "19980210">
<!ENTITY w3c.doc.date "02-Feb-1998"> <!ENTITY w3c.doc.date "02-Feb-1998">
@ -15,27 +17,20 @@
<!ENTITY br "\n"> <!ENTITY br "\n">
<!ENTITY cellback "#c0d9c0"> <!ENTITY cellback "#c0d9c0">
<!ENTITY mdash "--"> <!ENTITY mdash "--">
<!ENTITY com "--"> <!-- &#x2014, but nsgmls doesn't grok hex --><!ENTITY com "--">
<!ENTITY como "--"> <!ENTITY como "--">
<!ENTITY comc "--"> <!ENTITY comc "--">
<!ENTITY hcro "&amp;#x"> <!ENTITY hcro "&amp;#x">
<!ENTITY nbsp "&#160;"> <!-- <!ENTITY nbsp "<22>"> --><!ENTITY nbsp "&#160;">
<!ENTITY magicents "<code>amp</code>, <!ENTITY magicents "<code>amp</code>,
<code>lt</code>, <code>lt</code>,
<code>gt</code>, <code>gt</code>,
<code>apos</code>, <code>apos</code>,
<code>quot</code>"> <code>quot</code>">
<!ENTITY doc.audience "public review and discussion"> <!-- audience and distribution status: for use at publication time --><!ENTITY doc.audience "public review and discussion">
<!ENTITY doc.distribution "may be distributed freely, as long as <!ENTITY doc.distribution "may be distributed freely, as long as
all text and legal notices remain intact"> all text and legal notices remain intact">
]> ]>
<!-- LAST TOUCHED BY: Tim Bray, 8 February 1997 -->
<!-- The words 'FINAL EDIT' in comments mark places where changes
need to be made after approval of the document by the ERB, before
publication. -->
<!-- &#x2014, but nsgmls doesn't grok hex -->
<!-- <!ENTITY nbsp "<22>"> -->
<!-- audience and distribution status: for use at publication time -->
<!-- for Panorama *--> <!-- for Panorama *-->
<?VERBATIM "eg" ?> <?VERBATIM "eg" ?>
<spec> <spec>
@ -110,7 +105,7 @@ HTML.</p>
other interested parties and has been endorsed by the other interested parties and has been endorsed by the
Director as a W3C Recommendation. It is a stable Director as a W3C Recommendation. It is a stable
document and may be used as reference material or cited document and may be used as reference material or cited
as a normative reference from another document. W3C&apos;s as a normative reference from another document. W3C's
role in making the Recommendation is to draw attention role in making the Recommendation is to draw attention
to the specification and to promote its widespread to the specification and to promote its widespread
deployment. This enhances the functionality and deployment. This enhances the functionality and
@ -155,24 +150,24 @@ entify hard-coded document date in pubdate element,
change expansion of entity WebSGML, change expansion of entity WebSGML,
update status description as per Dan Connolly (am not sure update status description as per Dan Connolly (am not sure
about refernece to Berners-Lee et al.), about refernece to Berners-Lee et al.),
add &apos;The&apos; to abstract as per WG decision, add 'The' to abstract as per WG decision,
move Relationship to Existing Standards to back matter and move Relationship to Existing Standards to back matter and
combine with References, combine with References,
re-order back matter so normative appendices come first, re-order back matter so normative appendices come first,
re-tag back matter so informative appendices are tagged informdiv1, re-tag back matter so informative appendices are tagged informdiv1,
remove XXX XXX from list of &apos;normative&apos; specs in prose, remove XXX XXX from list of 'normative' specs in prose,
move some references from Other References to Normative References, move some references from Other References to Normative References,
add RFC 1738, 1808, and 2141 to Other References (they are not add RFC 1738, 1808, and 2141 to Other References (they are not
normative since we do not require the processor to enforce any normative since we do not require the processor to enforce any
rules based on them), rules based on them),
add reference to &apos;Fielding draft&apos; (Berners-Lee et al.), add reference to 'Fielding draft' (Berners-Lee et al.),
move notation section to end of body, move notation section to end of body,
drop URIchar non-terminal and use SkipLit instead, drop URIchar non-terminal and use SkipLit instead,
lose stray reference to defunct nonterminal &apos;markupdecls&apos;, lose stray reference to defunct nonterminal 'markupdecls',
move reference to Aho et al. into appendix (Tim&apos;s right), move reference to Aho et al. into appendix (Tim's right),
add prose note saying that hash marks and fragment identifiers are add prose note saying that hash marks and fragment identifiers are
NOT part of the URI formally speaking, and are NOT legal in NOT part of the URI formally speaking, and are NOT legal in
system identifiers (processor &apos;may&apos; signal an error). system identifiers (processor 'may' signal an error).
Work through: Work through:
Tim Bray reacting to James Clark, Tim Bray reacting to James Clark,
Tim Bray on his own, Tim Bray on his own,
@ -180,7 +175,7 @@ Eve Maler,
NOT DONE YET: NOT DONE YET:
change binary / text to unparsed / parsed. change binary / text to unparsed / parsed.
handle James&apos;s suggestion about &lt; in attriubte values handle James's suggestion about &lt; in attriubte values
uppercase hex characters, uppercase hex characters,
namechar list, namechar list,
</sitem> </sitem>
@ -193,7 +188,7 @@ drop SDD from EncodingDecl,
change text at version number 1.0, change text at version number 1.0,
drop misleading (wrong!) sentence about ignorables and extenders, drop misleading (wrong!) sentence about ignorables and extenders,
modify definition of PCData to make bar on msc grammatical, modify definition of PCData to make bar on msc grammatical,
change grammar&apos;s handling of internal subset (drop non-terminal markupdecls), change grammar's handling of internal subset (drop non-terminal markupdecls),
change definition of includeSect to allow conditional sections, change definition of includeSect to allow conditional sections,
add integral-declaration constraint on internal subset, add integral-declaration constraint on internal subset,
drop misleading / dangerous sentence about relationship of drop misleading / dangerous sentence about relationship of
@ -207,14 +202,14 @@ Unicode character database (needs further work!).
for PE appearance.</sitem> for PE appearance.</sitem>
<sitem>1997-10-01 : TB : Case-sensitive markup; cleaned up <sitem>1997-10-01 : TB : Case-sensitive markup; cleaned up
element-type defs, lotsa little edits for style</sitem> element-type defs, lotsa little edits for style</sitem>
<sitem>1997-09-25 : TB : Change to elm&apos;s new DTD, with <sitem>1997-09-25 : TB : Change to elm's new DTD, with
substantial detail cleanup as a side-effect</sitem> substantial detail cleanup as a side-effect</sitem>
<sitem>1997-07-24 : CMSMcQ : correct error (lost *) in definition <sitem>1997-07-24 : CMSMcQ : correct error (lost *) in definition
of ignoreSectContents (thanks to Makoto Murata)</sitem> of ignoreSectContents (thanks to Makoto Murata)</sitem>
<sitem>Allow all empty elements to have end-tags, consistent with <sitem>Allow all empty elements to have end-tags, consistent with
SGML TC (as per JJC).</sitem> SGML TC (as per JJC).</sitem>
<sitem>1997-07-23 : CMSMcQ : pre-emptive strike on pending corrections: <sitem>1997-07-23 : CMSMcQ : pre-emptive strike on pending corrections:
introduce the term &apos;empty-element tag&apos;, note that all empty elements introduce the term 'empty-element tag', note that all empty elements
may use it, and elements declared EMPTY must use it. may use it, and elements declared EMPTY must use it.
Add WFC requiring encoding decl to come first in an entity. Add WFC requiring encoding decl to come first in an entity.
Redefine notations to point to PIs as well as binary entities. Redefine notations to point to PIs as well as binary entities.
@ -225,15 +220,15 @@ mixed and element content.
</sitem> </sitem>
<sitem>1997-06-30 : CMSMcQ : change date, some cosmetic changes, <sitem>1997-06-30 : CMSMcQ : change date, some cosmetic changes,
changes to productions for choice, seq, Mixed, NotationType, changes to productions for choice, seq, Mixed, NotationType,
Enumeration. Follow James Clark&apos;s suggestion and prohibit Enumeration. Follow James Clark's suggestion and prohibit
conditional sections in internal subset. TO DO: simplify conditional sections in internal subset. TO DO: simplify
production for ignored sections as a result, since we don&apos;t production for ignored sections as a result, since we don't
need to worry about parsers which don&apos;t expand PErefs finding need to worry about parsers which don't expand PErefs finding
a conditional section.</sitem> a conditional section.</sitem>
<sitem>1997-06-29 : TB : various edits</sitem> <sitem>1997-06-29 : TB : various edits</sitem>
<sitem>1997-06-29 : CMSMcQ : further changes: <sitem>1997-06-29 : CMSMcQ : further changes:
Suppress old FINAL EDIT comments and some dead material. Suppress old FINAL EDIT comments and some dead material.
Revise occurrences of % in grammar to exploit Henry Thompson&apos;s pun, Revise occurrences of % in grammar to exploit Henry Thompson's pun,
especially markupdecl and attdef. especially markupdecl and attdef.
Remove RMD requirement relating to element content (?). Remove RMD requirement relating to element content (?).
</sitem> </sitem>
@ -249,8 +244,8 @@ Change def of %operator.
Change standard definitions of lt, gt, amp. Change standard definitions of lt, gt, amp.
Strip leading zeros from #x00nn forms.</sitem> Strip leading zeros from #x00nn forms.</sitem>
<sitem>1997-04-02 : CMSMcQ : final corrections of editorial errors <sitem>1997-04-02 : CMSMcQ : final corrections of editorial errors
found in last night&apos;s proofreading. Reverse course once more on found in last night's proofreading. Reverse course once more on
well-formed: Webster&apos;s Second hyphenates it, and that&apos;s enough well-formed: Webster's Second hyphenates it, and that's enough
for me.</sitem> for me.</sitem>
<sitem>1997-04-01 : CMSMcQ : corrections from JJC, EM, HT, and self</sitem> <sitem>1997-04-01 : CMSMcQ : corrections from JJC, EM, HT, and self</sitem>
<sitem>1997-03-31 : Tim Bray : many changes</sitem> <sitem>1997-03-31 : Tim Bray : many changes</sitem>
@ -265,11 +260,11 @@ Paul Grosso, and self. Among other things: give in on &quot;well formed&quot;
(Terry is right), tentatively rename QuotedCData as AttValue (Terry is right), tentatively rename QuotedCData as AttValue
and Literal as EntityValue to be more informative, since attribute and Literal as EntityValue to be more informative, since attribute
values are the <emph>only</emph> place QuotedCData was used, and values are the <emph>only</emph> place QuotedCData was used, and
vice versa for entity text and Literal. (I&apos;d call it Entity Text, vice versa for entity text and Literal. (I'd call it Entity Text,
but 8879 uses that name for both internal and external entities.)</sitem> but 8879 uses that name for both internal and external entities.)</sitem>
<sitem>1997-03-26 : CMSMcQ : resynch the two forks of this draft, reapply <sitem>1997-03-26 : CMSMcQ : resynch the two forks of this draft, reapply
my changes dated 03-20 and 03-21. Normalize old &apos;may not&apos; to &apos;must not&apos; my changes dated 03-20 and 03-21. Normalize old 'may not' to 'must not'
except in the one case where it meant &apos;may or may not&apos;.</sitem> except in the one case where it meant 'may or may not'.</sitem>
<sitem>1997-03-21 : TB : massive changes on plane flight from Chicago <sitem>1997-03-21 : TB : massive changes on plane flight from Chicago
to Vancouver</sitem> to Vancouver</sitem>
<sitem>1997-03-21 : CMSMcQ : correct as many reported errors as possible. <sitem>1997-03-21 : CMSMcQ : correct as many reported errors as possible.
@ -280,12 +275,12 @@ WWW conference April 1997: restore some of the internal entity
references (e.g. to docdate, etc.), change character xA0 to &amp;nbsp; references (e.g. to docdate, etc.), change character xA0 to &amp;nbsp;
and define nbsp as &amp;#160;, and refill a lot of paragraphs for and define nbsp as &amp;#160;, and refill a lot of paragraphs for
legibility.</sitem> legibility.</sitem>
<sitem>1996-11-12 : CMSMcQ : revise using Tim&apos;s edits: <sitem>1996-11-12 : CMSMcQ : revise using Tim's edits:
Add list type of NUMBERED and change most lists either to Add list type of NUMBERED and change most lists either to
BULLETS or to NUMBERED. BULLETS or to NUMBERED.
Suppress QuotedNames, Names (not used). Suppress QuotedNames, Names (not used).
Correct trivial-grammar doc type decl. Correct trivial-grammar doc type decl.
Rename &apos;marked section&apos; as &apos;CDATA section&apos; passim. Rename 'marked section' as 'CDATA section' passim.
Also edits from James Clark: Also edits from James Clark:
Define the set of characters from which [^abc] subtracts. Define the set of characters from which [^abc] subtracts.
Charref should use just [0-9] not Digit. Charref should use just [0-9] not Digit.
@ -293,9 +288,9 @@ Location info needs cleaner treatment: remove? (ERB
question). question).
One example of a PI has wrong pic. One example of a PI has wrong pic.
Clarify discussion of encoding names. Clarify discussion of encoding names.
Encoding failure should lead to unspecified results; don&apos;t Encoding failure should lead to unspecified results; don't
prescribe error recovery. prescribe error recovery.
Don&apos;t require exposure of entity boundaries. Don't require exposure of entity boundaries.
Ignore white space in element content. Ignore white space in element content.
Reserve entity names of the form u-NNNN. Reserve entity names of the form u-NNNN.
Clarify relative URLs. Clarify relative URLs.
@ -313,17 +308,17 @@ Finish stylistic revision.</sitem>
<sitem>1996-10-31 : TB : Add Entity Handling section</sitem> <sitem>1996-10-31 : TB : Add Entity Handling section</sitem>
<sitem>1996-10-30 : TB : Clean up term &amp; termdef. Slip in <sitem>1996-10-30 : TB : Clean up term &amp; termdef. Slip in
ERB decision re EMPTY.</sitem> ERB decision re EMPTY.</sitem>
<sitem>1996-10-28 : TB : Change DTD. Implement some of Michael&apos;s <sitem>1996-10-28 : TB : Change DTD. Implement some of Michael's
suggestions. Change comments back to //. Introduce language for suggestions. Change comments back to //. Introduce language for
XML namespace reservation. Add section on white-space handling. XML namespace reservation. Add section on white-space handling.
Lots more cleanup.</sitem> Lots more cleanup.</sitem>
<sitem>1996-10-24 : CMSMcQ : quick tweaks, implement some ERB <sitem>1996-10-24 : CMSMcQ : quick tweaks, implement some ERB
decisions. Characters are not integers. Comments are /* */ not //. decisions. Characters are not integers. Comments are /* */ not //.
Add bibliographic refs to 10646, HyTime, Unicode. Add bibliographic refs to 10646, HyTime, Unicode.
Rename old Cdata as MsData since it&apos;s <emph>only</emph> seen Rename old Cdata as MsData since it's <emph>only</emph> seen
in marked sections. Call them attribute-value pairs not in marked sections. Call them attribute-value pairs not
name-value pairs, except once. Internal subset is optional, needs name-value pairs, except once. Internal subset is optional, needs
&apos;?&apos;. Implied attributes should be signaled to the app, not '?'. Implied attributes should be signaled to the app, not
have values supplied by processor.</sitem> have values supplied by processor.</sitem>
<sitem>1996-10-16 : TB : track down &amp; excise all DSD references; <sitem>1996-10-16 : TB : track down &amp; excise all DSD references;
introduce some EBNF for entity declarations.</sitem> introduce some EBNF for entity declarations.</sitem>
@ -340,10 +335,10 @@ Move old 2.2 XML Processors and Apps into intro.
Mention comments, PIs, and marked sections in discussion of Mention comments, PIs, and marked sections in discussion of
delimiter escaping. delimiter escaping.
Streamline discussion of doctype decl syntax. Streamline discussion of doctype decl syntax.
Drop old section of &apos;PI syntax&apos; for doctype decl, and add Drop old section of 'PI syntax' for doctype decl, and add
section on partial-DTD summary PIs to end of Logical Structures section on partial-DTD summary PIs to end of Logical Structures
section. section.
Revise DSD syntax section to use Tim&apos;s subset-in-a-PI Revise DSD syntax section to use Tim's subset-in-a-PI
mechanism.</sitem> mechanism.</sitem>
<sitem>1996-10-10 : TB : eliminate name recognizers (and more?)</sitem> <sitem>1996-10-10 : TB : eliminate name recognizers (and more?)</sitem>
<sitem>1996-10-09 : CMSMcQ : revise for style, consistency through 2.3 <sitem>1996-10-09 : CMSMcQ : revise for style, consistency through 2.3
@ -381,7 +376,7 @@ Parsed data is made up of <termref def="dt-character">characters</termref>,
some some
of which form <termref def="dt-chardata">character data</termref>, of which form <termref def="dt-chardata">character data</termref>,
and some of which form <termref def="dt-markup">markup</termref>. and some of which form <termref def="dt-markup">markup</termref>.
Markup encodes a description of the document&apos;s storage layout and Markup encodes a description of the document's storage layout and
logical structure. XML provides a mechanism to impose constraints on logical structure. XML provides a mechanism to impose constraints on
the storage layout and logical structure.</p> the storage layout and logical structure.</p>
<p><termdef id="dt-xml-proc" term="XML Processor">A software module <p><termdef id="dt-xml-proc" term="XML Processor">A software module
@ -400,7 +395,7 @@ It was chaired by Jon Bosak of Sun
Microsystems with the active participation of an XML Special Microsystems with the active participation of an XML Special
Interest Group (previously known as the SGML Working Group) also Interest Group (previously known as the SGML Working Group) also
organized by the W3C. The membership of the XML Working Group is given organized by the W3C. The membership of the XML Working Group is given
in an appendix. Dan Connolly served as the WG&apos;s contact with the W3C. in an appendix. Dan Connolly served as the WG's contact with the W3C.
</p> </p>
<p>The design goals for XML are:<olist><item><p>XML shall be straightforwardly usable over the <p>The design goals for XML are:<olist><item><p>XML shall be straightforwardly usable over the
Internet.</p></item><item><p>XML shall support a wide variety of applications.</p></item><item><p>XML shall be compatible with SGML.</p></item><item><p>It shall be easy to write programs which process XML Internet.</p></item><item><p>XML shall support a wide variety of applications.</p></item><item><p>XML shall be compatible with SGML.</p></item><item><p>It shall be easy to write programs which process XML
@ -447,7 +442,7 @@ the processor may make unprocessed data from the document (with
intermingled character data and markup) available to the application. intermingled character data and markup) available to the application.
Once a fatal error is detected, however, the processor must not Once a fatal error is detected, however, the processor must not
continue normal processing (i.e., it must not continue normal processing (i.e., it must not
continue to pass character data and information about the document&apos;s continue to pass character data and information about the document's
logical structure to the application in the normal way). logical structure to the application in the normal way).
</termdef></p></def></gitem><gitem><label>at user option</label><def><p>Conforming software may or must (depending on the modal verb in the </termdef></p></def></gitem><gitem><label>at user option</label><def><p>Conforming software may or must (depending on the modal verb in the
sentence) behave as described; if it does, it must sentence) behave as described; if it does, it must
@ -608,7 +603,7 @@ beginning with a letter or one of a few punctuation characters, and continuing
with letters, digits, hyphens, underscores, colons, or full stops, together with letters, digits, hyphens, underscores, colons, or full stops, together
known as name characters.</termdef> known as name characters.</termdef>
Names beginning with the string &quot;<code>xml</code>&quot;, or any string Names beginning with the string &quot;<code>xml</code>&quot;, or any string
which would match <code>((&apos;X&apos;|&apos;x&apos;) (&apos;M&apos;|&apos;m&apos;) (&apos;L&apos;|&apos;l&apos;))</code>, are which would match <code>(('X'|'x') ('M'|'m') ('L'|'l'))</code>, are
reserved for standardization in this or future versions of this reserved for standardization in this or future versions of this
specification. specification.
</p> </p>
@ -629,9 +624,9 @@ should accept the colon as a name character.</p>
name characters. name characters.
<scrap lang="ebnf"><head>Names and Tokens</head><prod id="NT-NameChar"><lhs>NameChar</lhs><rhs><nt def="NT-Letter">Letter</nt> <scrap lang="ebnf"><head>Names and Tokens</head><prod id="NT-NameChar"><lhs>NameChar</lhs><rhs><nt def="NT-Letter">Letter</nt>
| <nt def="NT-Digit">Digit</nt> | <nt def="NT-Digit">Digit</nt>
| &apos;.&apos; | &apos;-&apos; | &apos;_&apos; | &apos;:&apos; | '.' | '-' | '_' | ':'
| <nt def="NT-CombiningChar">CombiningChar</nt> | <nt def="NT-CombiningChar">CombiningChar</nt>
| <nt def="NT-Extender">Extender</nt></rhs></prod><prod id="NT-Name"><lhs>Name</lhs><rhs>(<nt def="NT-Letter">Letter</nt> | &apos;_&apos; | &apos;:&apos;) | <nt def="NT-Extender">Extender</nt></rhs></prod><prod id="NT-Name"><lhs>Name</lhs><rhs>(<nt def="NT-Letter">Letter</nt> | '_' | ':')
(<nt def="NT-NameChar">NameChar</nt>)*</rhs></prod><prod id="NT-Names"><lhs>Names</lhs><rhs><nt def="NT-Name">Name</nt> (<nt def="NT-NameChar">NameChar</nt>)*</rhs></prod><prod id="NT-Names"><lhs>Names</lhs><rhs><nt def="NT-Name">Name</nt>
(<nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>)*</rhs></prod><prod id="NT-Nmtoken"><lhs>Nmtoken</lhs><rhs>(<nt def="NT-NameChar">NameChar</nt>)+</rhs></prod><prod id="NT-Nmtokens"><lhs>Nmtokens</lhs><rhs><nt def="NT-Nmtoken">Nmtoken</nt> (<nt def="NT-S">S</nt> <nt def="NT-Nmtoken">Nmtoken</nt>)*</rhs></prod></scrap> (<nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>)*</rhs></prod><prod id="NT-Nmtoken"><lhs>Nmtoken</lhs><rhs>(<nt def="NT-NameChar">NameChar</nt>)+</rhs></prod><prod id="NT-Nmtokens"><lhs>Nmtokens</lhs><rhs><nt def="NT-Nmtoken">Nmtoken</nt> (<nt def="NT-S">S</nt> <nt def="NT-Nmtoken">Nmtoken</nt>)*</rhs></prod></scrap>
</p> </p>
@ -645,30 +640,30 @@ and external identifiers
(<nt def="NT-SystemLiteral">SystemLiteral</nt>). (<nt def="NT-SystemLiteral">SystemLiteral</nt>).
Note that a <nt def="NT-SystemLiteral">SystemLiteral</nt> Note that a <nt def="NT-SystemLiteral">SystemLiteral</nt>
can be parsed without scanning for markup. can be parsed without scanning for markup.
<scrap lang="ebnf"><head>Literals</head><prod id="NT-EntityValue"><lhs>EntityValue</lhs><rhs>&apos;&quot;&apos; <scrap lang="ebnf"><head>Literals</head><prod id="NT-EntityValue"><lhs>EntityValue</lhs><rhs>'&quot;'
([^%&amp;&quot;] ([^%&amp;&quot;]
| <nt def="NT-PEReference">PEReference</nt> | <nt def="NT-PEReference">PEReference</nt>
| <nt def="NT-Reference">Reference</nt>)* | <nt def="NT-Reference">Reference</nt>)*
&apos;&quot;&apos; '&quot;'
</rhs><rhs>|&nbsp; </rhs><rhs>|&nbsp;
&quot;&apos;&quot; &quot;'&quot;
([^%&amp;&apos;] ([^%&amp;']
| <nt def="NT-PEReference">PEReference</nt> | <nt def="NT-PEReference">PEReference</nt>
| <nt def="NT-Reference">Reference</nt>)* | <nt def="NT-Reference">Reference</nt>)*
&quot;&apos;&quot;</rhs></prod><prod id="NT-AttValue"><lhs>AttValue</lhs><rhs>&apos;&quot;&apos; &quot;'&quot;</rhs></prod><prod id="NT-AttValue"><lhs>AttValue</lhs><rhs>'&quot;'
([^&lt;&amp;&quot;] ([^&lt;&amp;&quot;]
| <nt def="NT-Reference">Reference</nt>)* | <nt def="NT-Reference">Reference</nt>)*
&apos;&quot;&apos; '&quot;'
</rhs><rhs>|&nbsp; </rhs><rhs>|&nbsp;
&quot;&apos;&quot; &quot;'&quot;
([^&lt;&amp;&apos;] ([^&lt;&amp;']
| <nt def="NT-Reference">Reference</nt>)* | <nt def="NT-Reference">Reference</nt>)*
&quot;&apos;&quot;</rhs></prod><prod id="NT-SystemLiteral"><lhs>SystemLiteral</lhs><rhs>(&apos;&quot;&apos; [^&quot;]* &apos;&quot;&apos;) |&nbsp;(&quot;&apos;&quot; [^&apos;]* &quot;&apos;&quot;) &quot;'&quot;</rhs></prod><prod id="NT-SystemLiteral"><lhs>SystemLiteral</lhs><rhs>('&quot;' [^&quot;]* '&quot;') |&nbsp;(&quot;'&quot; [^']* &quot;'&quot;)
</rhs></prod><prod id="NT-PubidLiteral"><lhs>PubidLiteral</lhs><rhs>&apos;&quot;&apos; <nt def="NT-PubidChar">PubidChar</nt>* </rhs></prod><prod id="NT-PubidLiteral"><lhs>PubidLiteral</lhs><rhs>'&quot;' <nt def="NT-PubidChar">PubidChar</nt>*
&apos;&quot;&apos; '&quot;'
| &quot;&apos;&quot; (<nt def="NT-PubidChar">PubidChar</nt> - &quot;&apos;&quot;)* &quot;&apos;&quot;</rhs></prod><prod id="NT-PubidChar"><lhs>PubidChar</lhs><rhs>#x20 | #xD | #xA | &quot;'&quot; (<nt def="NT-PubidChar">PubidChar</nt> - &quot;'&quot;)* &quot;'&quot;</rhs></prod><prod id="NT-PubidChar"><lhs>PubidChar</lhs><rhs>#x20 | #xD | #xA
|&nbsp;[a-zA-Z0-9] |&nbsp;[a-zA-Z0-9]
|&nbsp;[-&apos;()+,./:=?;!*#@$_%]</rhs></prod></scrap> |&nbsp;[-'()+,./:=?;!*#@$_%]</rhs></prod></scrap>
</p> </p>
</div2> </div2>
<div2 id="syntax"> <div2 id="syntax">
@ -729,10 +724,10 @@ is any string of characters not including the CDATA-section-close
delimiter, &quot;<code>]]&gt;</code>&quot;.</p> delimiter, &quot;<code>]]&gt;</code>&quot;.</p>
<p> <p>
To allow attribute values to contain both single and double quotes, the To allow attribute values to contain both single and double quotes, the
apostrophe or single-quote character (&apos;) may be represented as apostrophe or single-quote character (') may be represented as
&quot;<code>&amp;apos;</code>&quot;, and the double-quote character (&quot;) as &quot;<code>&amp;apos;</code>&quot;, and the double-quote character (&quot;) as
&quot;<code>&amp;quot;</code>&quot;. &quot;<code>&amp;quot;</code>&quot;.
<scrap lang="ebnf"><head>Character Data</head><prod id="NT-CharData"><lhs>CharData</lhs><rhs>[^&lt;&amp;]* - ([^&lt;&amp;]* &apos;]]&gt;&apos; [^&lt;&amp;]*)</rhs></prod></scrap> <scrap lang="ebnf"><head>Character Data</head><prod id="NT-CharData"><lhs>CharData</lhs><rhs>[^&lt;&amp;]* - ([^&lt;&amp;]* ']]&gt;' [^&lt;&amp;]*)</rhs></prod></scrap>
</p> </p>
</div2> </div2>
<div2 id="sec-comments"> <div2 id="sec-comments">
@ -743,17 +738,17 @@ appear anywhere in a document outside other
<termref def="dt-markup">markup</termref>; in addition, <termref def="dt-markup">markup</termref>; in addition,
they may appear within the document type declaration they may appear within the document type declaration
at places allowed by the grammar. at places allowed by the grammar.
They are not part of the document&apos;s <termref def="dt-chardata">character They are not part of the document's <termref def="dt-chardata">character
data</termref>; an XML data</termref>; an XML
processor may, but need not, make it possible for an application to processor may, but need not, make it possible for an application to
retrieve the text of comments. retrieve the text of comments.
<termref def="dt-compat">For compatibility</termref>, the string <termref def="dt-compat">For compatibility</termref>, the string
&quot;<code>--</code>&quot; (double-hyphen) must not occur within &quot;<code>--</code>&quot; (double-hyphen) must not occur within
comments. comments.
<scrap lang="ebnf"><head>Comments</head><prod id="NT-Comment"><lhs>Comment</lhs><rhs>&apos;&lt;!--&apos; <scrap lang="ebnf"><head>Comments</head><prod id="NT-Comment"><lhs>Comment</lhs><rhs>'&lt;!--'
((<nt def="NT-Char">Char</nt> - &apos;-&apos;) ((<nt def="NT-Char">Char</nt> - '-')
| (&apos;-&apos; (<nt def="NT-Char">Char</nt> - &apos;-&apos;)))* | ('-' (<nt def="NT-Char">Char</nt> - '-')))*
&apos;--&gt;&apos;</rhs></prod></scrap> '--&gt;'</rhs></prod></scrap>
</termdef> </termdef>
</p> </p>
<p>An example of a comment: <p>An example of a comment:
@ -766,13 +761,13 @@ comments.
instructions</term> (PIs) allow documents to contain instructions instructions</term> (PIs) allow documents to contain instructions
for applications. for applications.
<scrap lang="ebnf"><head>Processing Instructions</head><prod id="NT-PI"><lhs>PI</lhs><rhs>&apos;&lt;?&apos; <nt def="NT-PITarget">PITarget</nt> <scrap lang="ebnf"><head>Processing Instructions</head><prod id="NT-PI"><lhs>PI</lhs><rhs>'&lt;?' <nt def="NT-PITarget">PITarget</nt>
(<nt def="NT-S">S</nt> (<nt def="NT-S">S</nt>
(<nt def="NT-Char">Char</nt>* - (<nt def="NT-Char">Char</nt>* -
(<nt def="NT-Char">Char</nt>* &pic; <nt def="NT-Char">Char</nt>*)))? (<nt def="NT-Char">Char</nt>* &pic; <nt def="NT-Char">Char</nt>*)))?
&pic;</rhs></prod><prod id="NT-PITarget"><lhs>PITarget</lhs><rhs><nt def="NT-Name">Name</nt> - &pic;</rhs></prod><prod id="NT-PITarget"><lhs>PITarget</lhs><rhs><nt def="NT-Name">Name</nt> -
((&apos;X&apos; | &apos;x&apos;) (&apos;M&apos; | &apos;m&apos;) (&apos;L&apos; | &apos;l&apos;))</rhs></prod></scrap></termdef> (('X' | 'x') ('M' | 'm') ('L' | 'l'))</rhs></prod></scrap></termdef>
PIs are not part of the document&apos;s <termref def="dt-chardata">character PIs are not part of the document's <termref def="dt-chardata">character
data</termref>, but must be passed through to the application. The data</termref>, but must be passed through to the application. The
PI begins with a target (<nt def="NT-PITarget">PITarget</nt>) used PI begins with a target (<nt def="NT-PITarget">PITarget</nt>) used
to identify the application to which the instruction is directed. to identify the application to which the instruction is directed.
@ -796,9 +791,9 @@ string &quot;<code>&lt;![CDATA[</code>&quot; and end with the string
&quot;<code>]]&gt;</code>&quot;: &quot;<code>]]&gt;</code>&quot;:
<scrap lang="ebnf"><head>CDATA Sections</head><prod id="NT-CDSect"><lhs>CDSect</lhs><rhs><nt def="NT-CDStart">CDStart</nt> <scrap lang="ebnf"><head>CDATA Sections</head><prod id="NT-CDSect"><lhs>CDSect</lhs><rhs><nt def="NT-CDStart">CDStart</nt>
<nt def="NT-CData">CData</nt> <nt def="NT-CData">CData</nt>
<nt def="NT-CDEnd">CDEnd</nt></rhs></prod><prod id="NT-CDStart"><lhs>CDStart</lhs><rhs>&apos;&lt;![CDATA[&apos;</rhs></prod><prod id="NT-CData"><lhs>CData</lhs><rhs>(<nt def="NT-Char">Char</nt>* - <nt def="NT-CDEnd">CDEnd</nt></rhs></prod><prod id="NT-CDStart"><lhs>CDStart</lhs><rhs>'&lt;![CDATA['</rhs></prod><prod id="NT-CData"><lhs>CData</lhs><rhs>(<nt def="NT-Char">Char</nt>* -
(<nt def="NT-Char">Char</nt>* &apos;]]&gt;&apos; <nt def="NT-Char">Char</nt>*)) (<nt def="NT-Char">Char</nt>* ']]&gt;' <nt def="NT-Char">Char</nt>*))
</rhs></prod><prod id="NT-CDEnd"><lhs>CDEnd</lhs><rhs>&apos;]]&gt;&apos;</rhs></prod></scrap> </rhs></prod><prod id="NT-CDEnd"><lhs>CDEnd</lhs><rhs>']]&gt;'</rhs></prod></scrap>
Within a CDATA section, only the <nt def="NT-CDEnd">CDEnd</nt> string is Within a CDATA section, only the <nt def="NT-CDEnd">CDEnd</nt> string is
recognized as markup, so that left angle brackets and ampersands may occur in recognized as markup, so that left angle brackets and ampersands may occur in
@ -865,9 +860,9 @@ the first <termref def="dt-element">element</termref> in the document.
<nt def="NT-EncodingDecl">EncodingDecl</nt>? <nt def="NT-EncodingDecl">EncodingDecl</nt>?
<nt def="NT-SDDecl">SDDecl</nt>? <nt def="NT-SDDecl">SDDecl</nt>?
<nt def="NT-S">S</nt>? <nt def="NT-S">S</nt>?
&pic;</rhs></prod><prod id="NT-VersionInfo"><lhs>VersionInfo</lhs><rhs><nt def="NT-S">S</nt> &apos;version&apos; <nt def="NT-Eq">Eq</nt> &pic;</rhs></prod><prod id="NT-VersionInfo"><lhs>VersionInfo</lhs><rhs><nt def="NT-S">S</nt> 'version' <nt def="NT-Eq">Eq</nt>
(&apos; <nt def="NT-VersionNum">VersionNum</nt> &apos; (' <nt def="NT-VersionNum">VersionNum</nt> '
| &quot; <nt def="NT-VersionNum">VersionNum</nt> &quot;)</rhs></prod><prod id="NT-Eq"><lhs>Eq</lhs><rhs><nt def="NT-S">S</nt>? &apos;=&apos; <nt def="NT-S">S</nt>?</rhs></prod><prod id="NT-VersionNum"><lhs>VersionNum</lhs><rhs>([a-zA-Z0-9_.:] | &apos;-&apos;)+</rhs></prod><prod id="NT-Misc"><lhs>Misc</lhs><rhs><nt def="NT-Comment">Comment</nt> | <nt def="NT-PI">PI</nt> | | &quot; <nt def="NT-VersionNum">VersionNum</nt> &quot;)</rhs></prod><prod id="NT-Eq"><lhs>Eq</lhs><rhs><nt def="NT-S">S</nt>? '=' <nt def="NT-S">S</nt>?</rhs></prod><prod id="NT-VersionNum"><lhs>VersionNum</lhs><rhs>([a-zA-Z0-9_.:] | '-')+</rhs></prod><prod id="NT-Misc"><lhs>Misc</lhs><rhs><nt def="NT-Comment">Comment</nt> | <nt def="NT-PI">PI</nt> |
<nt def="NT-S">S</nt></rhs></prod></prodgroup></scrap></p> <nt def="NT-S">S</nt></rhs></prod></prodgroup></scrap></p>
<p><termdef id="dt-doctype" term="Document Type Declaration">The XML <p><termdef id="dt-doctype" term="Document Type Declaration">The XML
<term>document type declaration</term> <term>document type declaration</term>
@ -903,15 +898,15 @@ For fuller information, see
<prodgroup pcw2="6" pcw4="17.5" pcw5="9"> <prodgroup pcw2="6" pcw4="17.5" pcw5="9">
<prod id="NT-doctypedecl"> <prod id="NT-doctypedecl">
<lhs>doctypedecl</lhs> <lhs>doctypedecl</lhs>
<rhs>&apos;&lt;!DOCTYPE&apos; <nt def="NT-S">S</nt> <rhs>'&lt;!DOCTYPE' <nt def="NT-S">S</nt>
<nt def="NT-Name">Name</nt> (<nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> (<nt def="NT-S">S</nt>
<nt def="NT-ExternalID">ExternalID</nt>)? <nt def="NT-ExternalID">ExternalID</nt>)?
<nt def="NT-S">S</nt>? (&apos;[&apos; <nt def="NT-S">S</nt>? ('['
(<nt def="NT-markupdecl">markupdecl</nt> (<nt def="NT-markupdecl">markupdecl</nt>
| <nt def="NT-PEReference">PEReference</nt> | <nt def="NT-PEReference">PEReference</nt>
| <nt def="NT-S">S</nt>)* | <nt def="NT-S">S</nt>)*
&apos;]&apos; ']'
<nt def="NT-S">S</nt>?)? &apos;&gt;&apos;</rhs> <nt def="NT-S">S</nt>?)? '&gt;'</rhs>
<vc def="vc-roottype"/> <vc def="vc-roottype"/>
</prod> </prod>
<prod id="NT-markupdecl"> <prod id="NT-markupdecl">
@ -1025,8 +1020,8 @@ whether or not there are such declarations which appear external to
the <termref def="dt-docent">document entity</termref>. the <termref def="dt-docent">document entity</termref>.
<scrap lang="ebnf" id="fulldtd"><head>Standalone Document Declaration</head><prodgroup pcw2="4" pcw4="19.5" pcw5="9"><prod id="NT-SDDecl"><lhs>SDDecl</lhs><rhs> <scrap lang="ebnf" id="fulldtd"><head>Standalone Document Declaration</head><prodgroup pcw2="4" pcw4="19.5" pcw5="9"><prod id="NT-SDDecl"><lhs>SDDecl</lhs><rhs>
<nt def="NT-S">S</nt> <nt def="NT-S">S</nt>
&apos;standalone&apos; <nt def="NT-Eq">Eq</nt> 'standalone' <nt def="NT-Eq">Eq</nt>
((&quot;&apos;&quot; (&apos;yes&apos; | &apos;no&apos;) &quot;&apos;&quot;) | (&apos;&quot;&apos; (&apos;yes&apos; | &apos;no&apos;) &apos;&quot;&apos;)) ((&quot;'&quot; ('yes' | 'no') &quot;'&quot;) | ('&quot;' ('yes' | 'no') '&quot;'))
</rhs><vc def="vc-check-rmd"/></prod></prodgroup></scrap></p> </rhs><vc def="vc-check-rmd"/></prod></prodgroup></scrap></p>
<p> <p>
In a standalone document declaration, the value &quot;<code>yes</code>&quot; indicates In a standalone document declaration, the value &quot;<code>yes</code>&quot; indicates
@ -1082,7 +1077,7 @@ directly within any instance of those types.
</item> </item>
</ulist> </ulist>
</vcnote> </vcnote>
<p>An example XML declaration with a standalone document declaration:<eg>&lt;?xml version=&quot;&XML.version;&quot; standalone=&apos;yes&apos;?&gt;</eg></p> <p>An example XML declaration with a standalone document declaration:<eg>&lt;?xml version=&quot;&XML.version;&quot; standalone='yes'?&gt;</eg></p>
</div2> </div2>
<div2 id="sec-white-space"> <div2 id="sec-white-space">
<head>White Space Handling</head> <head>White Space Handling</head>
@ -1111,7 +1106,7 @@ When declared, it must be given as an
<termref def="dt-enumerated">enumerated type</termref> whose only <termref def="dt-enumerated">enumerated type</termref> whose only
possible values are &quot;<code>default</code>&quot; and &quot;<code>preserve</code>&quot;. possible values are &quot;<code>default</code>&quot; and &quot;<code>preserve</code>&quot;.
For example:<eg><![CDATA[ <!ATTLIST poem xml:space (default|preserve) 'preserve'>]]></eg></p> For example:<eg><![CDATA[ <!ATTLIST poem xml:space (default|preserve) 'preserve'>]]></eg></p>
<p>The value &quot;<code>default</code>&quot; signals that applications&apos; <p>The value &quot;<code>default</code>&quot; signals that applications'
default white-space processing modes are acceptable for this element; the default white-space processing modes are acceptable for this element; the
value &quot;<code>preserve</code>&quot; indicates the intent that applications preserve value &quot;<code>preserve</code>&quot; indicates the intent that applications preserve
all the white space. all the white space.
@ -1158,9 +1153,9 @@ In valid documents, this attribute, like any other, must be
The values of the attribute are language identifiers as defined The values of the attribute are language identifiers as defined
by <bibref ref="RFC1766"/>, &quot;Tags for the Identification of Languages&quot;: by <bibref ref="RFC1766"/>, &quot;Tags for the Identification of Languages&quot;:
<scrap lang="ebnf"><head>Language Identification</head><prod id="NT-LanguageID"><lhs>LanguageID</lhs><rhs><nt def="NT-Langcode">Langcode</nt> <scrap lang="ebnf"><head>Language Identification</head><prod id="NT-LanguageID"><lhs>LanguageID</lhs><rhs><nt def="NT-Langcode">Langcode</nt>
(&apos;-&apos; <nt def="NT-Subcode">Subcode</nt>)*</rhs></prod><prod id="NT-Langcode"><lhs>Langcode</lhs><rhs><nt def="NT-ISO639Code">ISO639Code</nt> | ('-' <nt def="NT-Subcode">Subcode</nt>)*</rhs></prod><prod id="NT-Langcode"><lhs>Langcode</lhs><rhs><nt def="NT-ISO639Code">ISO639Code</nt> |
<nt def="NT-IanaCode">IanaCode</nt> | <nt def="NT-IanaCode">IanaCode</nt> |
<nt def="NT-UserCode">UserCode</nt></rhs></prod><prod id="NT-ISO639Code"><lhs>ISO639Code</lhs><rhs>([a-z] | [A-Z]) ([a-z] | [A-Z])</rhs></prod><prod id="NT-IanaCode"><lhs>IanaCode</lhs><rhs>(&apos;i&apos; | &apos;I&apos;) &apos;-&apos; ([a-z] | [A-Z])+</rhs></prod><prod id="NT-UserCode"><lhs>UserCode</lhs><rhs>(&apos;x&apos; | &apos;X&apos;) &apos;-&apos; ([a-z] | [A-Z])+</rhs></prod><prod id="NT-Subcode"><lhs>Subcode</lhs><rhs>([a-z] | [A-Z])+</rhs></prod></scrap> <nt def="NT-UserCode">UserCode</nt></rhs></prod><prod id="NT-ISO639Code"><lhs>ISO639Code</lhs><rhs>([a-z] | [A-Z]) ([a-z] | [A-Z])</rhs></prod><prod id="NT-IanaCode"><lhs>IanaCode</lhs><rhs>('i' | 'I') '-' ([a-z] | [A-Z])+</rhs></prod><prod id="NT-UserCode"><lhs>UserCode</lhs><rhs>('x' | 'X') '-' ([a-z] | [A-Z])+</rhs></prod><prod id="NT-Subcode"><lhs>Subcode</lhs><rhs>([a-z] | [A-Z])+</rhs></prod></scrap>
The <nt def="NT-Langcode">Langcode</nt> may be any of the following: The <nt def="NT-Langcode">Langcode</nt> may be any of the following:
<ulist><item><p>a two-letter language code as defined by <ulist><item><p>a two-letter language code as defined by
<bibref ref="ISO639"/>, &quot;Codes <bibref ref="ISO639"/>, &quot;Codes
@ -1258,14 +1253,14 @@ has a <termref def="dt-attrname">name</termref> and a <termref def="dt-attrval">
</scrap> </scrap>
<p>This specification does not constrain the semantics, use, or (beyond <p>This specification does not constrain the semantics, use, or (beyond
syntax) names of the element types and attributes, except that names syntax) names of the element types and attributes, except that names
beginning with a match to <code>((&apos;X&apos;|&apos;x&apos;)(&apos;M&apos;|&apos;m&apos;)(&apos;L&apos;|&apos;l&apos;))</code> beginning with a match to <code>(('X'|'x')('M'|'m')('L'|'l'))</code>
are reserved for standardization in this or future versions of this are reserved for standardization in this or future versions of this
specification. specification.
</p> </p>
<wfcnote id="GIMatch"> <wfcnote id="GIMatch">
<head>Element Type Match</head> <head>Element Type Match</head>
<p> <p>
The <nt def="NT-Name">Name</nt> in an element&apos;s end-tag must match The <nt def="NT-Name">Name</nt> in an element's end-tag must match
the element type in the element type in
the start-tag. the start-tag.
</p> </p>
@ -1309,13 +1304,13 @@ been declared.</p>
<head>Start-Tags, End-Tags, and Empty-Element Tags</head> <head>Start-Tags, End-Tags, and Empty-Element Tags</head>
<p><termdef id="dt-stag" term="Start-Tag">The beginning of every <p><termdef id="dt-stag" term="Start-Tag">The beginning of every
non-empty XML element is marked by a <term>start-tag</term>. non-empty XML element is marked by a <term>start-tag</term>.
<scrap lang="ebnf"><head>Start-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-STag"><lhs>STag</lhs><rhs>&apos;&lt;&apos; <nt def="NT-Name">Name</nt> <scrap lang="ebnf"><head>Start-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-STag"><lhs>STag</lhs><rhs>'&lt;' <nt def="NT-Name">Name</nt>
(<nt def="NT-S">S</nt> <nt def="NT-Attribute">Attribute</nt>)* (<nt def="NT-S">S</nt> <nt def="NT-Attribute">Attribute</nt>)*
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs><wfc def="uniqattspec"/></prod><prod id="NT-Attribute"><lhs>Attribute</lhs><rhs><nt def="NT-Name">Name</nt> <nt def="NT-Eq">Eq</nt> <nt def="NT-S">S</nt>? '&gt;'</rhs><wfc def="uniqattspec"/></prod><prod id="NT-Attribute"><lhs>Attribute</lhs><rhs><nt def="NT-Name">Name</nt> <nt def="NT-Eq">Eq</nt>
<nt def="NT-AttValue">AttValue</nt></rhs><vc def="ValueType"/><wfc def="NoExternalRefs"/><wfc def="CleanAttrVals"/></prod></prodgroup></scrap> <nt def="NT-AttValue">AttValue</nt></rhs><vc def="ValueType"/><wfc def="NoExternalRefs"/><wfc def="CleanAttrVals"/></prod></prodgroup></scrap>
The <nt def="NT-Name">Name</nt> in The <nt def="NT-Name">Name</nt> in
the start- and end-tags gives the the start- and end-tags gives the
element&apos;s <term>type</term>.</termdef> element's <term>type</term>.</termdef>
<termdef id="dt-attr" term="Attribute"> <termdef id="dt-attr" term="Attribute">
The <nt def="NT-Name">Name</nt>-<nt def="NT-AttValue">AttValue</nt> pairs are The <nt def="NT-Name">Name</nt>-<nt def="NT-AttValue">AttValue</nt> pairs are
referred to as referred to as
@ -1325,7 +1320,7 @@ the <term>attribute specifications</term> of the element</termdef>,
referred to as the <term>attribute name</term></termdef> and referred to as the <term>attribute name</term></termdef> and
<termdef id="dt-attrval" term="Attribute Value">the content of the <termdef id="dt-attrval" term="Attribute Value">the content of the
<nt def="NT-AttValue">AttValue</nt> (the text between the <nt def="NT-AttValue">AttValue</nt> (the text between the
<code>&apos;</code> or <code>&quot;</code> delimiters) <code>'</code> or <code>&quot;</code> delimiters)
as the <term>attribute value</term>.</termdef> as the <term>attribute value</term>.</termdef>
</p> </p>
<wfcnote id="uniqattspec"> <wfcnote id="uniqattspec">
@ -1364,17 +1359,17 @@ a <code>&lt;</code>.
<termdef id="dt-etag" term="End Tag">The end of every element <termdef id="dt-etag" term="End Tag">The end of every element
that begins with a start-tag must that begins with a start-tag must
be marked by an <term>end-tag</term> be marked by an <term>end-tag</term>
containing a name that echoes the element&apos;s type as given in the containing a name that echoes the element's type as given in the
start-tag: start-tag:
<scrap lang="ebnf"><head>End-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-ETag"><lhs>ETag</lhs><rhs>&apos;&lt;/&apos; <nt def="NT-Name">Name</nt> <scrap lang="ebnf"><head>End-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-ETag"><lhs>ETag</lhs><rhs>'&lt;/' <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod></prodgroup></scrap> <nt def="NT-S">S</nt>? '&gt;'</rhs></prod></prodgroup></scrap>
</termdef> </termdef>
</p> </p>
<p>An example of an end-tag:<eg>&lt;/termdef&gt;</eg></p> <p>An example of an end-tag:<eg>&lt;/termdef&gt;</eg></p>
<p> <p>
<termdef id="dt-content" term="Content">The <termdef id="dt-content" term="Content">The
<termref def="dt-text">text</termref> between the start-tag and <termref def="dt-text">text</termref> between the start-tag and
end-tag is called the element&apos;s end-tag is called the element's
<term>content</term>: <term>content</term>:
<scrap lang="ebnf"><head>Content of Elements</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-content"><lhs>content</lhs><rhs>(<nt def="NT-element">element</nt> | <nt def="NT-CharData">CharData</nt> <scrap lang="ebnf"><head>Content of Elements</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-content"><lhs>content</lhs><rhs>(<nt def="NT-element">element</nt> | <nt def="NT-CharData">CharData</nt>
| <nt def="NT-Reference">Reference</nt> | <nt def="NT-CDSect">CDSect</nt> | <nt def="NT-Reference">Reference</nt> | <nt def="NT-CDSect">CDSect</nt>
@ -1386,9 +1381,9 @@ it must be represented either by a start-tag immediately followed
by an end-tag or by an empty-element tag.</termdef> by an end-tag or by an empty-element tag.</termdef>
<termdef id="dt-eetag" term="empty-element tag">An <termdef id="dt-eetag" term="empty-element tag">An
<term>empty-element tag</term> takes a special form: <term>empty-element tag</term> takes a special form:
<scrap lang="ebnf"><head>Tags for Empty Elements</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-EmptyElemTag"><lhs>EmptyElemTag</lhs><rhs>&apos;&lt;&apos; <nt def="NT-Name">Name</nt> (<nt def="NT-S">S</nt> <scrap lang="ebnf"><head>Tags for Empty Elements</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-EmptyElemTag"><lhs>EmptyElemTag</lhs><rhs>'&lt;' <nt def="NT-Name">Name</nt> (<nt def="NT-S">S</nt>
<nt def="NT-Attribute">Attribute</nt>)* <nt def="NT-S">S</nt>? <nt def="NT-Attribute">Attribute</nt>)* <nt def="NT-S">S</nt>?
&apos;/&gt;&apos;</rhs><wfc def="uniqattspec"/></prod></prodgroup></scrap> '/&gt;'</rhs><wfc def="uniqattspec"/></prod></prodgroup></scrap>
</termdef></p> </termdef></p>
<p>Empty-element tags may be used for any element which has no <p>Empty-element tags may be used for any element which has no
content, whether or not it is declared using the keyword content, whether or not it is declared using the keyword
@ -1409,7 +1404,7 @@ tag must be used, and can only be used, for elements which are
<termref def="dt-valid">validation</termref> purposes, <termref def="dt-valid">validation</termref> purposes,
be constrained be constrained
using element type and attribute-list declarations. using element type and attribute-list declarations.
An element type declaration constrains the element&apos;s An element type declaration constrains the element's
<termref def="dt-content">content</termref>. <termref def="dt-content">content</termref>.
</p> </p>
<p>Element type declarations often constrain which element types can <p>Element type declarations often constrain which element types can
@ -1419,12 +1414,12 @@ when a declaration mentions an element type for which no declaration
is provided, but this is not an error.</p> is provided, but this is not an error.</p>
<p><termdef id="dt-eldecl" term="Element Type declaration">An <term>element <p><termdef id="dt-eldecl" term="Element Type declaration">An <term>element
type declaration</term> takes the form: type declaration</term> takes the form:
<scrap lang="ebnf"><head>Element Type Declaration</head><prodgroup pcw2="5.5" pcw4="18" pcw5="9"><prod id="NT-elementdecl"><lhs>elementdecl</lhs><rhs>&apos;&lt;!ELEMENT&apos; <nt def="NT-S">S</nt> <scrap lang="ebnf"><head>Element Type Declaration</head><prodgroup pcw2="5.5" pcw4="18" pcw5="9"><prod id="NT-elementdecl"><lhs>elementdecl</lhs><rhs>'&lt;!ELEMENT' <nt def="NT-S">S</nt>
<nt def="NT-Name">Name</nt> <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt> <nt def="NT-S">S</nt>
<nt def="NT-contentspec">contentspec</nt> <nt def="NT-contentspec">contentspec</nt>
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs><vc def="EDUnique"/></prod><prod id="NT-contentspec"><lhs>contentspec</lhs><rhs>&apos;EMPTY&apos; <nt def="NT-S">S</nt>? '&gt;'</rhs><vc def="EDUnique"/></prod><prod id="NT-contentspec"><lhs>contentspec</lhs><rhs>'EMPTY'
| &apos;ANY&apos; | 'ANY'
| <nt def="NT-Mixed">Mixed</nt> | <nt def="NT-Mixed">Mixed</nt>
| <nt def="NT-children">children</nt> | <nt def="NT-children">children</nt>
</rhs></prod></prodgroup></scrap> </rhs></prod></prodgroup></scrap>
@ -1461,14 +1456,14 @@ choice lists of content particles, or
sequence lists of content particles: sequence lists of content particles:
<scrap lang="ebnf"><head>Element-content Models</head><prodgroup pcw2="5.5" pcw4="16" pcw5="11"><prod id="NT-children"><lhs>children</lhs><rhs>(<nt def="NT-choice">choice</nt> <scrap lang="ebnf"><head>Element-content Models</head><prodgroup pcw2="5.5" pcw4="16" pcw5="11"><prod id="NT-children"><lhs>children</lhs><rhs>(<nt def="NT-choice">choice</nt>
| <nt def="NT-seq">seq</nt>) | <nt def="NT-seq">seq</nt>)
(&apos;?&apos; | &apos;*&apos; | &apos;+&apos;)?</rhs></prod><prod id="NT-cp"><lhs>cp</lhs><rhs>(<nt def="NT-Name">Name</nt> ('?' | '*' | '+')?</rhs></prod><prod id="NT-cp"><lhs>cp</lhs><rhs>(<nt def="NT-Name">Name</nt>
| <nt def="NT-choice">choice</nt> | <nt def="NT-choice">choice</nt>
| <nt def="NT-seq">seq</nt>) | <nt def="NT-seq">seq</nt>)
(&apos;?&apos; | &apos;*&apos; | &apos;+&apos;)?</rhs></prod><prod id="NT-choice"><lhs>choice</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>? cp ('?' | '*' | '+')?</rhs></prod><prod id="NT-choice"><lhs>choice</lhs><rhs>'(' <nt def="NT-S">S</nt>? cp
( <nt def="NT-S">S</nt>? &apos;|&apos; <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )* ( <nt def="NT-S">S</nt>? '|' <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )*
<nt def="NT-S">S</nt>? &apos;)&apos;</rhs><vc def="vc-PEinGroup"/></prod><prod id="NT-seq"><lhs>seq</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>? cp <nt def="NT-S">S</nt>? ')'</rhs><vc def="vc-PEinGroup"/></prod><prod id="NT-seq"><lhs>seq</lhs><rhs>'(' <nt def="NT-S">S</nt>? cp
( <nt def="NT-S">S</nt>? &apos;,&apos; <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )* ( <nt def="NT-S">S</nt>? ',' <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )*
<nt def="NT-S">S</nt>? &apos;)&apos;</rhs><vc def="vc-PEinGroup"/></prod></prodgroup></scrap> <nt def="NT-S">S</nt>? ')'</rhs><vc def="vc-PEinGroup"/></prod></prodgroup></scrap>
where each <nt def="NT-Name">Name</nt> is the type of an element which may where each <nt def="NT-Name">Name</nt> is the type of an element which may
appear as a <termref def="dt-parentchild">child</termref>. appear as a <termref def="dt-parentchild">child</termref>.
Any content Any content
@ -1532,14 +1527,14 @@ character data, optionally interspersed with
<termref def="dt-parentchild">child</termref> elements.</termdef> <termref def="dt-parentchild">child</termref> elements.</termdef>
In this case, the types of the child elements In this case, the types of the child elements
may be constrained, but not their order or their number of occurrences: may be constrained, but not their order or their number of occurrences:
<scrap lang="ebnf"><head>Mixed-content Declaration</head><prodgroup pcw2="5.5" pcw4="16" pcw5="11"><prod id="NT-Mixed"><lhs>Mixed</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>? <scrap lang="ebnf"><head>Mixed-content Declaration</head><prodgroup pcw2="5.5" pcw4="16" pcw5="11"><prod id="NT-Mixed"><lhs>Mixed</lhs><rhs>'(' <nt def="NT-S">S</nt>?
&apos;#PCDATA&apos; '#PCDATA'
(<nt def="NT-S">S</nt>? (<nt def="NT-S">S</nt>?
&apos;|&apos; '|'
<nt def="NT-S">S</nt>? <nt def="NT-S">S</nt>?
<nt def="NT-Name">Name</nt>)* <nt def="NT-Name">Name</nt>)*
<nt def="NT-S">S</nt>? <nt def="NT-S">S</nt>?
&apos;)*&apos; </rhs><rhs>| &apos;(&apos; <nt def="NT-S">S</nt>? &apos;#PCDATA&apos; <nt def="NT-S">S</nt>? &apos;)&apos; ')*' </rhs><rhs>| '(' <nt def="NT-S">S</nt>? '#PCDATA' <nt def="NT-S">S</nt>? ')'
</rhs><vc def="vc-PEinGroup"/><vc def="vc-MixedChildrenUnique"/></prod></prodgroup></scrap> </rhs><vc def="vc-PEinGroup"/><vc def="vc-MixedChildrenUnique"/></prod></prodgroup></scrap>
where the <nt def="NT-Name">Name</nt>s give the types of elements where the <nt def="NT-Name">Name</nt>s give the types of elements
that may appear as children. that may appear as children.
@ -1575,10 +1570,10 @@ for attributes.</p></item></ulist>
<termdef id="dt-attdecl" term="Attribute-List Declaration"> <termdef id="dt-attdecl" term="Attribute-List Declaration">
<term>Attribute-list declarations</term> specify the name, data type, and default <term>Attribute-list declarations</term> specify the name, data type, and default
value (if any) of each attribute associated with a given element type: value (if any) of each attribute associated with a given element type:
<scrap lang="ebnf"><head>Attribute-list Declaration</head><prod id="NT-AttlistDecl"><lhs>AttlistDecl</lhs><rhs>&apos;&lt;!ATTLIST&apos; <nt def="NT-S">S</nt> <scrap lang="ebnf"><head>Attribute-list Declaration</head><prod id="NT-AttlistDecl"><lhs>AttlistDecl</lhs><rhs>'&lt;!ATTLIST' <nt def="NT-S">S</nt>
<nt def="NT-Name">Name</nt> <nt def="NT-Name">Name</nt>
<nt def="NT-AttDef">AttDef</nt>* <nt def="NT-AttDef">AttDef</nt>*
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod><prod id="NT-AttDef"><lhs>AttDef</lhs><rhs><nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> <nt def="NT-S">S</nt>? '&gt;'</rhs></prod><prod id="NT-AttDef"><lhs>AttDef</lhs><rhs><nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt> <nt def="NT-AttType">AttType</nt> <nt def="NT-S">S</nt> <nt def="NT-AttType">AttType</nt>
<nt def="NT-S">S</nt> <nt def="NT-DefaultDecl">DefaultDecl</nt></rhs></prod></scrap> <nt def="NT-S">S</nt> <nt def="NT-DefaultDecl">DefaultDecl</nt></rhs></prod></scrap>
The <nt def="NT-Name">Name</nt> in the The <nt def="NT-Name">Name</nt> in the
@ -1615,7 +1610,7 @@ and semantic constraints, as noted:
<scrap lang="ebnf"><head>Attribute Types</head><prodgroup pcw4="14" pcw5="11.5"><prod id="NT-AttType"><lhs>AttType</lhs><rhs><nt def="NT-StringType">StringType</nt> <scrap lang="ebnf"><head>Attribute Types</head><prodgroup pcw4="14" pcw5="11.5"><prod id="NT-AttType"><lhs>AttType</lhs><rhs><nt def="NT-StringType">StringType</nt>
| <nt def="NT-TokenizedType">TokenizedType</nt> | <nt def="NT-TokenizedType">TokenizedType</nt>
| <nt def="NT-EnumeratedType">EnumeratedType</nt> | <nt def="NT-EnumeratedType">EnumeratedType</nt>
</rhs></prod><prod id="NT-StringType"><lhs>StringType</lhs><rhs>&apos;CDATA&apos;</rhs></prod><prod id="NT-TokenizedType"><lhs>TokenizedType</lhs><rhs>&apos;ID&apos;</rhs><vc def="id"/><vc def="one-id-per-el"/><vc def="id-default"/><rhs>| &apos;IDREF&apos;</rhs><vc def="idref"/><rhs>| &apos;IDREFS&apos;</rhs><vc def="idref"/><rhs>| &apos;ENTITY&apos;</rhs><vc def="entname"/><rhs>| &apos;ENTITIES&apos;</rhs><vc def="entname"/><rhs>| &apos;NMTOKEN&apos;</rhs><vc def="nmtok"/><rhs>| &apos;NMTOKENS&apos;</rhs><vc def="nmtok"/></prod></prodgroup></scrap> </rhs></prod><prod id="NT-StringType"><lhs>StringType</lhs><rhs>'CDATA'</rhs></prod><prod id="NT-TokenizedType"><lhs>TokenizedType</lhs><rhs>'ID'</rhs><vc def="id"/><vc def="one-id-per-el"/><vc def="id-default"/><rhs>| 'IDREF'</rhs><vc def="idref"/><rhs>| 'IDREFS'</rhs><vc def="idref"/><rhs>| 'ENTITY'</rhs><vc def="entname"/><rhs>| 'ENTITIES'</rhs><vc def="entname"/><rhs>| 'NMTOKEN'</rhs><vc def="nmtok"/><rhs>| 'NMTOKENS'</rhs><vc def="nmtok"/></prod></prodgroup></scrap>
</p> </p>
<vcnote id="id"> <vcnote id="id">
<head>ID</head> <head>ID</head>
@ -1679,21 +1674,21 @@ of a list of values provided in the declaration</termdef>. There are two
kinds of enumerated types: kinds of enumerated types:
<scrap lang="ebnf"><head>Enumerated Attribute Types</head><prod id="NT-EnumeratedType"><lhs>EnumeratedType</lhs><rhs><nt def="NT-NotationType">NotationType</nt> <scrap lang="ebnf"><head>Enumerated Attribute Types</head><prod id="NT-EnumeratedType"><lhs>EnumeratedType</lhs><rhs><nt def="NT-NotationType">NotationType</nt>
| <nt def="NT-Enumeration">Enumeration</nt> | <nt def="NT-Enumeration">Enumeration</nt>
</rhs></prod><prod id="NT-NotationType"><lhs>NotationType</lhs><rhs>&apos;NOTATION&apos; </rhs></prod><prod id="NT-NotationType"><lhs>NotationType</lhs><rhs>'NOTATION'
<nt def="NT-S">S</nt> <nt def="NT-S">S</nt>
&apos;(&apos; '('
<nt def="NT-S">S</nt>? <nt def="NT-S">S</nt>?
<nt def="NT-Name">Name</nt> <nt def="NT-Name">Name</nt>
(<nt def="NT-S">S</nt>? &apos;|&apos; <nt def="NT-S">S</nt>? (<nt def="NT-S">S</nt>? '|' <nt def="NT-S">S</nt>?
<nt def="NT-Name">Name</nt>)* <nt def="NT-Name">Name</nt>)*
<nt def="NT-S">S</nt>? &apos;)&apos; <nt def="NT-S">S</nt>? ')'
</rhs><vc def="notatn"/></prod><prod id="NT-Enumeration"><lhs>Enumeration</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>? </rhs><vc def="notatn"/></prod><prod id="NT-Enumeration"><lhs>Enumeration</lhs><rhs>'(' <nt def="NT-S">S</nt>?
<nt def="NT-Nmtoken">Nmtoken</nt> <nt def="NT-Nmtoken">Nmtoken</nt>
(<nt def="NT-S">S</nt>? &apos;|&apos; (<nt def="NT-S">S</nt>? '|'
<nt def="NT-S">S</nt>? <nt def="NT-S">S</nt>?
<nt def="NT-Nmtoken">Nmtoken</nt>)* <nt def="NT-Nmtoken">Nmtoken</nt>)*
<nt def="NT-S">S</nt>? <nt def="NT-S">S</nt>?
&apos;)&apos;</rhs><vc def="enum"/></prod></scrap> ')'</rhs><vc def="enum"/></prod></scrap>
A <kw>NOTATION</kw> attribute identifies a A <kw>NOTATION</kw> attribute identifies a
<termref def="dt-notation">notation</termref>, declared in the <termref def="dt-notation">notation</termref>, declared in the
DTD with associated system and/or public identifiers, to DTD with associated system and/or public identifiers, to
@ -1726,10 +1721,10 @@ enumerated attribute types of a single element type.
<head>Attribute Defaults</head> <head>Attribute Defaults</head>
<p>An <termref def="dt-attdecl">attribute declaration</termref> provides <p>An <termref def="dt-attdecl">attribute declaration</termref> provides
information on whether information on whether
the attribute&apos;s presence is required, and if not, how an XML processor should the attribute's presence is required, and if not, how an XML processor should
react if a declared attribute is absent in a document. react if a declared attribute is absent in a document.
<scrap lang="ebnf"><head>Attribute Defaults</head><prodgroup pcw4="14" pcw5="11.5"><prod id="NT-DefaultDecl"><lhs>DefaultDecl</lhs><rhs>&apos;#REQUIRED&apos; <scrap lang="ebnf"><head>Attribute Defaults</head><prodgroup pcw4="14" pcw5="11.5"><prod id="NT-DefaultDecl"><lhs>DefaultDecl</lhs><rhs>'#REQUIRED'
|&nbsp;&apos;#IMPLIED&apos; </rhs><rhs>| ((&apos;#FIXED&apos; S)? <nt def="NT-AttValue">AttValue</nt>)</rhs><vc def="RequiredAttr"/><vc def="defattrvalid"/><wfc def="CleanAttrVals"/><vc def="FixedAttr"/></prod></prodgroup></scrap> |&nbsp;'#IMPLIED' </rhs><rhs>| (('#FIXED' S)? <nt def="NT-AttValue">AttValue</nt>)</rhs><vc def="RequiredAttr"/><vc def="defattrvalid"/><wfc def="CleanAttrVals"/><vc def="FixedAttr"/></prod></prodgroup></scrap>
</p> </p>
<p>In an attribute declaration, <kw>#REQUIRED</kw> means that the <p>In an attribute declaration, <kw>#REQUIRED</kw> means that the
@ -1817,16 +1812,16 @@ included in, or excluded from, the logical structure of the DTD based on
the keyword which governs them.</termdef> the keyword which governs them.</termdef>
<scrap lang="ebnf"><head>Conditional Section</head><prodgroup pcw2="9" pcw4="14.5"><prod id="NT-conditionalSect"><lhs>conditionalSect</lhs><rhs><nt def="NT-includeSect">includeSect</nt> <scrap lang="ebnf"><head>Conditional Section</head><prodgroup pcw2="9" pcw4="14.5"><prod id="NT-conditionalSect"><lhs>conditionalSect</lhs><rhs><nt def="NT-includeSect">includeSect</nt>
| <nt def="NT-ignoreSect">ignoreSect</nt> | <nt def="NT-ignoreSect">ignoreSect</nt>
</rhs></prod><prod id="NT-includeSect"><lhs>includeSect</lhs><rhs>&apos;&lt;![&apos; S? &apos;INCLUDE&apos; S? &apos;[&apos; </rhs></prod><prod id="NT-includeSect"><lhs>includeSect</lhs><rhs>'&lt;![' S? 'INCLUDE' S? '['
<nt def="NT-extSubsetDecl">extSubsetDecl</nt> <nt def="NT-extSubsetDecl">extSubsetDecl</nt>
&apos;]]&gt;&apos; ']]&gt;'
</rhs></prod><prod id="NT-ignoreSect"><lhs>ignoreSect</lhs><rhs>&apos;&lt;![&apos; S? &apos;IGNORE&apos; S? &apos;[&apos; </rhs></prod><prod id="NT-ignoreSect"><lhs>ignoreSect</lhs><rhs>'&lt;![' S? 'IGNORE' S? '['
<nt def="NT-ignoreSectContents">ignoreSectContents</nt>* <nt def="NT-ignoreSectContents">ignoreSectContents</nt>*
&apos;]]&gt;&apos;</rhs></prod><prod id="NT-ignoreSectContents"><lhs>ignoreSectContents</lhs><rhs><nt def="NT-Ignore">Ignore</nt> ']]&gt;'</rhs></prod><prod id="NT-ignoreSectContents"><lhs>ignoreSectContents</lhs><rhs><nt def="NT-Ignore">Ignore</nt>
(&apos;&lt;![&apos; <nt def="NT-ignoreSectContents">ignoreSectContents</nt> &apos;]]&gt;&apos; ('&lt;![' <nt def="NT-ignoreSectContents">ignoreSectContents</nt> ']]&gt;'
<nt def="NT-Ignore">Ignore</nt>)*</rhs></prod><prod id="NT-Ignore"><lhs>Ignore</lhs><rhs><nt def="NT-Char">Char</nt>* - <nt def="NT-Ignore">Ignore</nt>)*</rhs></prod><prod id="NT-Ignore"><lhs>Ignore</lhs><rhs><nt def="NT-Char">Char</nt>* -
(<nt def="NT-Char">Char</nt>* (&apos;&lt;![&apos; | &apos;]]&gt;&apos;) (<nt def="NT-Char">Char</nt>* ('&lt;![' | ']]&gt;')
<nt def="NT-Char">Char</nt>*) <nt def="NT-Char">Char</nt>*)
</rhs></prod></prodgroup></scrap> </rhs></prod></prodgroup></scrap>
</p> </p>
@ -1854,8 +1849,8 @@ parameter-entity reference, the parameter entity must be replaced by its
content before the processor decides whether to content before the processor decides whether to
include or ignore the conditional section.</p> include or ignore the conditional section.</p>
<p>An example: <p>An example:
<eg>&lt;!ENTITY % draft &apos;INCLUDE&apos; &gt; <eg>&lt;!ENTITY % draft 'INCLUDE' &gt;
&lt;!ENTITY % final &apos;IGNORE&apos; &gt; &lt;!ENTITY % final 'IGNORE' &gt;
&lt;![%draft;[ &lt;![%draft;[
&lt;!ELEMENT book (comments*, title, body, supplements?)&gt; &lt;!ELEMENT book (comments*, title, body, supplements?)&gt;
@ -1903,7 +1898,7 @@ called the <termref def="dt-docent">document entity</termref>, which serves
as the starting point for the <termref def="dt-xml-proc">XML as the starting point for the <termref def="dt-xml-proc">XML
processor</termref> and may contain the whole document.</p> processor</termref> and may contain the whole document.</p>
<p>Entities may be either parsed or unparsed. <p>Entities may be either parsed or unparsed.
<termdef id="dt-parsedent" term="Text Entity">A <term>parsed entity&apos;s</term> <termdef id="dt-parsedent" term="Text Entity">A <term>parsed entity's</term>
contents are referred to as its contents are referred to as its
<termref def="dt-repltext">replacement text</termref>; <termref def="dt-repltext">replacement text</termref>;
this <termref def="dt-text">text</termref> is considered an this <termref def="dt-text">text</termref> is considered an
@ -1942,15 +1937,15 @@ a general entity with the same name are two distinct entities.
A <term>character reference</term> refers to a specific character in the A <term>character reference</term> refers to a specific character in the
ISO/IEC 10646 character set, for example one not directly accessible from ISO/IEC 10646 character set, for example one not directly accessible from
available input devices. available input devices.
<scrap lang="ebnf"><head>Character Reference</head><prod id="NT-CharRef"><lhs>CharRef</lhs><rhs>&apos;&amp;#&apos; [0-9]+ &apos;;&apos; </rhs><rhs>| &apos;&hcro;&apos; [0-9a-fA-F]+ &apos;;&apos;</rhs><wfc def="wf-Legalchar"/></prod></scrap> <scrap lang="ebnf"><head>Character Reference</head><prod id="NT-CharRef"><lhs>CharRef</lhs><rhs>'&amp;#' [0-9]+ ';' </rhs><rhs>| '&hcro;' [0-9a-fA-F]+ ';'</rhs><wfc def="wf-Legalchar"/></prod></scrap>
<wfcnote id="wf-Legalchar"><head>Legal Character</head><p>Characters referred to using character references must <wfcnote id="wf-Legalchar"><head>Legal Character</head><p>Characters referred to using character references must
match the production for match the production for
<termref def="NT-Char">Char</termref>.</p></wfcnote> <termref def="NT-Char">Char</termref>.</p></wfcnote>
If the character reference begins with &quot;<code>&amp;#x</code>&quot;, the digits and If the character reference begins with &quot;<code>&amp;#x</code>&quot;, the digits and
letters up to the terminating <code>;</code> provide a hexadecimal letters up to the terminating <code>;</code> provide a hexadecimal
representation of the character&apos;s code point in ISO/IEC 10646. representation of the character's code point in ISO/IEC 10646.
If it begins just with &quot;<code>&amp;#</code>&quot;, the digits up to the terminating If it begins just with &quot;<code>&amp;#</code>&quot;, the digits up to the terminating
<code>;</code> provide a decimal representation of the character&apos;s <code>;</code> provide a decimal representation of the character's
code point. code point.
</termdef> </termdef>
</p> </p>
@ -1974,7 +1969,7 @@ semicolon
</prod> </prod>
<prod id="NT-EntityRef"> <prod id="NT-EntityRef">
<lhs>EntityRef</lhs> <lhs>EntityRef</lhs>
<rhs>&apos;&amp;&apos; <nt def="NT-Name">Name</nt> &apos;;&apos;</rhs> <rhs>'&amp;' <nt def="NT-Name">Name</nt> ';'</rhs>
<wfc def="wf-entdeclared"/> <wfc def="wf-entdeclared"/>
<vc def="vc-entdeclared"/> <vc def="vc-entdeclared"/>
<wfc def="textent"/> <wfc def="textent"/>
@ -1982,7 +1977,7 @@ semicolon
</prod> </prod>
<prod id="NT-PEReference"> <prod id="NT-PEReference">
<lhs>PEReference</lhs> <lhs>PEReference</lhs>
<rhs>&apos;%&apos; <nt def="NT-Name">Name</nt> &apos;;&apos;</rhs> <rhs>'%' <nt def="NT-Name">Name</nt> ';'</rhs>
<vc def="vc-entdeclared"/> <vc def="vc-entdeclared"/>
<wfc def="norecursion"/> <wfc def="norecursion"/>
<wfc def="indtd"/> <wfc def="indtd"/>
@ -1992,7 +1987,7 @@ semicolon
<head>Entity Declared</head> <head>Entity Declared</head>
<p>In a document without any DTD, a document with only an internal <p>In a document without any DTD, a document with only an internal
DTD subset which contains no parameter entity references, or a document with DTD subset which contains no parameter entity references, or a document with
&quot;<code>standalone=&apos;yes&apos;</code>&quot;, &quot;<code>standalone='yes'</code>&quot;,
the <nt def="NT-Name">Name</nt> given in the entity reference must the <nt def="NT-Name">Name</nt> given in the entity reference must
<termref def="dt-match">match</termref> that in an <termref def="dt-match">match</termref> that in an
<titleref href="sec-entity-decl">entity declaration</titleref>, except that <titleref href="sec-entity-decl">entity declaration</titleref>, except that
@ -2007,12 +2002,12 @@ external parameter entities, a non-validating processor is
<titleref href="include-if-valid">not obligated to</titleref> read <titleref href="include-if-valid">not obligated to</titleref> read
and process their declarations; for such documents, the rule that and process their declarations; for such documents, the rule that
an entity must be declared is a well-formedness constraint only an entity must be declared is a well-formedness constraint only
if <titleref href="sec-rmd">standalone=&apos;yes&apos;</titleref>.</p> if <titleref href="sec-rmd">standalone='yes'</titleref>.</p>
</wfcnote> </wfcnote>
<vcnote id="vc-entdeclared"> <vcnote id="vc-entdeclared">
<head>Entity Declared</head> <head>Entity Declared</head>
<p>In a document with an external subset or external parameter <p>In a document with an external subset or external parameter
entities with &quot;<code>standalone=&apos;no&apos;</code>&quot;, entities with &quot;<code>standalone='no'</code>&quot;,
the <nt def="NT-Name">Name</nt> given in the entity reference must <termref def="dt-match">match</termref> that in an the <nt def="NT-Name">Name</nt> given in the entity reference must <termref def="dt-match">match</termref> that in an
<titleref href="sec-entity-decl">entity declaration</titleref>. <titleref href="sec-entity-decl">entity declaration</titleref>.
For interoperability, valid documents should declare the entities For interoperability, valid documents should declare the entities
@ -2062,11 +2057,11 @@ is classified &amp;security-level;.</eg></p>
<p><termdef id="dt-entdecl" term="entity declaration"> <p><termdef id="dt-entdecl" term="entity declaration">
Entities are declared thus: Entities are declared thus:
<scrap lang="ebnf"><head>Entity Declaration</head><prodgroup pcw2="5" pcw4="18.5"><prod id="NT-EntityDecl"><lhs>EntityDecl</lhs><rhs><nt def="NT-GEDecl">GEDecl</nt><!--</rhs><com>General entities</com> <scrap lang="ebnf"><head>Entity Declaration</head><prodgroup pcw2="5" pcw4="18.5"><prod id="NT-EntityDecl"><lhs>EntityDecl</lhs><rhs><nt def="NT-GEDecl">GEDecl</nt><!--</rhs><com>General entities</com>
<rhs>--> | <nt def="NT-PEDecl">PEDecl</nt></rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-GEDecl"><lhs>GEDecl</lhs><rhs>&apos;&lt;!ENTITY&apos; <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> <rhs>--> | <nt def="NT-PEDecl">PEDecl</nt></rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-GEDecl"><lhs>GEDecl</lhs><rhs>'&lt;!ENTITY' <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt> <nt def="NT-EntityDef">EntityDef</nt> <nt def="NT-S">S</nt> <nt def="NT-EntityDef">EntityDef</nt>
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod><prod id="NT-PEDecl"><lhs>PEDecl</lhs><rhs>&apos;&lt;!ENTITY&apos; <nt def="NT-S">S</nt> &apos;%&apos; <nt def="NT-S">S</nt> <nt def="NT-S">S</nt>? '&gt;'</rhs></prod><prod id="NT-PEDecl"><lhs>PEDecl</lhs><rhs>'&lt;!ENTITY' <nt def="NT-S">S</nt> '%' <nt def="NT-S">S</nt>
<nt def="NT-Name">Name</nt> <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> <nt def="NT-S">S</nt>
<nt def="NT-PEDef">PEDef</nt> <nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-EntityDef"><lhs>EntityDef</lhs><rhs><nt def="NT-EntityValue">EntityValue</nt> <nt def="NT-PEDef">PEDef</nt> <nt def="NT-S">S</nt>? '&gt;'</rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-EntityDef"><lhs>EntityDef</lhs><rhs><nt def="NT-EntityValue">EntityValue</nt>
<!--</rhs> <!--</rhs>
<rhs>-->| (<nt def="NT-ExternalID">ExternalID</nt> <rhs>-->| (<nt def="NT-ExternalID">ExternalID</nt>
<nt def="NT-NDataDecl">NDataDecl</nt>?)</rhs><!-- <nt def='NT-ExternalDef'>ExternalDef</nt></rhs> --></prod><!-- FINAL EDIT: what happened to WFs here? --><prod id="NT-PEDef"><lhs>PEDef</lhs><rhs><nt def="NT-EntityValue">EntityValue</nt> <nt def="NT-NDataDecl">NDataDecl</nt>?)</rhs><!-- <nt def='NT-ExternalDef'>ExternalDef</nt></rhs> --></prod><!-- FINAL EDIT: what happened to WFs here? --><prod id="NT-PEDef"><lhs>PEDef</lhs><rhs><nt def="NT-EntityValue">EntityValue</nt>
@ -2107,12 +2102,12 @@ internal, it is an <term>external
entity</term>, declared as follows: entity</term>, declared as follows:
<scrap lang="ebnf"><head>External Entity Declaration</head><!-- <scrap lang="ebnf"><head>External Entity Declaration</head><!--
<prod id='NT-ExternalDef'><lhs>ExternalDef</lhs> <prod id='NT-ExternalDef'><lhs>ExternalDef</lhs>
<rhs></prod> --><prod id="NT-ExternalID"><lhs>ExternalID</lhs><rhs>&apos;SYSTEM&apos; <nt def="NT-S">S</nt> <rhs></prod> --><prod id="NT-ExternalID"><lhs>ExternalID</lhs><rhs>'SYSTEM' <nt def="NT-S">S</nt>
<nt def="NT-SystemLiteral">SystemLiteral</nt></rhs><rhs>| &apos;PUBLIC&apos; <nt def="NT-S">S</nt> <nt def="NT-SystemLiteral">SystemLiteral</nt></rhs><rhs>| 'PUBLIC' <nt def="NT-S">S</nt>
<nt def="NT-PubidLiteral">PubidLiteral</nt> <nt def="NT-PubidLiteral">PubidLiteral</nt>
<nt def="NT-S">S</nt> <nt def="NT-S">S</nt>
<nt def="NT-SystemLiteral">SystemLiteral</nt> <nt def="NT-SystemLiteral">SystemLiteral</nt>
</rhs></prod><prod id="NT-NDataDecl"><lhs>NDataDecl</lhs><rhs><nt def="NT-S">S</nt> &apos;NDATA&apos; <nt def="NT-S">S</nt> </rhs></prod><prod id="NT-NDataDecl"><lhs>NDataDecl</lhs><rhs><nt def="NT-S">S</nt> 'NDATA' <nt def="NT-S">S</nt>
<nt def="NT-Name">Name</nt></rhs><vc def="not-declared"/></prod></scrap> <nt def="NT-Name">Name</nt></rhs><vc def="not-declared"/></prod></scrap>
If the <nt def="NT-NDataDecl">NDataDecl</nt> is present, this is a If the <nt def="NT-NDataDecl">NDataDecl</nt> is present, this is a
general <termref def="dt-unparsed">unparsed general <termref def="dt-unparsed">unparsed
@ -2127,7 +2122,7 @@ The <nt def="NT-Name">Name</nt> must match the declared name of a
</vcnote> </vcnote>
<p><termdef id="dt-sysid" term="System Identifier">The <p><termdef id="dt-sysid" term="System Identifier">The
<nt def="NT-SystemLiteral">SystemLiteral</nt> <nt def="NT-SystemLiteral">SystemLiteral</nt>
is called the entity&apos;s <term>system identifier</term>. It is a URI, is called the entity's <term>system identifier</term>. It is a URI,
which may be used to retrieve the entity.</termdef> which may be used to retrieve the entity.</termdef>
Note that the hash mark (<code>#</code>) and fragment identifier Note that the hash mark (<code>#</code>) and fragment identifier
frequently used with URIs are not, formally, part of the URI itself; frequently used with URIs are not, formally, part of the URI itself;
@ -2151,7 +2146,7 @@ byte value).</p>
<p><termdef id="dt-pubid" term="Public identifier"> <p><termdef id="dt-pubid" term="Public identifier">
In addition to a system identifier, an external identifier may In addition to a system identifier, an external identifier may
include a <term>public identifier</term>.</termdef> include a <term>public identifier</term>.</termdef>
An XML processor attempting to retrieve the entity&apos;s content may use the public An XML processor attempting to retrieve the entity's content may use the public
identifier to try to generate an alternative URI. If the processor identifier to try to generate an alternative URI. If the processor
is unable to do so, it must use the URI specified in the system is unable to do so, it must use the URI specified in the system
literal. Before a match is attempted, all strings literal. Before a match is attempted, all strings
@ -2237,10 +2232,10 @@ Parsed entities which are stored in an encoding other than
UTF-8 or UTF-16 must begin with a <titleref href="TextDecl">text UTF-8 or UTF-16 must begin with a <titleref href="TextDecl">text
declaration</titleref> containing an encoding declaration: declaration</titleref> containing an encoding declaration:
<scrap lang="ebnf"><head>Encoding Declaration</head><prod id="NT-EncodingDecl"><lhs>EncodingDecl</lhs><rhs><nt def="NT-S">S</nt> <scrap lang="ebnf"><head>Encoding Declaration</head><prod id="NT-EncodingDecl"><lhs>EncodingDecl</lhs><rhs><nt def="NT-S">S</nt>
&apos;encoding&apos; <nt def="NT-Eq">Eq</nt> 'encoding' <nt def="NT-Eq">Eq</nt>
(&apos;&quot;&apos; <nt def="NT-EncName">EncName</nt> &apos;&quot;&apos; | ('&quot;' <nt def="NT-EncName">EncName</nt> '&quot;' |
&quot;&apos;&quot; <nt def="NT-EncName">EncName</nt> &quot;&apos;&quot; ) &quot;'&quot; <nt def="NT-EncName">EncName</nt> &quot;'&quot; )
</rhs></prod><prod id="NT-EncName"><lhs>EncName</lhs><rhs>[A-Za-z] ([A-Za-z0-9._] | &apos;-&apos;)*</rhs><com>Encoding name contains only Latin characters</com></prod></scrap> </rhs></prod><prod id="NT-EncName"><lhs>EncName</lhs><rhs>[A-Za-z] ([A-Za-z0-9._] | '-')*</rhs><com>Encoding name contains only Latin characters</com></prod></scrap>
In the <termref def="dt-docent">document entity</termref>, the encoding In the <termref def="dt-docent">document entity</termref>, the encoding
declaration is part of the <termref def="dt-xmldecl">XML declaration</termref>. declaration is part of the <termref def="dt-xmldecl">XML declaration</termref>.
The <nt def="NT-EncName">EncName</nt> is the name of the encoding used. The <nt def="NT-EncName">EncName</nt> is the name of the encoding used.
@ -2286,8 +2281,8 @@ an encoding declaration.</p>
<p>It is a <termref def="dt-fatal">fatal error</termref> when an XML processor <p>It is a <termref def="dt-fatal">fatal error</termref> when an XML processor
encounters an entity with an encoding that it is unable to process.</p> encounters an entity with an encoding that it is unable to process.</p>
<p>Examples of encoding declarations: <p>Examples of encoding declarations:
<eg>&lt;?xml encoding=&apos;UTF-8&apos;?&gt; <eg>&lt;?xml encoding='UTF-8'?&gt;
&lt;?xml encoding=&apos;EUC-JP&apos;?&gt;</eg></p> &lt;?xml encoding='EUC-JP'?&gt;</eg></p>
</div3> </div3>
</div2> </div2>
<div2 id="entproc"> <div2 id="entproc">
@ -2309,9 +2304,9 @@ the value of an
attribute which has been declared as type <kw>ENTITY</kw>, or as one of attribute which has been declared as type <kw>ENTITY</kw>, or as one of
the space-separated tokens in the value of an attribute which has been the space-separated tokens in the value of an attribute which has been
declared as type <kw>ENTITIES</kw>.</p></def></gitem><gitem><label>Reference in Entity Value</label><def><p>as a reference declared as type <kw>ENTITIES</kw>.</p></def></gitem><gitem><label>Reference in Entity Value</label><def><p>as a reference
within a parameter or internal entity&apos;s within a parameter or internal entity's
<termref def="dt-litentval">literal entity value</termref> in <termref def="dt-litentval">literal entity value</termref> in
the entity&apos;s declaration; corresponds to the nonterminal the entity's declaration; corresponds to the nonterminal
<nt def="NT-EntityValue">EntityValue</nt>.</p></def></gitem><gitem><label>Reference in DTD</label><def><p>as a reference within either the internal or external subsets of the <nt def="NT-EntityValue">EntityValue</nt>.</p></def></gitem><gitem><label>Reference in DTD</label><def><p>as a reference within either the internal or external subsets of the
<termref def="dt-doctype">DTD</termref>, but outside <termref def="dt-doctype">DTD</termref>, but outside
of an <nt def="NT-EntityValue">EntityValue</nt> or of an <nt def="NT-EntityValue">EntityValue</nt> or
@ -2470,7 +2465,7 @@ replacement text.
If the entity is external, and the processor is not If the entity is external, and the processor is not
attempting to validate the XML document, the attempting to validate the XML document, the
processor <termref def="dt-may">may</termref>, but need not, processor <termref def="dt-may">may</termref>, but need not,
include the entity&apos;s replacement text. include the entity's replacement text.
If a non-validating parser does not include the replacement text, If a non-validating parser does not include the replacement text,
it must inform the application that it recognized, but did not it must inform the application that it recognized, but did not
read, the entity.</p> read, the entity.</p>
@ -2479,7 +2474,7 @@ provided by the SGML and XML entity mechanism, primarily designed
to support modularity in authoring, is not necessarily to support modularity in authoring, is not necessarily
appropriate for other applications, in particular document browsing. appropriate for other applications, in particular document browsing.
Browsers, for example, when encountering an external parsed entity reference, Browsers, for example, when encountering an external parsed entity reference,
might choose to provide a visual indication of the entity&apos;s might choose to provide a visual indication of the entity's
presence and retrieve it for display only on demand. presence and retrieve it for display only on demand.
</p> </p>
</div3> </div3>
@ -2508,8 +2503,8 @@ For example, this is well-formed:
<eg><![CDATA[<!ENTITY % YN '"Yes"' > <eg><![CDATA[<!ENTITY % YN '"Yes"' >
<!ENTITY WhatHeSaid "He said &YN;" >]]></eg> <!ENTITY WhatHeSaid "He said &YN;" >]]></eg>
while this is not: while this is not:
<eg>&lt;!ENTITY EndAttr &quot;27&apos;&quot; &gt; <eg>&lt;!ENTITY EndAttr &quot;27'&quot; &gt;
&lt;element attribute=&apos;a-&amp;EndAttr;&gt;</eg> &lt;element attribute='a-&amp;EndAttr;&gt;</eg>
</p> </p>
</div3> </div3>
<div3 id="notify"> <div3 id="notify">
@ -2548,7 +2543,7 @@ entities to contain an integral number of grammatical tokens in the DTD.
<head>Construction of Internal Entity Replacement Text</head> <head>Construction of Internal Entity Replacement Text</head>
<p>In discussing the treatment <p>In discussing the treatment
of internal entities, it is of internal entities, it is
useful to distinguish two forms of the entity&apos;s value. useful to distinguish two forms of the entity's value.
<termdef id="dt-litentval" term="Literal Entity Value">The <term>literal <termdef id="dt-litentval" term="Literal Entity Value">The <term>literal
entity value</term> is the quoted string actually entity value</term> is the quoted string actually
present in the entity declaration, corresponding to the present in the entity declaration, corresponding to the
@ -2579,9 +2574,9 @@ For example, given the following declarations:
&#xA9; 1947 %pub;. &rights;" >]]></eg> &#xA9; 1947 %pub;. &rights;" >]]></eg>
then the replacement text for the entity &quot;<code>book</code>&quot; is: then the replacement text for the entity &quot;<code>book</code>&quot; is:
<eg>La Peste: Albert Camus, <eg>La Peste: Albert Camus,
&#169; 1947 &#201;ditions Gallimard. &amp;rights;</eg> © 1947 Éditions Gallimard. &amp;rights;</eg>
The general-entity reference &quot;<code>&amp;rights;</code>&quot; would be expanded The general-entity reference &quot;<code>&amp;rights;</code>&quot; would be expanded
should the reference &quot;<code>&amp;book;</code>&quot; appear in the document&apos;s should the reference &quot;<code>&amp;book;</code>&quot; appear in the document's
content or an attribute value.</p> content or an attribute value.</p>
<p>These simple rules may have complex interactions; for a detailed <p>These simple rules may have complex interactions; for a detailed
discussion of a difficult example, see discussion of a difficult example, see
@ -2642,11 +2637,11 @@ entity and attribute-list declarations and in attribute specifications,
and an external identifier for the notation which may allow an XML and an external identifier for the notation which may allow an XML
processor or its client application to locate a helper application processor or its client application to locate a helper application
capable of processing data in the given notation. capable of processing data in the given notation.
<scrap lang="ebnf"><head>Notation Declarations</head><prod id="NT-NotationDecl"><lhs>NotationDecl</lhs><rhs>&apos;&lt;!NOTATION&apos; <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> <scrap lang="ebnf"><head>Notation Declarations</head><prod id="NT-NotationDecl"><lhs>NotationDecl</lhs><rhs>'&lt;!NOTATION' <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt> <nt def="NT-S">S</nt>
(<nt def="NT-ExternalID">ExternalID</nt> | (<nt def="NT-ExternalID">ExternalID</nt> |
<nt def="NT-PublicID">PublicID</nt>) <nt def="NT-PublicID">PublicID</nt>)
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod><prod id="NT-PublicID"><lhs>PublicID</lhs><rhs>&apos;PUBLIC&apos; <nt def="NT-S">S</nt> <nt def="NT-S">S</nt>? '&gt;'</rhs></prod><prod id="NT-PublicID"><lhs>PublicID</lhs><rhs>'PUBLIC' <nt def="NT-S">S</nt>
<nt def="NT-PubidLiteral">PubidLiteral</nt> <nt def="NT-PubidLiteral">PubidLiteral</nt>
</rhs></prod></scrap> </rhs></prod></scrap>
</termdef> </termdef>
@ -2683,7 +2678,7 @@ without any identification at all.</p>
<p>Conforming <termref def="dt-xml-proc">XML processors</termref> fall into two <p>Conforming <termref def="dt-xml-proc">XML processors</termref> fall into two
classes: validating and non-validating.</p> classes: validating and non-validating.</p>
<p>Validating and non-validating processors alike must report <p>Validating and non-validating processors alike must report
violations of this specification&apos;s well-formedness constraints violations of this specification's well-formedness constraints
in the content of the in the content of the
<termref def="dt-docent">document entity</termref> and any <termref def="dt-docent">document entity</termref> and any
other <termref def="dt-parsedent">parsed entities</termref> that other <termref def="dt-parsedent">parsed entities</termref> that
@ -2784,7 +2779,7 @@ with a value in the range(s) indicated (inclusive).</p></def></gitem><gitem><lab
with a value <emph>outside</emph> the with a value <emph>outside</emph> the
range indicated.</p></def></gitem><gitem><label><code>[^abc]</code>, <code>[^#xN#xN#xN]</code></label><def><p>matches any <termref def="dt-character">character</termref> range indicated.</p></def></gitem><gitem><label><code>[^abc]</code>, <code>[^#xN#xN#xN]</code></label><def><p>matches any <termref def="dt-character">character</termref>
with a value not among the characters given.</p></def></gitem><gitem><label><code>&quot;string&quot;</code></label><def><p>matches a literal string <termref def="dt-match">matching</termref> with a value not among the characters given.</p></def></gitem><gitem><label><code>&quot;string&quot;</code></label><def><p>matches a literal string <termref def="dt-match">matching</termref>
that given inside the double quotes.</p></def></gitem><gitem><label><code>&apos;string&apos;</code></label><def><p>matches a literal string <termref def="dt-match">matching</termref> that given inside the double quotes.</p></def></gitem><gitem><label><code>'string'</code></label><def><p>matches a literal string <termref def="dt-match">matching</termref>
that given inside the single quotes.</p></def></gitem></glist> that given inside the single quotes.</p></def></gitem></glist>
These symbols may be combined to match more complex patterns as follows, These symbols may be combined to match more complex patterns as follows,
where <code>A</code> and <code>B</code> represent simple expressions: where <code>A</code> and <code>B</code> represent simple expressions:
@ -2861,17 +2856,17 @@ Berners-Lee, T., R. Fielding, and L. Masinter.
Semantics</emph>. Semantics</emph>.
1997. 1997.
(Work in progress; see updates to RFC1738.)</bibl> (Work in progress; see updates to RFC1738.)</bibl>
<bibl id="ABK" key="Br&#252;ggemann-Klein">Br&#252;ggemann-Klein, Anne. <bibl id="ABK" key="Br<EFBFBD>ggemann-Klein">Br<EFBFBD>ggemann-Klein, Anne.
<emph>Regular Expressions into Finite Automata</emph>. <emph>Regular Expressions into Finite Automata</emph>.
Extended abstract in I. Simon, Hrsg., LATIN 1992, Extended abstract in I. Simon, Hrsg., LATIN 1992,
S. 97-98. Springer-Verlag, Berlin 1992. S. 97-98. Springer-Verlag, Berlin 1992.
Full Version in Theoretical Computer Science 120: 197-213, 1993. Full Version in Theoretical Computer Science 120: 197-213, 1993.
</bibl> </bibl>
<bibl id="ABKDW" key="Br&#252;ggemann-Klein and Wood">Br&#252;ggemann-Klein, Anne, <bibl id="ABKDW" key="Br<EFBFBD>ggemann-Klein and Wood">Br<EFBFBD>ggemann-Klein, Anne,
and Derick Wood. and Derick Wood.
<emph>Deterministic Regular Languages</emph>. <emph>Deterministic Regular Languages</emph>.
Universit&#228;t Freiburg, Institut f&#252;r Informatik, Universit<EFBFBD>t Freiburg, Institut f<EFBFBD>r Informatik,
Bericht 38, Oktober 1991. Bericht 38, Oktober 1991.
</bibl> </bibl>
<bibl id="Clark" key="Clark">James Clark. <bibl id="Clark" key="Clark">James Clark.
@ -3268,7 +3263,7 @@ rather than name characters, because the property file classifies
them as Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.</p></item><item><p>Characters #x20DD-#x20E0 are excluded (in accordance with them as Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.</p></item><item><p>Characters #x20DD-#x20E0 are excluded (in accordance with
Unicode, section 5.14).</p></item><item><p>Character #x00B7 is classified as an extender, because the Unicode, section 5.14).</p></item><item><p>Character #x00B7 is classified as an extender, because the
property list so identifies it.</p></item><item><p>Character #x0387 is added as a name character, because #x00B7 property list so identifies it.</p></item><item><p>Character #x0387 is added as a name character, because #x00B7
is its canonical equivalent.</p></item><item><p>Characters &apos;:&apos; and &apos;_&apos; are allowed as name-start characters.</p></item><item><p>Characters &apos;-&apos; and &apos;.&apos; are allowed as name characters.</p></item></ulist> is its canonical equivalent.</p></item><item><p>Characters ':' and '_' are allowed as name-start characters.</p></item><item><p>Characters '-' and '.' are allowed as name characters.</p></item></ulist>
</p> </p>
</div1> </div1>
<inform-div1 id="sec-xml-and-sgml"> <inform-div1 id="sec-xml-and-sgml">
@ -3365,7 +3360,7 @@ In this case, the two references to
<code>b</code> can be collapsed <code>b</code> can be collapsed
into a single reference, making the model read into a single reference, making the model read
<code>(b, (c | d))</code>. An initial <code>b</code> now clearly <code>(b, (c | d))</code>. An initial <code>b</code> now clearly
matches only a single name in the content model. The parser doesn&apos;t matches only a single name in the content model. The parser doesn't
need to look ahead to see what follows; either <code>c</code> or need to look ahead to see what follows; either <code>c</code> or
<code>d</code> would be accepted.</p> <code>d</code> would be accepted.</p>
<p>More formally: a finite state automaton may be constructed from the <p>More formally: a finite state automaton may be constructed from the
@ -3384,7 +3379,7 @@ and may be reported as an error.
</p> </p>
<p>Algorithms exist which allow many but not all non-deterministic <p>Algorithms exist which allow many but not all non-deterministic
content models to be reduced automatically to equivalent deterministic content models to be reduced automatically to equivalent deterministic
models; see Br&#252;ggemann-Klein 1991 <bibref ref="ABK"/>.</p> models; see Br<EFBFBD>ggemann-Klein 1991 <bibref ref="ABK"/>.</p>
</inform-div1> </inform-div1>
<inform-div1 id="sec-guessing"> <inform-div1 id="sec-guessing">
<head>Autodetection of Character Encodings</head> <head>Autodetection of Character Encodings</head>
@ -3408,10 +3403,10 @@ processor without, or with, any accompanying
<p> <p>
Because each XML entity not in UTF-8 or UTF-16 format <emph>must</emph> Because each XML entity not in UTF-8 or UTF-16 format <emph>must</emph>
begin with an XML encoding declaration, in which the first characters begin with an XML encoding declaration, in which the first characters
must be &apos;<code>&lt;?xml</code>&apos;, any conforming processor can detect, must be '<code>&lt;?xml</code>', any conforming processor can detect,
after two to four octets of input, which of the following cases apply. after two to four octets of input, which of the following cases apply.
In reading this list, it may help to know that in UCS-4, &apos;&lt;&apos; is In reading this list, it may help to know that in UCS-4, '&lt;' is
&quot;<code>#x0000003C</code>&quot; and &apos;?&apos; is &quot;<code>#x0000003F</code>&quot;, and the Byte &quot;<code>#x0000003C</code>&quot; and '?' is &quot;<code>#x0000003F</code>&quot;, and the Byte
Order Mark required of UTF-16 data streams is &quot;<code>#xFEFF</code>&quot;.</p> Order Mark required of UTF-16 data streams is &quot;<code>#xFEFF</code>&quot;.</p>
<p> <p>
<ulist><item><p><code>00 00 00 3C</code>: UCS-4, big-endian machine (1234 order)</p></item><item><p><code>3C 00 00 00</code>: UCS-4, little-endian machine (4321 order)</p></item><item><p><code>00 00 3C 00</code>: UCS-4, unusual octet order (2143)</p></item><item><p><code>00 3C 00 00</code>: UCS-4, unusual octet order (3412)</p></item><item><p><code>FE FF</code>: UTF-16, big-endian</p></item><item><p><code>FF FE</code>: UTF-16, little-endian</p></item><item><p><code>00 3C 00 3F</code>: UTF-16, big-endian, no Byte Order Mark <ulist><item><p><code>00 00 00 3C</code>: UCS-4, big-endian machine (1234 order)</p></item><item><p><code>3C 00 00 00</code>: UCS-4, little-endian machine (4321 order)</p></item><item><p><code>00 00 3C 00</code>: UCS-4, unusual octet order (2143)</p></item><item><p><code>00 3C 00 00</code>: UCS-4, unusual octet order (3412)</p></item><item><p><code>FE FF</code>: UTF-16, big-endian</p></item><item><p><code>FF FE</code>: UTF-16, little-endian</p></item><item><p><code>00 3C 00 3F</code>: UTF-16, big-endian, no Byte Order Mark
@ -3456,7 +3451,7 @@ character of input.
</p> </p>
<p> <p>
Like any self-labeling system, the XML encoding declaration will not Like any self-labeling system, the XML encoding declaration will not
work if any software changes the entity&apos;s character set or encoding work if any software changes the entity's character set or encoding
without updating the encoding declaration. Implementors of without updating the encoding declaration. Implementors of
character-encoding routines should be careful to ensure the accuracy character-encoding routines should be careful to ensure the accuracy
of the internal and external information used to label the entity. of the internal and external information used to label the entity.
@ -3556,7 +3551,7 @@ Co-editor</role>
<name>Joel Nava, Adobe</name> <name>Joel Nava, Adobe</name>
</member> </member>
<member> <member>
<name>Conleth O&apos;Connell, Vignette</name> <name>Conleth O'Connell, Vignette</name>
</member> </member>
<member> <member>
<name>Peter Sharpe, SoftQuad</name> <name>Peter Sharpe, SoftQuad</name>

View File

@ -3,39 +3,39 @@
<!ELEMENT diagram (diagramdata , layer*)> <!ELEMENT diagram (diagramdata , layer*)>
<!ELEMENT diagramdata (attribute)*> <!ELEMENT diagramdata (attribute)*>
<!ELEMENT layer (object | group)*> <!ELEMENT layer (object | group)*>
<!ELEMENT object (attribute* , connections?)>
<!ELEMENT connections (connection)*>
<!ELEMENT connection EMPTY>
<!ELEMENT group (object | group)*>
<!ELEMENT attribute (composite | int | enum | real | boolean | color | point | rectangle | string | font)*>
<!ELEMENT composite (attribute)*>
<!ELEMENT int EMPTY>
<!ELEMENT enum EMPTY>
<!ELEMENT real EMPTY>
<!ELEMENT boolean EMPTY>
<!ELEMENT color EMPTY>
<!ELEMENT point EMPTY>
<!ELEMENT rectangle EMPTY>
<!ELEMENT string EMPTY>
<!ELEMENT font EMPTY>
<!ATTLIST layer name CDATA #REQUIRED> <!ATTLIST layer name CDATA #REQUIRED>
<!ATTLIST layer visible (true | false) #REQUIRED> <!ATTLIST layer visible (true | false) #REQUIRED>
<!ELEMENT object (attribute* , connections?)>
<!ATTLIST object type CDATA #REQUIRED> <!ATTLIST object type CDATA #REQUIRED>
<!ATTLIST object version NMTOKEN #REQUIRED> <!ATTLIST object version NMTOKEN #REQUIRED>
<!ATTLIST object id ID #REQUIRED> <!ATTLIST object id ID #REQUIRED>
<!ELEMENT connections (connection)*>
<!ELEMENT connection EMPTY>
<!ATTLIST connection handle NMTOKEN #REQUIRED> <!ATTLIST connection handle NMTOKEN #REQUIRED>
<!ATTLIST connection to IDREF #REQUIRED> <!ATTLIST connection to IDREF #REQUIRED>
<!ATTLIST connection connection NMTOKEN #REQUIRED> <!ATTLIST connection connection NMTOKEN #REQUIRED>
<!ELEMENT group (object | group)*>
<!ELEMENT attribute (composite | int | enum | real | boolean | color | point | rectangle | string | font)*>
<!ATTLIST attribute name CDATA #REQUIRED> <!ATTLIST attribute name CDATA #REQUIRED>
<!ELEMENT composite (attribute)*>
<!ATTLIST composite type CDATA #IMPLIED> <!ATTLIST composite type CDATA #IMPLIED>
<!ELEMENT int EMPTY>
<!ATTLIST int val NMTOKEN #REQUIRED> <!ATTLIST int val NMTOKEN #REQUIRED>
<!ELEMENT enum EMPTY>
<!ATTLIST enum val NMTOKEN #REQUIRED> <!ATTLIST enum val NMTOKEN #REQUIRED>
<!ELEMENT real EMPTY>
<!ATTLIST real val CDATA #REQUIRED> <!ATTLIST real val CDATA #REQUIRED>
<!ELEMENT boolean EMPTY>
<!ATTLIST boolean val (true | false) #REQUIRED> <!ATTLIST boolean val (true | false) #REQUIRED>
<!ELEMENT color EMPTY>
<!ATTLIST color val CDATA #REQUIRED> <!ATTLIST color val CDATA #REQUIRED>
<!ELEMENT point EMPTY>
<!ATTLIST point val CDATA #REQUIRED> <!ATTLIST point val CDATA #REQUIRED>
<!ELEMENT rectangle EMPTY>
<!ATTLIST rectangle val CDATA #REQUIRED> <!ATTLIST rectangle val CDATA #REQUIRED>
<!ELEMENT string EMPTY>
<!ATTLIST string val CDATA #IMPLIED> <!ATTLIST string val CDATA #IMPLIED>
<!ELEMENT font EMPTY>
<!ATTLIST font name CDATA #REQUIRED> <!ATTLIST font name CDATA #REQUIRED>
]> ]>
<dia:diagram xmlns:dia="http://www.lysator.liu.se/~alla/dia/"> <dia:diagram xmlns:dia="http://www.lysator.liu.se/~alla/dia/">

View File

@ -68,7 +68,7 @@ type="text/css"?>
</status> </status>
<abstract> <abstract>
<!-- edited the abstract for further clarity - bent --> <!-- edited the abstract for further clarity - bent -->
<p>This specification defines constructs that may be inserted into XML DTDs, schemas and document instances to describe links between objects. It uses XML syntax to create structures that can describe the simple unidirectional hyperlinks of today&apos;s HTML as well as more sophisticated links.</p> <p>This specification defines constructs that may be inserted into XML DTDs, schemas and document instances to describe links between objects. It uses XML syntax to create structures that can describe the simple unidirectional hyperlinks of today's HTML as well as more sophisticated links.</p>
</abstract> </abstract>
<pubstmt> <pubstmt>
<p>Burlington, Seekonk, et al.: World-Wide Web Consortium, XML Working Group, 1998.</p> <p>Burlington, Seekonk, et al.: World-Wide Web Consortium, XML Working Group, 1998.</p>
@ -99,7 +99,7 @@ type="text/css"?>
<sitem>1999-05-12: Prose/organization work. Re-organized some of the sections, removed XML constructs from the document, added descriptive prose, edited document text for clarity. Rewrote the link recognition section. bent</sitem> <sitem>1999-05-12: Prose/organization work. Re-organized some of the sections, removed XML constructs from the document, added descriptive prose, edited document text for clarity. Rewrote the link recognition section. bent</sitem>
<sitem>1999-05-17: Further prose work. Added non-normative examples. Clarified arcs. bent</sitem> <sitem>1999-05-17: Further prose work. Added non-normative examples. Clarified arcs. bent</sitem>
<sitem>1999-05-23: Edited for grammar and clarity. bent</sitem> <sitem>1999-05-23: Edited for grammar and clarity. bent</sitem>
<sitem>1999-05-27: Final once-over before sending to group. Fixed sjd&apos;s email address. bent</sitem> <sitem>1999-05-27: Final once-over before sending to group. Fixed sjd's email address. bent</sitem>
</slist> </slist>
</revisiondesc> </revisiondesc>
</header> </header>
@ -109,7 +109,7 @@ type="text/css"?>
<head>Introduction</head> <head>Introduction</head>
<p>This specification defines constructs that may be inserted into XML DTDs, schemas, and document instances to describe links between objects. A <termref def="dt-link">link</termref>, as the term is used here, is an explicit relationship between two or more data objects or portions of data objects. This specification is concerned with the syntax used to assert link existence and describe link characteristics. Implicit (unasserted) relationships, for example that of one word to the next or that of a word in a text to its entry in an on-line dictionary are obviously important, but outside its scope.</p> <p>This specification defines constructs that may be inserted into XML DTDs, schemas, and document instances to describe links between objects. A <termref def="dt-link">link</termref>, as the term is used here, is an explicit relationship between two or more data objects or portions of data objects. This specification is concerned with the syntax used to assert link existence and describe link characteristics. Implicit (unasserted) relationships, for example that of one word to the next or that of a word in a text to its entry in an on-line dictionary are obviously important, but outside its scope.</p>
<p>Links are asserted by <xtermref href="WD-xml-lang.html#dt-element">elements </xtermref> contained in <xtermref href="WD-xml-lang.html#dt-xml-doc">XML document instances</xtermref>. The simplest case is very like an HTML <code>A</code> link, and has these characteristics: <p>Links are asserted by <xtermref href="WD-xml-lang.html#dt-element">elements </xtermref> contained in <xtermref href="WD-xml-lang.html#dt-xml-doc">XML document instances</xtermref>. The simplest case is very like an HTML <code>A</code> link, and has these characteristics:
<ulist><item><p>The link is expressed at one of its ends (similar to the <code>A</code> element in some document)</p></item><item><p>Users can only initiate travel from that end to the other</p></item><item><p>The link&apos;s effect on windows, frames, go-back lists, stylesheets in use, and so on is mainly determined by browsers, not by the link itself. For example, traveral of <code>A</code> links normally replaces the current view, perhaps with a user option to open a new window.</p></item><item><p>The link goes to only one destination (although a server may have great freedom in finding or dynamically creating that destination).</p></item></ulist> <ulist><item><p>The link is expressed at one of its ends (similar to the <code>A</code> element in some document)</p></item><item><p>Users can only initiate travel from that end to the other</p></item><item><p>The link's effect on windows, frames, go-back lists, stylesheets in use, and so on is mainly determined by browsers, not by the link itself. For example, traveral of <code>A</code> links normally replaces the current view, perhaps with a user option to open a new window.</p></item><item><p>The link goes to only one destination (although a server may have great freedom in finding or dynamically creating that destination).</p></item></ulist>
</p> </p>
<p>While this set of characteristics is already very powerful and obviously has proven itself highly useful and effective, each of these assumptions also limits the range of hypertext functionality. The linking model defined here provides ways to create links that go beyond each of these specific characteristics, thus providing features previously available mostly in dedicated hypermedia systems. <p>While this set of characteristics is already very powerful and obviously has proven itself highly useful and effective, each of these assumptions also limits the range of hypertext functionality. The linking model defined here provides ways to create links that go beyond each of these specific characteristics, thus providing features previously available mostly in dedicated hypermedia systems.
</p> </p>
@ -137,7 +137,7 @@ document. bent-->
<glist><gitem><label><termdef id="dt-arc" term="Arc">arc</termdef></label><def><p>A symbolic representation of traversal behavior in links, especially the direction, context and timing of traversal.</p></def></gitem><gitem><label><termdef id="dt-eltree" term="Element Tree">element tree</termdef></label><def><p>A representation of the relevant structure specified by the tags and attributes in an XML document, based on &quot;groves&quot; as defined in the ISO DSSSL standard. </p></def></gitem><gitem><label><termdef id="dt-inline" term="In-Line Link">inline link</termdef></label><def><p>Abstractly, a <termref def="dt-link">link</termref> which serves as one of its own <termref def="dt-resource">resources</termref>. Concretely, a link where the content of the <termref def="dt-linkel">linking element</termref> serves as a <termref def="dt-particip-resource">participating resource</termref>. <glist><gitem><label><termdef id="dt-arc" term="Arc">arc</termdef></label><def><p>A symbolic representation of traversal behavior in links, especially the direction, context and timing of traversal.</p></def></gitem><gitem><label><termdef id="dt-eltree" term="Element Tree">element tree</termdef></label><def><p>A representation of the relevant structure specified by the tags and attributes in an XML document, based on &quot;groves&quot; as defined in the ISO DSSSL standard. </p></def></gitem><gitem><label><termdef id="dt-inline" term="In-Line Link">inline link</termdef></label><def><p>Abstractly, a <termref def="dt-link">link</termref> which serves as one of its own <termref def="dt-resource">resources</termref>. Concretely, a link where the content of the <termref def="dt-linkel">linking element</termref> serves as a <termref def="dt-particip-resource">participating resource</termref>.
HTML <code>A</code>, HyTime <code>clink</code>, and TEI <code>XREF</code> HTML <code>A</code>, HyTime <code>clink</code>, and TEI <code>XREF</code>
are all inline links.</p></def></gitem><gitem><label><termdef id="dt-link" term="Link">link</termdef></label><def><p>An explicit relationship between two or more data objects or portions of data objects.</p></def></gitem><gitem><label><termdef id="dt-linkel" term="Linking Element">linking element </termdef></label><def><p>An <xtermref href="WD-xml-lang.html#dt-element">element</xtermref> that asserts the existence and describes the characteristics of a <termref def="dt-link"> link</termref>.</p></def></gitem><gitem><label><termdef id="dt-local-resource" term="Local Resource">local resource</termdef></label><def><p>The content of an <termref def="dt-inline">inline</termref>linking element. Note that the content of the linking element could be explicitly pointed to by means of a regular <termref def="dt-locator">locator</termref> in the same linking element, in which case the resource is considered <termref def="dt-remote-resource"> remote</termref>, not local.</p></def></gitem><gitem><label><termdef id="dt-locator" term="Locator">locator</termdef> </label><def><p>Data, provided as part of a link, which identifies a are all inline links.</p></def></gitem><gitem><label><termdef id="dt-link" term="Link">link</termdef></label><def><p>An explicit relationship between two or more data objects or portions of data objects.</p></def></gitem><gitem><label><termdef id="dt-linkel" term="Linking Element">linking element </termdef></label><def><p>An <xtermref href="WD-xml-lang.html#dt-element">element</xtermref> that asserts the existence and describes the characteristics of a <termref def="dt-link"> link</termref>.</p></def></gitem><gitem><label><termdef id="dt-local-resource" term="Local Resource">local resource</termdef></label><def><p>The content of an <termref def="dt-inline">inline</termref>linking element. 
Note that the content of the linking element could be explicitly pointed to by means of a regular <termref def="dt-locator">locator</termref> in the same linking element, in which case the resource is considered <termref def="dt-remote-resource"> remote</termref>, not local.</p></def></gitem><gitem><label><termdef id="dt-locator" term="Locator">locator</termdef> </label><def><p>Data, provided as part of a link, which identifies a
<termref def="dt-resource">resource</termref>.</p></def></gitem><gitem><label><termdef id="dt-multidir" term="Multi-Directional Link">multidirectional link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose <termref def="dt-traversal"> traversal</termref> can be initiated from more than one of its <termref def="dt-particip-resource"> participating resources</termref>. Note that being able to &quot;go back&quot; after following a one-directional link does not make the link multidirectional.</p></def></gitem><gitem><label><termdef id="dt-outofline" term="Out-of-line Link">out-of-line link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose content does not serve as one of the link&apos;s <termref def="dt-particip-resource">participating resources </termref>. Such links presuppose a notion like <termref def="dt-xlg">extended link groups</termref>, which instruct application software where to look for links. Out-of-line links are generally required for supporting multidirectional <termref def="dt-traversal">traversal</termref> and for allowing read-only resources to have outgoing links.</p></def></gitem><gitem><label><termdef id="dt-parsedq" term="Parsed">parsed</termdef></label><def><p>In the context of link behavior, a parsed link is any link whose content is transcluded into the document where the link originated. The use of the term &quot;parsed&quot; directly refers to the concept in XML of a <termref def="dt-resource">resource</termref>.</p></def></gitem><gitem><label><termdef id="dt-multidir" term="Multi-Directional Link">multidirectional link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose <termref def="dt-traversal"> traversal</termref> can be initiated from more than one of its <termref def="dt-particip-resource"> participating resources</termref>. 
Note that being able to &quot;go back&quot; after following a one-directional link does not make the link multidirectional.</p></def></gitem><gitem><label><termdef id="dt-outofline" term="Out-of-line Link">out-of-line link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose content does not serve as one of the link's <termref def="dt-particip-resource">participating resources </termref>. Such links presuppose a notion like <termref def="dt-xlg">extended link groups</termref>, which instruct application software where to look for links. Out-of-line links are generally required for supporting multidirectional <termref def="dt-traversal">traversal</termref> and for allowing read-only resources to have outgoing links.</p></def></gitem><gitem><label><termdef id="dt-parsedq" term="Parsed">parsed</termdef></label><def><p>In the context of link behavior, a parsed link is any link whose content is transcluded into the document where the link originated. The use of the term &quot;parsed&quot; directly refers to the concept in XML of a
parsed entity.</p></def></gitem><gitem><label><termdef id="dt-particip-resource" term="Participating Resource"> participating resource</termdef></label><def><p>A <termref def="dt-resource">resource</termref> that belongs to a link. All resources are potential contributors to a link; participating resources are the actual contributors to a particular link.</p></def></gitem><gitem><label><termdef id="dt-remote-resource" term="Remote Resource">remote resource</termdef></label><def><p>Any participating resource of a link that is pointed to with a locator. </p></def></gitem><gitem><label><termdef id="dt-resource" term="Resource">resource</termdef></label><def><p>In the abstract sense, an addressable unit of information or service that is participating in a <termref def="dt-link">link</termref>. Examples include files, images, documents, programs, and query results. Concretely, anything reachable by the use of a <termref def="dt-locator">locator</termref> in some <termref def="dt-linkel">linking element</termref>. Note that this term and its definition are taken from the basic specifications governing the World Wide Web. <!--Joel notes: need link here. bent asks: A link?--> parsed entity.</p></def></gitem><gitem><label><termdef id="dt-particip-resource" term="Participating Resource"> participating resource</termdef></label><def><p>A <termref def="dt-resource">resource</termref> that belongs to a link. All resources are potential contributors to a link; participating resources are the actual contributors to a particular link.</p></def></gitem><gitem><label><termdef id="dt-remote-resource" term="Remote Resource">remote resource</termdef></label><def><p>Any participating resource of a link that is pointed to with a locator. </p></def></gitem><gitem><label><termdef id="dt-resource" term="Resource">resource</termdef></label><def><p>In the abstract sense, an addressable unit of information or service that is participating in a <termref def="dt-link">link</termref>. 
Examples include files, images, documents, programs, and query results. Concretely, anything reachable by the use of a <termref def="dt-locator">locator</termref> in some <termref def="dt-linkel">linking element</termref>. Note that this term and its definition are taken from the basic specifications governing the World Wide Web. <!--Joel notes: need link here. bent asks: A link?-->
</p></def></gitem><gitem><label><termdef id="dt-subresource" term="sub-Resource">sub-resource</termdef></label><def><p>A portion of a resource, pointed to as the precise destination of a link. As one example, a link might specify that an entire document be retrieved and displayed, but that some specific part(s) of it is the specific linked data, to be treated in an application-appropriate manner such as indication by highlighting, scrolling, etc.</p></def></gitem><gitem><label><termdef id="dt-traversal" term="Traversal">traversal</termdef></label><def><p>The action of using a <termref def="dt-link">link</termref>; that is, of accessing a <termref def="dt-resource">resource</termref>. Traversal may be initiated by a user action (for example, clicking on the displayed content of a <termref def="dt-linkel">linking element</termref>) or occur under program control.</p></def></gitem></glist> </p></def></gitem><gitem><label><termdef id="dt-subresource" term="sub-Resource">sub-resource</termdef></label><def><p>A portion of a resource, pointed to as the precise destination of a link. As one example, a link might specify that an entire document be retrieved and displayed, but that some specific part(s) of it is the specific linked data, to be treated in an application-appropriate manner such as indication by highlighting, scrolling, etc.</p></def></gitem><gitem><label><termdef id="dt-traversal" term="Traversal">traversal</termdef></label><def><p>The action of using a <termref def="dt-link">link</termref>; that is, of accessing a <termref def="dt-resource">resource</termref>. Traversal may be initiated by a user action (for example, clicking on the displayed content of a <termref def="dt-linkel">linking element</termref>) or occur under program control.</p></def></gitem></glist>
</p> </p>
@ -156,8 +156,8 @@ document. bent-->
<p>A locator generally contains a URI, as described in IETF RFCs <bibref ref="rfc1738"/> and <bibref ref="rfc1808"/>. As these RFCs state, the URI may include a trailing <emph>query</emph> (marked by a leading &quot;<code>?</code>&quot;), and be followed by a &quot;<code>#</code>&quot; and a <emph>fragment identifier</emph>, with the query interpreted by the host providing the indicated resource, and the interpretation of the fragment identifier dependent on the data type of the indicated resource.</p> <p>A locator generally contains a URI, as described in IETF RFCs <bibref ref="rfc1738"/> and <bibref ref="rfc1808"/>. As these RFCs state, the URI may include a trailing <emph>query</emph> (marked by a leading &quot;<code>?</code>&quot;), and be followed by a &quot;<code>#</code>&quot; and a <emph>fragment identifier</emph>, with the query interpreted by the host providing the indicated resource, and the interpretation of the fragment identifier dependent on the data type of the indicated resource.</p>
<!--Is there some restriction on URNs having queries and/or fragment identifiers? Since these RFCs don't mention URIs explicitly, should the wording here lead from URLs to URIs more explicitly? -elm--> <!--Is there some restriction on URNs having queries and/or fragment identifiers? Since these RFCs don't mention URIs explicitly, should the wording here lead from URLs to URIs more explicitly? -elm-->
<p>In order to locate XML documents and portions of documents, a locator value may contain either a <xtermref href="http://www.w3.org/Addressing/rfc1738.txt"> URI</xtermref> or a fragment identifier, or both. Any fragment identifier for pointing into XML must be an <xtermref href="http://www.w3.org/TR/WD-xptr#dt-xpointer"> XPointer</xtermref>.</p> <p>In order to locate XML documents and portions of documents, a locator value may contain either a <xtermref href="http://www.w3.org/Addressing/rfc1738.txt"> URI</xtermref> or a fragment identifier, or both. Any fragment identifier for pointing into XML must be an <xtermref href="http://www.w3.org/TR/WD-xptr#dt-xpointer"> XPointer</xtermref>.</p>
<p>Special syntax may be used to request the use of particular processing models in accessing the locator&apos;s resource. This is designed to reflect the realities of network operation, where it may or may not be desirable to exercise fine control over the distribution of work between local and remote processors. <p>Special syntax may be used to request the use of particular processing models in accessing the locator's resource. This is designed to reflect the realities of network operation, where it may or may not be desirable to exercise fine control over the distribution of work between local and remote processors.
<scrap id="locator" lang="ebnf"><head>Locator</head><prod id="nt-locator"><lhs>Locator</lhs><rhs><nt def="nt-uri">URI</nt></rhs><rhs>| <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs><rhs>| <nt def="nt-uri">URI</nt> <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs></prod><prod id="nt-connector"><lhs>Connector</lhs><rhs>&apos;#&apos; | &apos;|&apos;</rhs></prod><prod id="nt-uri"><lhs>URI</lhs><rhs><xnt href="WD-xml-lang.html#NT-URLchar">URIchar*</xnt></rhs></prod></scrap> <scrap id="locator" lang="ebnf"><head>Locator</head><prod id="nt-locator"><lhs>Locator</lhs><rhs><nt def="nt-uri">URI</nt></rhs><rhs>| <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs><rhs>| <nt def="nt-uri">URI</nt> <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs></prod><prod id="nt-connector"><lhs>Connector</lhs><rhs>'#' | '|'</rhs></prod><prod id="nt-uri"><lhs>URI</lhs><rhs><xnt href="WD-xml-lang.html#NT-URLchar">URIchar*</xnt></rhs></prod></scrap>
</p> </p>
<p><termdef id="dt-designated" term="Designated Resource">In this discussion, the term <term>designated resource</term> refers to the resource which an entire locator serves to locate.</termdef> The following rules apply: <p><termdef id="dt-designated" term="Designated Resource">In this discussion, the term <term>designated resource</term> refers to the resource which an entire locator serves to locate.</termdef> The following rules apply:
<ulist><item><p><termdef id="dt-containing-resource" term="Containing Resource"> The URI, if provided, locates a resource called the <term>containing resource</term>.</termdef></p></item><item><p>If the URI is not provided, the containing resource is considered to be the document in which the linking element is contained. <ulist><item><p><termdef id="dt-containing-resource" term="Containing Resource"> The URI, if provided, locates a resource called the <term>containing resource</term>.</termdef></p></item><item><p>If the URI is not provided, the containing resource is considered to be the document in which the linking element is contained.
@ -169,7 +169,7 @@ document. bent-->
</p> </p>
<p>Note that the definition of a URI includes an optional query component. </p> <p>Note that the definition of a URI includes an optional query component. </p>
<p>In the case where the URI contains a query (to be interpreted by the server), information providers and authors of server software are urged to use queries as follows: <p>In the case where the URI contains a query (to be interpreted by the server), information providers and authors of server software are urged to use queries as follows:
<scrap id="querysyntax" lang="ebnf"><head>Query</head><prod id="nt-query"><lhs>Query</lhs><rhs>&apos;XML-XPTR=&apos; (<xnt href="http://www.w3.org/TR/WD-xptr"> XPointer</xnt> | <xnt href="http://www.w3.org/TR/REC-xml#NT-Name">Name</xnt>)</rhs></prod></scrap> <scrap id="querysyntax" lang="ebnf"><head>Query</head><prod id="nt-query"><lhs>Query</lhs><rhs>'XML-XPTR=' (<xnt href="http://www.w3.org/TR/WD-xptr"> XPointer</xnt> | <xnt href="http://www.w3.org/TR/REC-xml#NT-Name">Name</xnt>)</rhs></prod></scrap>
</p> </p>
<!-- fixed link to XML recommendation - bent --> <!-- fixed link to XML recommendation - bent -->
</div1> </div1>
@ -177,7 +177,7 @@ document. bent-->
<?Pub Dtl?> <?Pub Dtl?>
<head>Link Recognition</head> <head>Link Recognition</head>
<p>The existence of a <termref def="dt-link">link</termref> is asserted by a <termref def="dt-linkel">linking element</termref>. Linking elements must be recognized reliably by application software in order to provide appropriate display and behavior. There are several ways link recognition could be accomplished: for example, reserving element type names, reserving attributes names, leaving the matter of recognition entirely up to stylesheets and application software, or using the XLink <xtermref href="http://www.w3.org/TR/REC-xml-names/">namespace</xtermref> to specify element names and attribute names that would be recognized by namespace and XLink-aware processors. Using element and attribute names within the XLink namespace provides a balance between giving users control of their own markup language design and keeping the identification of linking elements simple and unambiguous.</p> <p>The existence of a <termref def="dt-link">link</termref> is asserted by a <termref def="dt-linkel">linking element</termref>. Linking elements must be recognized reliably by application software in order to provide appropriate display and behavior. There are several ways link recognition could be accomplished: for example, reserving element type names, reserving attributes names, leaving the matter of recognition entirely up to stylesheets and application software, or using the XLink <xtermref href="http://www.w3.org/TR/REC-xml-names/">namespace</xtermref> to specify element names and attribute names that would be recognized by namespace and XLink-aware processors. Using element and attribute names within the XLink namespace provides a balance between giving users control of their own markup language design and keeping the identification of linking elements simple and unambiguous.</p>
<p>The two approaches to identifying linking elements are relatively simple to implement. For example, here&apos;s how the HTML <code>A</code> element would be declared using attributes within the XLink namespace, and then how an element within the XLink namespace might do the same: <p>The two approaches to identifying linking elements are relatively simple to implement. For example, here's how the HTML <code>A</code> element would be declared using attributes within the XLink namespace, and then how an element within the XLink namespace might do the same:
<eg>&lt;A xlink:type=&quot;simple&quot; xlink:href=&quot;http://www.w3.org/TR/wd-xlink/&quot; <eg>&lt;A xlink:type=&quot;simple&quot; xlink:href=&quot;http://www.w3.org/TR/wd-xlink/&quot;
xlink:title=&quot;The Xlink Working Draft&quot;&gt;The XLink Working Draft.&lt;/A&gt;</eg> xlink:title=&quot;The Xlink Working Draft&quot;&gt;The XLink Working Draft.&lt;/A&gt;</eg>
<eg>&lt;xlink:simple href=&quot;http://www.w3.org/TR/wd-xlink/&quot; <eg>&lt;xlink:simple href=&quot;http://www.w3.org/TR/wd-xlink/&quot;
@ -208,8 +208,8 @@ title=&quot;The XLink Working Draft&quot;&gt;The XLink Working Draft&lt;/xlink:s
</div2> </div2>
<div2 id="link-semantics"> <div2 id="link-semantics">
<head>Semantic Attributes</head> <head>Semantic Attributes</head>
<p>There are two attributes associated with semantics, <code>role</code> and <code>title</code>. The <code>role</code> attribute is a generic string used to describe the function of the link&apos;s content. For example, a poem might have a link with a <code>role=&quot;stanza&quot;</code>. The <code>role</code> is also used as an identifier for the <code>from</code> and <code>to</code> attributes of arcs.</p> <p>There are two attributes associated with semantics, <code>role</code> and <code>title</code>. The <code>role</code> attribute is a generic string used to describe the function of the link's content. For example, a poem might have a link with a <code>role=&quot;stanza&quot;</code>. The <code>role</code> is also used as an identifier for the <code>from</code> and <code>to</code> attributes of arcs.</p>
<p>The <code>title</code> attribute is designed to provide human-readable text describing the link. It is very useful for those who have text-based applications, whether that be due to a constricted device that cannot display the link&apos;s content, or if it&apos;s being read by an application to a visually-impaired user, or if it&apos;s being used to create a table of links. The <code>title</code> attribute contains a simple, descriptive string.</p> <p>The <code>title</code> attribute is designed to provide human-readable text describing the link. It is very useful for those who have text-based applications, whether that be due to a constricted device that cannot display the link's content, or if it's being read by an application to a visually-impaired user, or if it's being used to create a table of links. The <code>title</code> attribute contains a simple, descriptive string.</p>
</div2> </div2>
</div1> </div1>
<div1 id="linking-elements"> <div1 id="linking-elements">
@ -257,7 +257,7 @@ The XLink Working Draft.&lt;/foo&gt;</eg>
<p>Note that it is meaningful to have an out-of-line simple link, although <p>Note that it is meaningful to have an out-of-line simple link, although
such links are uncommon. They are called &quot;one-ended&quot; and are typically used such links are uncommon. They are called &quot;one-ended&quot; and are typically used
to associate discrete semantic properties with locations. The properties might to associate discrete semantic properties with locations. The properties might
be expressed by attributes on the link, the link&apos;s element type name, or in be expressed by attributes on the link, the link's element type name, or in
some other way, and are not considered full-fledged resources of the link. some other way, and are not considered full-fledged resources of the link.
Most out-of-line links are extended links, as these have a far wider range Most out-of-line links are extended links, as these have a far wider range
of uses.</p> of uses.</p>
@ -270,7 +270,7 @@ The XLink Working Draft.&lt;/foo&gt;</eg>
<p>These additional capabilities of extended links are required for: <p>These additional capabilities of extended links are required for:
<ulist><item><p>Enabling outgoing links in documents that cannot be modified to add an inline link</p></item><item><p>Creating links to and from resources in formats with no native support for embedded links (such as most multimedia formats)</p></item><item><p>Applying and filtering sets of relevant links on demand</p></item><item><p>Enabling other advanced hypermedia capabilities</p></item></ulist> <ulist><item><p>Enabling outgoing links in documents that cannot be modified to add an inline link</p></item><item><p>Creating links to and from resources in formats with no native support for embedded links (such as most multimedia formats)</p></item><item><p>Applying and filtering sets of relevant links on demand</p></item><item><p>Enabling other advanced hypermedia capabilities</p></item></ulist>
</p> </p>
<p>Application software might be expected to provide traversal among all of a link&apos;s participating resources (subject to semantic constraints outside the scope of this specification) and to signal the fact that a given resource or sub-resource participates in one or more links when it is displayed (even though there is no markup at exactly that point to signal it).</p> <p>Application software might be expected to provide traversal among all of a link's participating resources (subject to semantic constraints outside the scope of this specification) and to signal the fact that a given resource or sub-resource participates in one or more links when it is displayed (even though there is no markup at exactly that point to signal it).</p>
<p>A linking element for an extended link contains a series of <xtermref href="http://www.w3.org/TR/REC-xml/#dt-parentchild">child elements</xtermref> that serve as locators and arcs. Because an extended link can have more than one remote resource, it separates out linking itself from the mechanisms used to locate each resource (whereas a simple link combines the two).</p> <p>A linking element for an extended link contains a series of <xtermref href="http://www.w3.org/TR/REC-xml/#dt-parentchild">child elements</xtermref> that serve as locators and arcs. Because an extended link can have more than one remote resource, it separates out linking itself from the mechanisms used to locate each resource (whereas a simple link combines the two).</p>
<p>The <code>xlink:type</code> attribute value for an extended link must be <code> extended</code>, if the link is being instantiated on an arbitrary element. Note that extended links introduce variants of the <code>show</code> and <code>actuate</code> behavior attributes. These attributes, the <code>showdefault</code> and <code>actuatedefault</code> define the same behavior as their counterparts. However, in this case, they are considered to define the default behavior for all the linking elements that they contain.</p> <p>The <code>xlink:type</code> attribute value for an extended link must be <code> extended</code>, if the link is being instantiated on an arbitrary element. Note that extended links introduce variants of the <code>show</code> and <code>actuate</code> behavior attributes. These attributes, the <code>showdefault</code> and <code>actuatedefault</code> define the same behavior as their counterparts. However, in this case, they are considered to define the default behavior for all the linking elements that they contain.</p>
<p>However, when a linking element within an extended link has a <code>show</code> or <code>actuate</code> attribute of its own, that attribute overrides the defaults set on the extended linking element.</p> <p>However, when a linking element within an extended link has a <code>show</code> or <code>actuate</code> attribute of its own, that attribute overrides the defaults set on the extended linking element.</p>
@ -293,13 +293,13 @@ The XLink Working Draft.&lt;/foo&gt;</eg>
xlink:showdefault (new|parsed|replace) #IMPLIED xlink:showdefault (new|parsed|replace) #IMPLIED
xlink:actuatedefault (user|auto) #IMPLIED &gt;</eg> xlink:actuatedefault (user|auto) #IMPLIED &gt;</eg>
The following two examples demonstrate how each of the above might appear within a document instance. Note that the content of these examples would be other elements. For brevity&apos;s sake, they&apos;ve been left blank. The first example shows how the link might appear, using an explicit XLink extended link: The following two examples demonstrate how each of the above might appear within a document instance. Note that the content of these examples would be other elements. For brevity's sake, they've been left blank. The first example shows how the link might appear, using an explicit XLink extended link:
<eg>&lt;xlink:extended role=&quot;address book&quot; title=&quot;Ben&apos;s Address Book&quot; showdefault=&quot;replace&quot; actuatedefault=&quot;user&quot;&gt; ... &lt;/xlink:extended&gt;</eg> <eg>&lt;xlink:extended role=&quot;address book&quot; title=&quot;Ben's Address Book&quot; showdefault=&quot;replace&quot; actuatedefault=&quot;user&quot;&gt; ... &lt;/xlink:extended&gt;</eg>
And the second shows how the link might appear, using an arbitrary element: And the second shows how the link might appear, using an arbitrary element:
<eg>&lt;foo xlink:type=&quot;extended&quot; xlink:role=&quot;address book&quot; xlink:title=&quot;Ben&apos;s Address Book&quot; xlink:showdefault=&quot;replace&quot; xlink:actuatedefault=&quot;user&quot;&gt; ... &lt;/foo&gt;</eg> <eg>&lt;foo xlink:type=&quot;extended&quot; xlink:role=&quot;address book&quot; xlink:title=&quot;Ben's Address Book&quot; xlink:showdefault=&quot;replace&quot; xlink:actuatedefault=&quot;user&quot;&gt; ... &lt;/foo&gt;</eg>
</p> </p>
</div2> </div2>
<div2 id="xlink-arcs"> <div2 id="xlink-arcs">

View File

@ -1,6 +1,6 @@
./test/valid/xlink.xml:450: validity error: ID dt-arc already defined ./test/valid/xlink.xml:450: validity error: ID dt-arc already defined
<p><termdef id="dt-arc" term="Arc">An <term>arc</term> is contained within an <p><termdef id="dt-arc" term="Arc">An <term>arc</term> is contained within an
^ ^
./test/valid/xlink.xml:530: validity error: IDREF attribute def reference an unknown ID 'dt-xlg' ./test/valid/xlink.xml:530: validity error: IDREF attribute def reference an unknown ID "dt-xlg"
^ ^

View File

@ -1,8 +1,8 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<!DOCTYPE test [ <!DOCTYPE test [
<!ELEMENT test (#PCDATA)>
<!ENTITY % xx "&#37;zz;"> <!ENTITY % xx "&#37;zz;">
<!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >'> <!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >'>
<!ENTITY tricky "error-prone"> <!ENTITY tricky "error-prone">
<!ELEMENT test (#PCDATA)>
]> ]>
<test>This sample shows a &tricky; method.</test> <test>This sample shows a &tricky; method.</test>

View File

@ -1,5 +1,5 @@
<!DOCTYPE doc [ <!DOCTYPE doc [
<!ENTITY % YN '"Yes"' > <!ENTITY YN '"Yes"' >
<!ENTITY WhatHeSaid "He said %YN;" > <!ENTITY WhatHeSaid "He said &YN;" >
]> ]>
<doc>&WhatHeSaid;</doc> <doc>&WhatHeSaid;</doc>

View File

@ -73,6 +73,7 @@ xmlSAXHandler emptySAXHandlerStruct = {
NULL, /* xmlParserError */ NULL, /* xmlParserError */
NULL, /* xmlParserError */ NULL, /* xmlParserError */
NULL, /* getParameterEntity */ NULL, /* getParameterEntity */
NULL, /* cdataBlock; */
}; };
xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct; xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
@ -454,6 +455,21 @@ processingInstructionDebug(void *ctx, const xmlChar *target,
(char *) target, (char *) data); (char *) target, (char *) data);
} }
/**
 * cdataBlockDebug:
 * @ctx:  the user data (XML parser context); not used by this handler
 * @value:  the content of the CDATA block
 * @len:  the block length
 *
 * Debug SAX callback installed in the cdataBlock slot of the debug
 * handler table. It traces the event on stderr.
 *
 * The trace label is "SAX.pcdata" and is kept as-is. The text is printed
 * with a fixed precision of 20, so at most 20 bytes of @value are shown
 * (fewer if a NUL byte is met first); @len is only reported, it does not
 * bound how much of @value is printed.
 */
void
cdataBlockDebug(void *ctx, const xmlChar *value, int len)
{
const char *text = (const char *) value;

fprintf(stderr, "SAX.pcdata(%.20s, %d)\n", text, len);
}
/** /**
* commentDebug: * commentDebug:
* @ctxt: An XML parser context * @ctxt: An XML parser context
@ -553,6 +569,7 @@ xmlSAXHandler debugSAXHandlerStruct = {
errorDebug, errorDebug,
fatalErrorDebug, fatalErrorDebug,
getParameterEntityDebug, getParameterEntityDebug,
cdataBlockDebug
}; };
xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct; xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;

322
tester.c
View File

@ -14,6 +14,8 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <stdio.h>
#include <stdarg.h>
#ifdef HAVE_SYS_TYPES_H #ifdef HAVE_SYS_TYPES_H
#include <sys/types.h> #include <sys/types.h>
@ -39,6 +41,7 @@
#include "xmlmemory.h" #include "xmlmemory.h"
#include "parser.h" #include "parser.h"
#include "parserInternals.h"
#include "HTMLparser.h" #include "HTMLparser.h"
#include "HTMLtree.h" #include "HTMLtree.h"
#include "tree.h" #include "tree.h"
@ -51,18 +54,252 @@ static int copy = 0;
static int recovery = 0; static int recovery = 0;
static int noent = 0; static int noent = 0;
static int noout = 0; static int noout = 0;
static int nowrap = 0;
static int valid = 0; static int valid = 0;
static int postvalid = 0; static int postvalid = 0;
static int repeat = 0; static int repeat = 0;
static int insert = 0; static int insert = 0;
static int compress = 0; static int compress = 0;
static int html = 0; static int html = 0;
static int htmlout = 0;
static int shell = 0; static int shell = 0;
static int push = 0; static int push = 0;
static int blanks = 0; static int noblanks = 0;
extern int xmlDoValidityCheckingDefaultValue; extern int xmlDoValidityCheckingDefaultValue;
extern int xmlGetWarningsDefaultValue;
/************************************************************************
* *
* HTML output *
* *
************************************************************************/
char buffer[50000];
/**
 * xmlHTMLEncodeSend:
 *
 * Flushes the message accumulated in the global buffer: the text is run
 * through xmlEncodeEntitiesReentrant(), the encoded copy (when one is
 * returned) is written to stderr and freed, and the buffer is reset to the
 * empty string in every case.
 */
void
xmlHTMLEncodeSend(void) {
    char *encoded = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);

    if (encoded != NULL) {
        fputs(encoded, stderr);
        xmlFree(encoded);
    }
    /* Always start the next message from an empty buffer. */
    buffer[0] = 0;
}
/**
 * xmlHTMLPrintFileInfo:
 * @input:  an xmlParserInputPtr input, may be NULL
 *
 * Opens an HTML paragraph on stderr and displays the file name and line
 * number associated with @input ("Entity: line N: " when the input has no
 * file name). Nothing but the opening tag is emitted for a NULL @input.
 * The text is appended to the global buffer and then flushed through
 * xmlHTMLEncodeSend().
 */
void
xmlHTMLPrintFileInfo(xmlParserInputPtr input) {
    size_t len;

    fprintf(stderr, "<p>");
    if (input != NULL) {
        /* Bounded append: file names come from the outside world. */
        len = strlen(buffer);
        if (input->filename) {
            snprintf(&buffer[len], sizeof(buffer) - len, "%s:%d: ",
                     input->filename, input->line);
        } else {
            snprintf(&buffer[len], sizeof(buffer) - len,
                     "Entity: line %d: ", input->line);
        }
    }
    xmlHTMLEncodeSend();
}
/**
 * xmlHTMLPrintFileContext:
 * @input:  an xmlParserInputPtr input, may be NULL
 *
 * Displays the current context within the input content for error tracking:
 * inside a <pre> element written to stderr, prints up to 79 characters of
 * the line holding the current position, then a second line with a '^'
 * placed under that position. Does nothing when @input is NULL.
 * The text is appended to the global buffer (never past its end) and then
 * flushed through xmlHTMLEncodeSend().
 */
void
xmlHTMLPrintFileContext(xmlParserInputPtr input) {
    const xmlChar *cur, *base;
    size_t len;
    int n;

    if (input == NULL) return;
    fprintf(stderr, "<pre>\n");

    /* Step back over end-of-line characters sitting under the cursor. */
    cur = input->cur;
    base = input->base;
    while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) {
        cur--;
    }
    /* Walk back to the start of the line, at most 80 characters. */
    n = 0;
    while ((n++ < 80) && (cur > base) && (*cur != '\n') && (*cur != '\r'))
        cur--;
    if ((*cur == '\n') || (*cur == '\r')) cur++;
    base = cur;

    /* Copy the line (79 characters at most), keeping room for the NUL. */
    len = strlen(buffer);
    n = 0;
    while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) {
        if (len + 1 < sizeof(buffer))
            buffer[len++] = (char) *cur;
        cur++;
        n++;
    }
    if (len + 1 < sizeof(buffer))
        buffer[len++] = '\n';

    /*
     * Locate the caret column. The backward scan is bounded by the start
     * of the input so it can never read before the input data.
     */
    cur = input->cur;
    while ((cur > input->base) && ((*cur == '\n') || (*cur == '\r')))
        cur--;
    n = 0;
    /* 'base < cur' also ends the loop if cur lies before the line start. */
    while ((base < cur) && (n++ < 80)) {
        if (len + 1 < sizeof(buffer))
            buffer[len++] = ' ';
        base++;
    }
    if (len + 2 < sizeof(buffer)) {
        buffer[len++] = '^';
        buffer[len++] = '\n';
    }
    buffer[len] = 0;
    xmlHTMLEncodeSend();
    fprintf(stderr, "</pre>");
}
/**
 * xmlHTMLError:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Displays and formats an error message as HTML on stderr: file and line,
 * the formatted message, then the input context with a position marker.
 * When the current input has no file name and more than one input is
 * stacked, the position is taken from the input just below the top of the
 * stack. The message is formatted into the global buffer with a bounded
 * write, so over-long text is truncated instead of overflowing it.
 */
void
xmlHTMLError(void *ctx, const char *msg, ...)
{
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlParserInputPtr input;
    va_list args;
    size_t len;

    buffer[0] = 0;
    input = ctxt->input;
    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1))
        input = ctxt->inputTab[ctxt->inputNr - 2];

    xmlHTMLPrintFileInfo(input);

    fprintf(stderr, "<b>error</b>: ");
    len = strlen(buffer);
    va_start(args, msg);
    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
    va_end(args);
    xmlHTMLEncodeSend();
    fprintf(stderr, "</p>\n");

    xmlHTMLPrintFileContext(input);
    xmlHTMLEncodeSend();
}
/**
 * xmlHTMLWarning:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Displays and formats a warning message as HTML on stderr: file and line,
 * the formatted message, then the input context with a position marker.
 * When the current input has no file name and more than one input is
 * stacked, the position is taken from the input just below the top of the
 * stack. The message is formatted into the global buffer with a bounded
 * write, so over-long text is truncated instead of overflowing it.
 */
void
xmlHTMLWarning(void *ctx, const char *msg, ...)
{
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlParserInputPtr input;
    va_list args;
    size_t len;

    buffer[0] = 0;
    input = ctxt->input;
    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1))
        input = ctxt->inputTab[ctxt->inputNr - 2];

    xmlHTMLPrintFileInfo(input);

    fprintf(stderr, "<b>warning</b>: ");
    len = strlen(buffer);
    va_start(args, msg);
    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
    va_end(args);
    xmlHTMLEncodeSend();
    fprintf(stderr, "</p>\n");

    xmlHTMLPrintFileContext(input);
    xmlHTMLEncodeSend();
}
/**
 * xmlHTMLValidityError:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Displays and formats a validity error message as HTML on stderr: file
 * and line, the formatted message, then the input context with a position
 * marker. When the current input has no file name and more than one input
 * is stacked, the position is taken from the input just below the top of
 * the stack; a context without a current input is tolerated. The message
 * is formatted into the global buffer with a bounded write, so over-long
 * text is truncated instead of overflowing it.
 */
void
xmlHTMLValidityError(void *ctx, const char *msg, ...)
{
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlParserInputPtr input;
    va_list args;
    size_t len;

    buffer[0] = 0;
    input = ctxt->input;
    /* Same NULL guard as xmlHTMLError: the helpers below accept NULL. */
    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1))
        input = ctxt->inputTab[ctxt->inputNr - 2];

    xmlHTMLPrintFileInfo(input);

    fprintf(stderr, "<b>validity error</b>: ");
    len = strlen(buffer);
    va_start(args, msg);
    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
    va_end(args);
    xmlHTMLEncodeSend();
    fprintf(stderr, "</p>\n");

    xmlHTMLPrintFileContext(input);
    xmlHTMLEncodeSend();
}
/**
 * xmlHTMLValidityWarning:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Displays and formats a validity warning message as HTML on stderr: file
 * and line, the formatted message, then the input context with a position
 * marker. When the current input has no file name and more than one input
 * is stacked, the position is taken from the input just below the top of
 * the stack; a context without a current input is tolerated. The message
 * is formatted into the global buffer with a bounded write, so over-long
 * text is truncated instead of overflowing it.
 */
void
xmlHTMLValidityWarning(void *ctx, const char *msg, ...)
{
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlParserInputPtr input;
    va_list args;
    size_t len;

    buffer[0] = 0;
    input = ctxt->input;
    /* Same NULL guard as xmlHTMLWarning: the helpers below accept NULL. */
    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1))
        input = ctxt->inputTab[ctxt->inputNr - 2];

    xmlHTMLPrintFileInfo(input);

    fprintf(stderr, "<b>validity warning</b>: ");
    len = strlen(buffer);
    va_start(args, msg);
    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
    va_end(args);
    xmlHTMLEncodeSend();
    fprintf(stderr, "</p>\n");

    xmlHTMLPrintFileContext(input);
    xmlHTMLEncodeSend();
}
/************************************************************************
* *
* Shell Interface *
* *
************************************************************************/
/** /**
* xmlShellReadline: * xmlShellReadline:
* @prompt: the prompt value * @prompt: the prompt value
@ -97,6 +334,11 @@ xmlShellReadline(char *prompt) {
#endif #endif
} }
/************************************************************************
* *
* Test processing *
* *
************************************************************************/
void parseAndPrintFile(char *filename) { void parseAndPrintFile(char *filename) {
xmlDocPtr doc = NULL, tmp; xmlDocPtr doc = NULL, tmp;
@ -129,9 +371,40 @@ void parseAndPrintFile(char *filename) {
xmlFreeParserCtxt(ctxt); xmlFreeParserCtxt(ctxt);
} }
} }
} else if (recovery) } else if (recovery) {
doc = xmlRecoverFile(filename); doc = xmlRecoverFile(filename);
} else if (htmlout) {
int ret;
xmlParserCtxtPtr ctxt;
xmlSAXHandler silent, *old;
ctxt = xmlCreateFileParserCtxt(filename);
memcpy(&silent, ctxt->sax, sizeof(silent));
old = ctxt->sax;
silent.error = xmlHTMLError;
if (xmlGetWarningsDefaultValue)
silent.warning = xmlHTMLWarning;
else else
silent.warning = NULL;
silent.fatalError = xmlHTMLError;
ctxt->sax = &silent;
ctxt->vctxt.error = xmlHTMLValidityError;
if (xmlGetWarningsDefaultValue)
ctxt->vctxt.warning = xmlHTMLValidityWarning;
else
ctxt->vctxt.warning = NULL;
xmlParseDocument(ctxt);
ret = ctxt->wellFormed;
doc = ctxt->myDoc;
ctxt->sax = old;
xmlFreeParserCtxt(ctxt);
if (!ret) {
xmlFreeDoc(doc);
doc = NULL;
}
} else
doc = xmlParseFile(filename); doc = xmlParseFile(filename);
} }
@ -155,8 +428,8 @@ void parseAndPrintFile(char *filename) {
int nb, i; int nb, i;
xmlNodePtr node; xmlNodePtr node;
if (doc->root != NULL) { if (doc->children != NULL) {
node = doc->root; node = doc->children;
while ((node != NULL) && (node->last == NULL)) node = node->next; while ((node != NULL) && (node->last == NULL)) node = node->next;
if (node != NULL) { if (node != NULL) {
nb = xmlValidGetValidElements(node->last, NULL, list, 256); nb = xmlValidGetValidElements(node->last, NULL, list, 256);
@ -224,6 +497,12 @@ int main(int argc, char **argv) {
else if ((!strcmp(argv[i], "-noout")) || else if ((!strcmp(argv[i], "-noout")) ||
(!strcmp(argv[i], "--noout"))) (!strcmp(argv[i], "--noout")))
noout++; noout++;
else if ((!strcmp(argv[i], "-htmlout")) ||
(!strcmp(argv[i], "--htmlout")))
htmlout++;
else if ((!strcmp(argv[i], "-nowrap")) ||
(!strcmp(argv[i], "--nowrap")))
nowrap++;
else if ((!strcmp(argv[i], "-valid")) || else if ((!strcmp(argv[i], "-valid")) ||
(!strcmp(argv[i], "--valid"))) (!strcmp(argv[i], "--valid")))
valid++; valid++;
@ -244,15 +523,19 @@ int main(int argc, char **argv) {
compress++; compress++;
xmlSetCompressMode(9); xmlSetCompressMode(9);
} }
else if ((!strcmp(argv[i], "-blanks")) ||
(!strcmp(argv[i], "--blanks"))) {
blanks++;
xmlKeepBlanksDefault(1);
}
else if ((!strcmp(argv[i], "-html")) || else if ((!strcmp(argv[i], "-html")) ||
(!strcmp(argv[i], "--html"))) { (!strcmp(argv[i], "--html"))) {
html++; html++;
} }
else if ((!strcmp(argv[i], "-nowarning")) ||
(!strcmp(argv[i], "--nowarning"))) {
xmlGetWarningsDefaultValue = 0;
}
else if ((!strcmp(argv[i], "-noblanks")) ||
(!strcmp(argv[i], "--noblanks"))) {
noblanks++;
xmlKeepBlanksDefault(0);
}
else if ((!strcmp(argv[i], "-shell")) || else if ((!strcmp(argv[i], "-shell")) ||
(!strcmp(argv[i], "--shell"))) { (!strcmp(argv[i], "--shell"))) {
shell++; shell++;
@ -261,6 +544,17 @@ int main(int argc, char **argv) {
} }
if (noent != 0) xmlSubstituteEntitiesDefault(1); if (noent != 0) xmlSubstituteEntitiesDefault(1);
if (valid != 0) xmlDoValidityCheckingDefaultValue = 1; if (valid != 0) xmlDoValidityCheckingDefaultValue = 1;
if ((htmlout) && (!nowrap)) {
fprintf(stderr,
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n");
fprintf(stderr, "\t\"http://www.w3.org/TR/REC-html40/loose.dtd\">\n");
fprintf(stderr,
"<html><head><title>%s output</title></head>\n",
argv[0]);
fprintf(stderr,
"<body bgcolor=\"#ffffff\"><h1 align=\"center\">%s output</h1>\n",
argv[0]);
}
for (i = 1; i < argc ; i++) { for (i = 1; i < argc ; i++) {
if (argv[i][0] != '-') { if (argv[i][0] != '-') {
if (repeat) { if (repeat) {
@ -271,8 +565,11 @@ int main(int argc, char **argv) {
files ++; files ++;
} }
} }
if ((htmlout) && (!nowrap)) {
fprintf(stderr, "</body></html>\n");
}
if (files == 0) { if (files == 0) {
printf("Usage : %s [--debug] [--shell] [--debugent] [--copy] [--recover] [--noent] [--noout] [--valid] [--repeat] XMLfiles ...\n", printf("Usage : %s [--debug] [--debugent] [--copy] [--recover] [--noent] [--noout] [--valid] [--repeat] XMLfiles ...\n",
argv[0]); argv[0]);
printf("\tParse the XML files and output the result of the parsing\n"); printf("\tParse the XML files and output the result of the parsing\n");
printf("\t--debug : dump a debug tree of the in-memory document\n"); printf("\t--debug : dump a debug tree of the in-memory document\n");
@ -281,6 +578,8 @@ int main(int argc, char **argv) {
printf("\t--recover : output what was parsable on broken XML documents\n"); printf("\t--recover : output what was parsable on broken XML documents\n");
printf("\t--noent : substitute entity references by their value\n"); printf("\t--noent : substitute entity references by their value\n");
printf("\t--noout : don't output the result tree\n"); printf("\t--noout : don't output the result tree\n");
printf("\t--htmlout : output results as HTML\n");
printf("\t--nowarp : do not put HTML doc wrapper\n");
printf("\t--valid : validate the document in addition to std well-formed check\n"); printf("\t--valid : validate the document in addition to std well-formed check\n");
printf("\t--postvalid : do a posteriori validation, i.e after parsing\n"); printf("\t--postvalid : do a posteriori validation, i.e after parsing\n");
printf("\t--repeat : repeat 100 times, for timing or profiling\n"); printf("\t--repeat : repeat 100 times, for timing or profiling\n");
@ -288,8 +587,9 @@ int main(int argc, char **argv) {
printf("\t--compress : turn on gzip compression of output\n"); printf("\t--compress : turn on gzip compression of output\n");
printf("\t--html : use the HTML parser\n"); printf("\t--html : use the HTML parser\n");
printf("\t--shell : run a navigating shell\n"); printf("\t--shell : run a navigating shell\n");
printf("\t--blanks : keep blank text node\n");
printf("\t--push : use the push mode of the parser\n"); printf("\t--push : use the push mode of the parser\n");
printf("\t--nowarning : do not emit warnings from parser/validator\n");
printf("\t--noblanks : drop (ignorable?) blanks spaces\n");
} }
xmlCleanupParser(); xmlCleanupParser();
xmlMemoryDump(); xmlMemoryDump();

811
tree.c

File diff suppressed because it is too large Load Diff

121
tree.h
View File

@ -36,24 +36,22 @@ typedef enum {
XML_DOCUMENT_TYPE_NODE= 10, XML_DOCUMENT_TYPE_NODE= 10,
XML_DOCUMENT_FRAG_NODE= 11, XML_DOCUMENT_FRAG_NODE= 11,
XML_NOTATION_NODE= 12, XML_NOTATION_NODE= 12,
XML_HTML_DOCUMENT_NODE= 13 XML_HTML_DOCUMENT_NODE= 13,
XML_DTD_NODE= 14,
XML_ELEMENT_DECL= 15,
XML_ATTRIBUTE_DECL= 16,
XML_ENTITY_DECL= 17
} xmlElementType; } xmlElementType;
/* /*
* Size of an internal character representation. * Size of an internal character representation.
* *
* Currently we use 8bit chars internal representation for memory efficiency, * We use 8bit chars internal representation for memory efficiency,
* but the parser is not tied to that, just define UNICODE to switch to * Note that with 8 bits wide xmlChars one can still use UTF-8 to handle
* a 16 bits internal representation. Note that with 8 bits wide * correctly non ISO-Latin input.
* xmlChars one can still use UTF-8 to handle correctly non ISO-Latin
* input.
*/ */
#ifdef UNICODE
typedef unsigned short xmlChar;
#else
typedef unsigned char xmlChar; typedef unsigned char xmlChar;
#endif
#ifndef WIN32 #ifndef WIN32
#ifndef CHAR #ifndef CHAR
@ -109,14 +107,25 @@ struct _xmlEnumeration {
typedef struct _xmlAttribute xmlAttribute; typedef struct _xmlAttribute xmlAttribute;
typedef xmlAttribute *xmlAttributePtr; typedef xmlAttribute *xmlAttributePtr;
struct _xmlAttribute { struct _xmlAttribute {
const xmlChar *elem; /* Element holding the attribute */ #ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_ATTRIBUTE_DECL, must be second ! */
const xmlChar *name; /* Attribute name */ const xmlChar *name; /* Attribute name */
struct _xmlAttribute *next; /* list of attributes of an element */ struct _xmlNode *children; /* NULL */
xmlAttributeType type; /* The type */ struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
struct _xmlAttribute *nexth; /* next in hash table */
xmlAttributeType atype; /* The attribute type */
xmlAttributeDefault def; /* the default */ xmlAttributeDefault def; /* the default */
const xmlChar *defaultValue; /* or the default value */ const xmlChar *defaultValue; /* or the default value */
xmlEnumerationPtr tree; /* or the enumeration tree if any */ xmlEnumerationPtr tree; /* or the enumeration tree if any */
const xmlChar *prefix; /* the namespace prefix if any */ const xmlChar *prefix; /* the namespace prefix if any */
const xmlChar *elem; /* Element holding the attribute */
}; };
/* /*
@ -156,8 +165,19 @@ typedef enum {
typedef struct _xmlElement xmlElement; typedef struct _xmlElement xmlElement;
typedef xmlElement *xmlElementPtr; typedef xmlElement *xmlElementPtr;
struct _xmlElement { struct _xmlElement {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_ELEMENT_DECL, must be second ! */
const xmlChar *name; /* Element name */ const xmlChar *name; /* Element name */
xmlElementTypeVal type; /* The type */ struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlElementTypeVal etype; /* The type */
xmlElementContentPtr content; /* the allowed element content */ xmlElementContentPtr content; /* the allowed element content */
xmlAttributePtr attributes; /* List of the declared attributes */ xmlAttributePtr attributes; /* List of the declared attributes */
}; };
@ -188,14 +208,25 @@ struct _xmlNs {
typedef struct _xmlDtd xmlDtd; typedef struct _xmlDtd xmlDtd;
typedef xmlDtd *xmlDtdPtr; typedef xmlDtd *xmlDtdPtr;
struct _xmlDtd { struct _xmlDtd {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_DTD_NODE, must be second ! */
const xmlChar *name; /* Name of the DTD */ const xmlChar *name; /* Name of the DTD */
const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */ struct _xmlNode *children; /* the value of the property link */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */ struct _xmlNode *last; /* last child link */
struct _xmlDoc *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
/* End of common part */
void *notations; /* Hash table for notations if any */ void *notations; /* Hash table for notations if any */
void *elements; /* Hash table for elements if any */ void *elements; /* Hash table for elements if any */
void *attributes; /* Hash table for attributes if any */ void *attributes; /* Hash table for attributes if any */
void *entities; /* Hash table for entities if any */ void *entities; /* Hash table for entities if any */
/* struct xmlDtd *next; * next link for this document */ const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */
}; };
/* /*
@ -206,14 +237,17 @@ typedef xmlAttr *xmlAttrPtr;
struct _xmlAttr { struct _xmlAttr {
#ifndef XML_WITHOUT_CORBA #ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */ void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif #endif
xmlElementType type; /* XML_ATTRIBUTE_NODE, must be third ! */ xmlElementType type; /* XML_ATTRIBUTE_NODE, must be second ! */
struct _xmlNode *node; /* attr->node link */
struct _xmlAttr *next; /* attribute list link */
const xmlChar *name; /* the name of the property */ const xmlChar *name; /* the name of the property */
struct _xmlNode *val; /* the value of the property */ struct _xmlNode *children; /* the value of the property */
struct _xmlNode *last; /* NULL */
struct _xmlNode *parent; /* child->parent link */
struct _xmlAttr *next; /* next sibling link */
struct _xmlAttr *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlNs *ns; /* pointer to the associated namespace */ xmlNs *ns; /* pointer to the associated namespace */
xmlAttributeType atype; /* the attribute type if validating */
}; };
/* /*
@ -266,24 +300,25 @@ typedef xmlNode *xmlNodePtr;
struct _xmlNode { struct _xmlNode {
#ifndef XML_WITHOUT_CORBA #ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */ void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif #endif
xmlElementType type; /* type number in the DTD, must be third ! */ xmlElementType type; /* type number, must be second ! */
struct _xmlDoc *doc; /* the containing document */ const xmlChar *name; /* the name of the node, or the entity */
struct _xmlNode *children; /* parent->childs link */
struct _xmlNode *last; /* last child link */
struct _xmlNode *parent; /* child->parent link */ struct _xmlNode *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */ struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */ struct _xmlNode *prev; /* previous sibling link */
struct _xmlNode *childs; /* parent->childs link */ struct _xmlDoc *doc; /* the containing document */
struct _xmlNode *last; /* last child link */
struct _xmlAttr *properties;/* properties list */
const xmlChar *name; /* the name of the node, or the entity */
xmlNs *ns; /* pointer to the associated namespace */ xmlNs *ns; /* pointer to the associated namespace */
xmlNs *nsDef; /* namespace definitions on this node */
#ifndef XML_USE_BUFFER_CONTENT #ifndef XML_USE_BUFFER_CONTENT
xmlChar *content; /* the content */ xmlChar *content; /* the content */
#else #else
xmlBufferPtr content; /* the content in a buffer */ xmlBufferPtr content; /* the content in a buffer */
#endif #endif
/* End of common part */
struct _xmlAttr *properties;/* properties list */
xmlNs *nsDef; /* namespace definitions on this node */
}; };
/* /*
@ -294,20 +329,27 @@ typedef xmlDoc *xmlDocPtr;
struct _xmlDoc { struct _xmlDoc {
#ifndef XML_WITHOUT_CORBA #ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */ void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif #endif
xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! */ xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! */
char *name; /* name/filename/URI of the document */ char *name; /* name/filename/URI of the document */
const xmlChar *version; /* the XML version string */ struct _xmlNode *children; /* the document tree */
const xmlChar *encoding; /* encoding, if any */ struct _xmlNode *last; /* last child link */
struct _xmlNode *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* autoreference to itself */
/* End of common part */
int compression;/* level of zlib compression */ int compression;/* level of zlib compression */
int standalone; /* standalone document (no external refs) */ int standalone; /* standalone document (no external refs) */
struct _xmlDtd *intSubset; /* the document internal subset */ struct _xmlDtd *intSubset; /* the document internal subset */
struct _xmlDtd *extSubset; /* the document external subset */ struct _xmlDtd *extSubset; /* the document external subset */
struct _xmlNs *oldNs; /* Global namespace, the old way */ struct _xmlNs *oldNs; /* Global namespace, the old way */
struct _xmlNode *root; /* the document tree */ const xmlChar *version; /* the XML version string */
const xmlChar *encoding; /* encoding, if any */
void *ids; /* Hash table for ID attributes if any */ void *ids; /* Hash table for ID attributes if any */
void *refs; /* Hash table for IDREFs attributes if any */ void *refs; /* Hash table for IDREFs attributes if any */
const xmlChar *URL; /* The URI for that document */
}; };
/* /*
@ -422,6 +464,8 @@ xmlNodePtr xmlNewComment (const xmlChar *content);
xmlNodePtr xmlNewCDataBlock (xmlDocPtr doc, xmlNodePtr xmlNewCDataBlock (xmlDocPtr doc,
const xmlChar *content, const xmlChar *content,
int len); int len);
xmlNodePtr xmlNewCharRef (xmlDocPtr doc,
const xmlChar *name);
xmlNodePtr xmlNewReference (xmlDocPtr doc, xmlNodePtr xmlNewReference (xmlDocPtr doc,
const xmlChar *name); const xmlChar *name);
xmlNodePtr xmlCopyNode (xmlNodePtr node, xmlNodePtr xmlCopyNode (xmlNodePtr node,
@ -513,13 +557,14 @@ xmlChar * xmlNodeGetContent (xmlNodePtr cur);
xmlChar * xmlNodeGetLang (xmlNodePtr cur); xmlChar * xmlNodeGetLang (xmlNodePtr cur);
void xmlNodeSetLang (xmlNodePtr cur, void xmlNodeSetLang (xmlNodePtr cur,
const xmlChar *lang); const xmlChar *lang);
int xmlNodeGetSpacePreserve (xmlNodePtr cur);
xmlChar * xmlNodeGetBase (xmlDocPtr doc, xmlChar * xmlNodeGetBase (xmlDocPtr doc,
xmlNodePtr cur); xmlNodePtr cur);
/* /*
* Removing content. * Removing content.
*/ */
int xmlRemoveProp (xmlAttrPtr attr); /* TODO */ int xmlRemoveProp (xmlAttrPtr attr);
int xmlRemoveNode (xmlNodePtr node); /* TODO */ int xmlRemoveNode (xmlNodePtr node); /* TODO */
/* /*
@ -532,6 +577,12 @@ void xmlBufferWriteChar (xmlBufferPtr buf,
void xmlBufferWriteQuotedString(xmlBufferPtr buf, void xmlBufferWriteQuotedString(xmlBufferPtr buf,
const xmlChar *string); const xmlChar *string);
/*
* Namespace handling
*/
int xmlReconciliateNs (xmlDocPtr doc,
xmlNodePtr tree);
/* /*
* Saving * Saving
*/ */

888
valid.c

File diff suppressed because it is too large Load Diff

24
valid.h
View File

@ -29,6 +29,14 @@ struct _xmlValidCtxt {
void *userData; /* user specific data block */ void *userData; /* user specific data block */
xmlValidityErrorFunc error; /* the callback in case of errors */ xmlValidityErrorFunc error; /* the callback in case of errors */
xmlValidityWarningFunc warning; /* the callback in case of warning */ xmlValidityWarningFunc warning; /* the callback in case of warning */
/* Node analysis stack used when validating within entities */
xmlNodePtr node; /* Current parsed Node */
int nodeNr; /* Depth of the parsing stack */
int nodeMax; /* Max depth of the parsing stack */
xmlNodePtr *nodeTab; /* array of nodes */
int finishDtd; /* finished validating the Dtd ? */
}; };
/* /*
@ -114,6 +122,8 @@ xmlNotationPtr xmlAddNotationDecl (xmlValidCtxtPtr ctxt,
const xmlChar *SystemID); const xmlChar *SystemID);
xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table); xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table);
void xmlFreeNotationTable(xmlNotationTablePtr table); void xmlFreeNotationTable(xmlNotationTablePtr table);
void xmlDumpNotationDecl (xmlBufferPtr buf,
xmlNotationPtr nota);
void xmlDumpNotationTable(xmlBufferPtr buf, void xmlDumpNotationTable(xmlBufferPtr buf,
xmlNotationTablePtr table); xmlNotationTablePtr table);
@ -122,6 +132,9 @@ xmlElementContentPtr xmlNewElementContent (xmlChar *name,
xmlElementContentType type); xmlElementContentType type);
xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content); xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content);
void xmlFreeElementContent(xmlElementContentPtr cur); void xmlFreeElementContent(xmlElementContentPtr cur);
void xmlSprintfElementContent(char *buf,
xmlElementContentPtr content,
int glob);
/* Element */ /* Element */
xmlElementPtr xmlAddElementDecl (xmlValidCtxtPtr ctxt, xmlElementPtr xmlAddElementDecl (xmlValidCtxtPtr ctxt,
@ -133,6 +146,8 @@ xmlElementTablePtr xmlCopyElementTable (xmlElementTablePtr table);
void xmlFreeElementTable (xmlElementTablePtr table); void xmlFreeElementTable (xmlElementTablePtr table);
void xmlDumpElementTable (xmlBufferPtr buf, void xmlDumpElementTable (xmlBufferPtr buf,
xmlElementTablePtr table); xmlElementTablePtr table);
void xmlDumpElementDecl (xmlBufferPtr buf,
xmlElementPtr elem);
/* Enumeration */ /* Enumeration */
xmlEnumerationPtr xmlCreateEnumeration (xmlChar *name); xmlEnumerationPtr xmlCreateEnumeration (xmlChar *name);
@ -144,6 +159,7 @@ xmlAttributePtr xmlAddAttributeDecl (xmlValidCtxtPtr ctxt,
xmlDtdPtr dtd, xmlDtdPtr dtd,
const xmlChar *elem, const xmlChar *elem,
const xmlChar *name, const xmlChar *name,
const xmlChar *prefix,
xmlAttributeType type, xmlAttributeType type,
xmlAttributeDefault def, xmlAttributeDefault def,
const xmlChar *defaultValue, const xmlChar *defaultValue,
@ -152,6 +168,8 @@ xmlAttributeTablePtr xmlCopyAttributeTable (xmlAttributeTablePtr table);
void xmlFreeAttributeTable (xmlAttributeTablePtr table); void xmlFreeAttributeTable (xmlAttributeTablePtr table);
void xmlDumpAttributeTable (xmlBufferPtr buf, void xmlDumpAttributeTable (xmlBufferPtr buf,
xmlAttributeTablePtr table); xmlAttributeTablePtr table);
void xmlDumpAttributeDecl (xmlBufferPtr buf,
xmlAttributePtr attr);
/* IDs */ /* IDs */
xmlIDPtr xmlAddID (xmlValidCtxtPtr ctxt, xmlIDPtr xmlAddID (xmlValidCtxtPtr ctxt,
@ -188,6 +206,10 @@ int xmlValidateRoot (xmlValidCtxtPtr ctxt,
int xmlValidateElementDecl (xmlValidCtxtPtr ctxt, int xmlValidateElementDecl (xmlValidCtxtPtr ctxt,
xmlDocPtr doc, xmlDocPtr doc,
xmlElementPtr elem); xmlElementPtr elem);
xmlChar * xmlValidNormalizeAttributeValue(xmlDocPtr doc,
xmlNodePtr elem,
const xmlChar *name,
const xmlChar *value);
int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt,
xmlDocPtr doc, xmlDocPtr doc,
xmlAttributePtr attr); xmlAttributePtr attr);
@ -199,6 +221,8 @@ int xmlValidateNotationDecl (xmlValidCtxtPtr ctxt,
int xmlValidateDtd (xmlValidCtxtPtr ctxt, int xmlValidateDtd (xmlValidCtxtPtr ctxt,
xmlDocPtr doc, xmlDocPtr doc,
xmlDtdPtr dtd); xmlDtdPtr dtd);
int xmlValidateDtdFinal (xmlValidCtxtPtr ctxt,
xmlDocPtr doc);
int xmlValidateDocument (xmlValidCtxtPtr ctxt, int xmlValidateDocument (xmlValidCtxtPtr ctxt,
xmlDocPtr doc); xmlDocPtr doc);
int xmlValidateElement (xmlValidCtxtPtr ctxt, int xmlValidateElement (xmlValidCtxtPtr ctxt,

View File

@ -115,8 +115,22 @@ typedef enum {
XML_ERR_ENCODING_NAME, /* 80 */ XML_ERR_ENCODING_NAME, /* 80 */
XML_ERR_HYPHEN_IN_COMMENT /* 81 */ XML_ERR_HYPHEN_IN_COMMENT, /* 81 */
XML_ERR_INVALID_ENCODING, /* 82 */
XML_ERR_EXT_ENTITY_STANDALONE, /* 83 */
XML_ERR_CONDSEC_INVALID, /* 84 */
XML_ERR_VALUE_REQUIRED, /* 85 */
XML_ERR_NOT_WELL_BALANCED, /* 86 */
XML_ERR_EXTRA_CONTENT, /* 87 */
XML_ERR_ENTITY_CHAR_ERROR, /* 88 */
XML_ERR_ENTITY_PE_INTERNAL, /* 88 */
XML_ERR_ENTITY_LOOP, /* 89 */
XML_ERR_ENTITY_BOUNDARY /* 90 */
}xmlParserErrors; }xmlParserErrors;
void xmlParserError (void *ctx, void xmlParserError (void *ctx,

43
xmlIO.c
View File

@ -118,6 +118,7 @@ xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
* If filename is "-' then we use stdin as the input. * If filename is "-' then we use stdin as the input.
* Automatic support for ZLIB/Compress compressed document is provided * Automatic support for ZLIB/Compress compressed document is provided
* by default if found at compile-time. * by default if found at compile-time.
* Do an encoding check if enc == XML_CHAR_ENCODING_NONE
* *
* Returns the new parser input or NULL * Returns the new parser input or NULL
*/ */
@ -201,13 +202,10 @@ xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
} }
#endif #endif
} }
/*
* TODO : get the 4 first bytes and decode the charset
* if enc == XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines here. !!!
* enc = xmlDetectCharEncoding(buffer);
*/
/*
* Allocate the Input buffer front-end.
*/
ret = xmlAllocParserInputBuffer(enc); ret = xmlAllocParserInputBuffer(enc);
if (ret != NULL) { if (ret != NULL) {
#ifdef HAVE_ZLIB_H #ifdef HAVE_ZLIB_H
@ -218,7 +216,6 @@ xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
ret->httpIO = httpIO; ret->httpIO = httpIO;
ret->ftpIO = ftpIO; ret->ftpIO = ftpIO;
} }
xmlParserInputBufferRead(ret, 4);
return(ret); return(ret);
} }
@ -289,19 +286,30 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
if (len < 0) return(0); if (len < 0) return(0);
if (in->encoder != NULL) { if (in->encoder != NULL) {
xmlChar *buffer; xmlChar *buffer;
int processed = len;
buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar)); buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
if (buffer == NULL) { if (buffer == NULL) {
fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n"); fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
xmlFree(buffer);
return(-1); return(-1);
} }
nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar), nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar),
(xmlChar *) buf, len); (xmlChar *) buf, &processed);
/* /*
* TODO : we really need to have something atomic or the * TODO : we really need to have something atomic or the
* encoder must report the number of bytes read * encoder must report the number of bytes read
*/ */
if (nbchars < 0) {
fprintf(stderr, "xmlParserInputBufferPush: encoder error\n");
xmlFree(buffer);
return(-1);
}
if (processed != len) {
fprintf(stderr,
"TODO xmlParserInputBufferPush: processed != len\n");
xmlFree(buffer);
return(-1);
}
buffer[nbchars] = 0; buffer[nbchars] = 0;
xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars); xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
xmlFree(buffer); xmlFree(buffer);
@ -382,6 +390,7 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
} }
if (in->encoder != NULL) { if (in->encoder != NULL) {
xmlChar *buf; xmlChar *buf;
int wrote = res;
buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar)); buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
if (buf == NULL) { if (buf == NULL) {
@ -390,10 +399,24 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
return(-1); return(-1);
} }
nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar), nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
BAD_CAST buffer, res); BAD_CAST buffer, &wrote);
buf[nbchars] = 0; buf[nbchars] = 0;
xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars); xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
xmlFree(buf); xmlFree(buf);
/*
* Check that the encoder was able to process the full input
*/
if (wrote != res) {
fprintf(stderr,
"TODO : xmlParserInputBufferGrow wrote %d != res %d\n",
wrote, res);
/*
* TODO !!!
* Need to keep the unprocessed input in a buffer in->unprocessed
*/
}
} else { } else {
nbchars = res; nbchars = res;
buffer[nbchars] = 0; buffer[nbchars] = 0;

View File

@ -8,7 +8,7 @@
#ifndef _DEBUG_MEMORY_ALLOC_ #ifndef _DEBUG_MEMORY_ALLOC_
#define _DEBUG_MEMORY_ALLOC_ #define _DEBUG_MEMORY_ALLOC_
#define NO_DEBUG_MEMORY /* #define NO_DEBUG_MEMORY */
#ifdef NO_DEBUG_MEMORY #ifdef NO_DEBUG_MEMORY
#ifdef HAVE_MALLOC_H #ifdef HAVE_MALLOC_H

59
xpath.c
View File

@ -213,9 +213,9 @@ PUSH_AND_POP(xmlXPathObjectPtr, value)
* Dirty macros, i.e. one needs to make assumptions about the context to use them * Dirty macros, i.e. one needs to make assumptions about the context to use them
* *
* CUR_PTR return the current pointer to the xmlChar to be parsed. * CUR_PTR return the current pointer to the xmlChar to be parsed.
* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled * CUR returns the current xmlChar value, i.e. a 8 bit value
* in ISO-Latin or UTF-8, and the current 16 bit value if compiled * in ISO-Latin or UTF-8.
* in UNICODE mode. This should be used internally by the parser * This should be used internally by the parser
* only to compare to ASCII values otherwise it would break when * only to compare to ASCII values otherwise it would break when
* running with UTF-8 encoding. * running with UTF-8 encoding.
* NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
@ -237,11 +237,8 @@ PUSH_AND_POP(xmlXPathObjectPtr, value)
#define SKIP_BLANKS \ #define SKIP_BLANKS \
while (IS_BLANK(*(ctxt->cur))) NEXT while (IS_BLANK(*(ctxt->cur))) NEXT
#ifndef USE_UTF_8
#define CURRENT (*ctxt->cur) #define CURRENT (*ctxt->cur)
#define NEXT ((*ctxt->cur) ? ctxt->cur++: ctxt->cur) #define NEXT ((*ctxt->cur) ? ctxt->cur++: ctxt->cur)
#else
#endif
/************************************************************************ /************************************************************************
* * * *
@ -877,7 +874,7 @@ xmlXPathFreeContext(xmlXPathContextPtr ctxt) {
fprintf(xmlXPathDebug, "%s:%d Internal error: no document\n", \ fprintf(xmlXPathDebug, "%s:%d Internal error: no document\n", \
__FILE__, __LINE__); \ __FILE__, __LINE__); \
} \ } \
if (ctxt->doc->root == NULL) { \ if (ctxt->doc->children == NULL) { \
fprintf(xmlXPathDebug, \ fprintf(xmlXPathDebug, \
"%s:%d Internal error: document without root\n", \ "%s:%d Internal error: document without root\n", \
__FILE__, __LINE__); \ __FILE__, __LINE__); \
@ -1496,14 +1493,18 @@ xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
case XML_PI_NODE: case XML_PI_NODE:
case XML_COMMENT_NODE: case XML_COMMENT_NODE:
case XML_NOTATION_NODE: case XML_NOTATION_NODE:
return(ctxt->context->node->childs); case XML_DTD_NODE:
case XML_ATTRIBUTE_NODE: return(ctxt->context->node->children);
return(NULL);
case XML_DOCUMENT_NODE: case XML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_TYPE_NODE:
case XML_DOCUMENT_FRAG_NODE: case XML_DOCUMENT_FRAG_NODE:
case XML_HTML_DOCUMENT_NODE: case XML_HTML_DOCUMENT_NODE:
return(((xmlDocPtr) ctxt->context->node)->root); return(((xmlDocPtr) ctxt->context->node)->children);
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
case XML_ATTRIBUTE_NODE:
return(NULL);
} }
return(NULL); return(NULL);
} }
@ -1533,11 +1534,11 @@ xmlXPathNextDescendant(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
return(NULL); return(NULL);
if (ctxt->context->node == (xmlNodePtr) ctxt->context->doc) if (ctxt->context->node == (xmlNodePtr) ctxt->context->doc)
return(ctxt->context->doc->root); return(ctxt->context->doc->children);
return(ctxt->context->node->childs); return(ctxt->context->node->children);
} }
if (cur->childs != NULL) return(cur->childs); if (cur->children != NULL) return(cur->children);
if (cur->next != NULL) return(cur->next); if (cur->next != NULL) return(cur->next);
do { do {
@ -1606,13 +1607,17 @@ xmlXPathNextParent(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
case XML_PI_NODE: case XML_PI_NODE:
case XML_COMMENT_NODE: case XML_COMMENT_NODE:
case XML_NOTATION_NODE: case XML_NOTATION_NODE:
case XML_DTD_NODE:
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
if (ctxt->context->node->parent == NULL) if (ctxt->context->node->parent == NULL)
return((xmlNodePtr) ctxt->context->doc); return((xmlNodePtr) ctxt->context->doc);
return(ctxt->context->node->parent); return(ctxt->context->node->parent);
case XML_ATTRIBUTE_NODE: { case XML_ATTRIBUTE_NODE: {
xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node; xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node;
return(att->node); return(att->parent);
} }
case XML_DOCUMENT_NODE: case XML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_TYPE_NODE:
@ -1655,6 +1660,10 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
case XML_ENTITY_NODE: case XML_ENTITY_NODE:
case XML_PI_NODE: case XML_PI_NODE:
case XML_COMMENT_NODE: case XML_COMMENT_NODE:
case XML_DTD_NODE:
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
case XML_NOTATION_NODE: case XML_NOTATION_NODE:
if (ctxt->context->node->parent == NULL) if (ctxt->context->node->parent == NULL)
return((xmlNodePtr) ctxt->context->doc); return((xmlNodePtr) ctxt->context->doc);
@ -1662,7 +1671,7 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
case XML_ATTRIBUTE_NODE: { case XML_ATTRIBUTE_NODE: {
xmlAttrPtr cur = (xmlAttrPtr) ctxt->context->node; xmlAttrPtr cur = (xmlAttrPtr) ctxt->context->node;
return(cur->node); return(cur->parent);
} }
case XML_DOCUMENT_NODE: case XML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_TYPE_NODE:
@ -1672,7 +1681,7 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
} }
return(NULL); return(NULL);
} }
if (cur == ctxt->context->doc->root) if (cur == ctxt->context->doc->children)
return((xmlNodePtr) ctxt->context->doc); return((xmlNodePtr) ctxt->context->doc);
if (cur == (xmlNodePtr) ctxt->context->doc) if (cur == (xmlNodePtr) ctxt->context->doc)
return(NULL); return(NULL);
@ -1685,11 +1694,15 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
case XML_PI_NODE: case XML_PI_NODE:
case XML_COMMENT_NODE: case XML_COMMENT_NODE:
case XML_NOTATION_NODE: case XML_NOTATION_NODE:
case XML_DTD_NODE:
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
return(cur->parent); return(cur->parent);
case XML_ATTRIBUTE_NODE: { case XML_ATTRIBUTE_NODE: {
xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node; xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node;
return(att->node); return(att->parent);
} }
case XML_DOCUMENT_NODE: case XML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_TYPE_NODE:
@ -1780,13 +1793,13 @@ xmlXPathNextFollowing(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
return(NULL); return(NULL);
if (cur == NULL) if (cur == NULL)
return(ctxt->context->node->next);; /* !!!!!!!!! */ return(ctxt->context->node->next);; /* !!!!!!!!! */
if (cur->childs != NULL) return(cur->childs); if (cur->children != NULL) return(cur->children);
if (cur->next != NULL) return(cur->next); if (cur->next != NULL) return(cur->next);
do { do {
cur = cur->parent; cur = cur->parent;
if (cur == NULL) return(NULL); if (cur == NULL) return(NULL);
if (cur == ctxt->context->doc->root) return(NULL); if (cur == ctxt->context->doc->children) return(NULL);
if (cur->next != NULL) { if (cur->next != NULL) {
cur = cur->next; cur = cur->next;
return(cur); return(cur);
@ -1820,7 +1833,7 @@ xmlXPathNextPreceding(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
do { do {
cur = cur->parent; cur = cur->parent;
if (cur == NULL) return(NULL); if (cur == NULL) return(NULL);
if (cur == ctxt->context->doc->root) return(NULL); if (cur == ctxt->context->doc->children) return(NULL);
if (cur->prev != NULL) { if (cur->prev != NULL) {
cur = cur->prev; cur = cur->prev;
return(cur); return(cur);
@ -2278,7 +2291,7 @@ xmlXPathIdFunction(xmlXPathParserContextPtr ctxt, int nargs) {
ID = xmlStrndup(tokens, cur - tokens); ID = xmlStrndup(tokens, cur - tokens);
attr = xmlGetID(ctxt->context->doc, ID); attr = xmlGetID(ctxt->context->doc, ID);
if (attr != NULL) { if (attr != NULL) {
elem = attr->node; elem = attr->parent;
xmlXPathNodeSetAdd(ret->nodesetval, elem); xmlXPathNodeSetAdd(ret->nodesetval, elem);
} }
if (ID != NULL) if (ID != NULL)
@ -3677,6 +3690,8 @@ xmlXPathEvalPathExpr(xmlXPathParserContextPtr ctxt) {
if (name != NULL) if (name != NULL)
xmlFree(name); xmlFree(name);
} }
if (ctxt->context->nodelist != NULL)
valuePush(ctxt, xmlXPathNewNodeSetList(ctxt->context->nodelist));
} }
/** /**