1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-28 00:21:53 +03:00

This is the 2.0.0-beta, lots and lots and lots of changes

Have a look at http://xmlsoft.org/upgrade.html

Daniel
This commit is contained in:
Daniel Veillard
2000-03-14 18:30:20 +00:00
parent 76234da152
commit cf46199c5e
91 changed files with 9978 additions and 5547 deletions

View File

@ -1,3 +1,11 @@
Tue Mar 14 19:11:29 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* all: tagged LIB_XML_1_X
* *.c *.h : updated from W3C CVS tree
* configure.in : 2.0.0-beta
* libxml.spec.in : libxml2 package name
* result/* : new version of the tests output
Mon Mar 6 09:34:52 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* doc/xml.html, doc/update.html: updated docs, 1.8.7

View File

@ -121,36 +121,81 @@ PUSH_AND_POP(extern, xmlChar*, name)
* COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
*/
#define CUR (*ctxt->input->cur)
#define CUR ((int) (*ctxt->input->cur))
#define UPPER (toupper(*ctxt->input->cur))
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
#define NXT(val) ctxt->input->cur[(val)]
#define UPP(val) (toupper(ctxt->input->cur[(val)]))
#define CUR_PTR ctxt->input->cur
#define SHRINK xmlParserInputShrink(ctxt->input)
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
#define SKIP_BLANKS \
while (IS_BLANK(*(ctxt->input->cur))) NEXT
#define CURRENT ((int) (*ctxt->input->cur))
#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
#define NEXT { \
if ((*ctxt->input->cur == 0) && \
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { \
xmlPopInput(ctxt); \
} else { \
if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
} else ctxt->input->col++; \
ctxt->input->cur++; \
ctxt->nbChars++; \
if (*ctxt->input->cur == 0) \
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
}}
#define NEXT htmlNextChar(ctxt);
#else
#endif
#define SKIP_BLANKS htmlSkipBlankChars(ctxt);
/**
 * htmlNextChar:
 * @ctxt:  the HTML parser context
 *
 * Move the input cursor one character forward.
 *
 * If the cursor currently sits on a 0 byte and xmlParserInputGrow()
 * returns a value <= 0, the current input is popped with xmlPopInput()
 * and nothing else is touched. Otherwise the line/column counters and
 * ctxt->nbChars are updated, the cursor is advanced, and the input is
 * grown again if the new current byte is 0.
 */
void
htmlNextChar(htmlParserCtxtPtr ctxt) {
    /* Nothing left under the cursor and no more data: drop this input. */
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
        xmlPopInput(ctxt);
        return;
    }

    /* Position bookkeeping is based on the character being consumed. */
    if (*(ctxt->input->cur) == '\n') {
        ctxt->input->line++;
        ctxt->input->col = 1;
    } else {
        ctxt->input->col++;
    }

    ctxt->input->cur++;
    ctxt->nbChars++;

    /* Landed on a 0 byte: request more data right away. */
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
/**
 * htmlSkipBlankChars:
 * @ctxt:  the HTML parser context
 *
 * Consume every consecutive character at the current input position for
 * which IS_BLANK() is true, maintaining the line/column counters and
 * ctxt->nbChars the same way a single-character advance does.
 *
 * Returns the number of loop iterations performed, i.e. the number of
 * blank characters skipped.
 */
int
htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int skipped;

    /*
     * ctxt->input is re-read on every access on purpose: xmlPopInput()
     * may be called inside the loop.
     */
    for (skipped = 0; IS_BLANK(*(ctxt->input->cur)); skipped++) {
        if ((*ctxt->input->cur == 0) &&
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
            xmlPopInput(ctxt);
            continue;
        }

        if (*(ctxt->input->cur) == '\n') {
            ctxt->input->line++;
            ctxt->input->col = 1;
        } else {
            ctxt->input->col++;
        }

        ctxt->input->cur++;
        ctxt->nbChars++;

        /* Landed on a 0 byte: request more data right away. */
        if (*ctxt->input->cur == 0)
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    }
    return(skipped);
}
@ -475,7 +520,7 @@ htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
if (elem == NULL) return(1);
if (!xmlStrcmp(name, elem->name)) return(0);
if (htmlCheckAutoClose(elem->name, name)) return(1);
child = elem->childs;
child = elem->children;
while (child != NULL) {
if (htmlAutoCloseTag(doc, name, child)) return(1);
child = child->next;
@ -499,7 +544,7 @@ htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
htmlNodePtr child;
if (elem == NULL) return(1);
child = elem->childs;
child = elem->children;
while (child != NULL) {
if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
child = child->next;
@ -1275,7 +1320,7 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
else
xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI);
cur->name = NULL;
cur->root = NULL;
cur->children = NULL;
cur->extSubset = NULL;
cur->oldNs = NULL;
cur->encoding = NULL;
@ -1285,7 +1330,6 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
cur->refs = NULL;
#ifndef XML_WITHOUT_CORBA
cur->_private = NULL;
cur->vepv = NULL;
#endif
return(cur);
}
@ -1667,7 +1711,8 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
}
} else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
ctxt->sax->error(ctxt->userData,
"SystemLiteral \" or ' expected\n");
ctxt->wellFormed = 0;
}

View File

@ -80,7 +80,7 @@ htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
}
xmlBufferWriteChar(buf, " ");
xmlBufferWriteCHAR(buf, cur->name);
value = xmlNodeListGetString(doc, cur->val, 0);
value = xmlNodeListGetString(doc, cur->children, 0);
if (value) {
xmlBufferWriteChar(buf, "=");
xmlBufferWriteQuotedString(buf, value);
@ -212,7 +212,7 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
}
return;
}
if ((cur->content == NULL) && (cur->childs == NULL)) {
if ((cur->content == NULL) && (cur->children == NULL)) {
if ((info != NULL) && (info->endTag != 0))
xmlBufferWriteChar(buf, ">");
else {
@ -242,15 +242,15 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
xmlFree(buffer);
}
}
if (cur->childs != NULL) {
if ((cur->childs->type != HTML_TEXT_NODE) &&
(cur->childs->type != HTML_ENTITY_REF_NODE) &&
(cur->childs != cur->last))
if (cur->children != NULL) {
if ((cur->children->type != HTML_TEXT_NODE) &&
(cur->children->type != HTML_ENTITY_REF_NODE) &&
(cur->children != cur->last))
xmlBufferWriteChar(buf, "\n");
htmlNodeListDump(buf, doc, cur->childs);
htmlNodeListDump(buf, doc, cur->children);
if ((cur->last->type != HTML_TEXT_NODE) &&
(cur->last->type != HTML_ENTITY_REF_NODE) &&
(cur->childs != cur->last))
(cur->children != cur->last))
xmlBufferWriteChar(buf, "\n");
}
if (!htmlIsAutoClosed(doc, cur)) {
@ -307,8 +307,8 @@ htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
}
if (cur->root != NULL) {
htmlNodeListDump(buf, cur, cur->root);
if (cur->children != NULL) {
htmlNodeListDump(buf, cur, cur->children);
}
xmlBufferWriteChar(buf, "\n");
cur->type = type;

312
SAX.c
View File

@ -158,66 +158,112 @@ internalSubset(void *ctx, const xmlChar *name,
name, ExternalID, SystemID);
#endif
xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID);
}
/**
* externalSubset:
* @ctx: the user data (XML parser context)
*
* Callback on external subset declaration.
*/
void
externalSubset(void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.externalSubset(%s, %s, %s)\n",
name, ExternalID, SystemID);
#endif
if (((ExternalID != NULL) || (SystemID != NULL)) &&
(ctxt->validate && ctxt->wellFormed && ctxt->myDoc)) {
/*
* Try to fetch and parse the external subset.
*/
xmlDtdPtr ret = NULL;
xmlParserCtxtPtr dtdCtxt;
xmlParserInputPtr oldinput;
int oldinputNr;
int oldinputMax;
xmlParserInputPtr *oldinputTab;
int oldwellFormed;
xmlParserInputPtr input = NULL;
xmlCharEncoding enc;
dtdCtxt = xmlNewParserCtxt();
if (dtdCtxt == NULL) return;
/*
* Ask the Entity resolver to load the damn thing
*/
if ((ctxt->directory != NULL) && (dtdCtxt->directory == NULL))
dtdCtxt->directory = (char *) xmlStrdup(BAD_CAST ctxt->directory);
if ((dtdCtxt->sax != NULL) && (dtdCtxt->sax->resolveEntity != NULL))
input = dtdCtxt->sax->resolveEntity(dtdCtxt->userData, ExternalID,
if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
SystemID);
if (input == NULL) {
xmlFreeParserCtxt(dtdCtxt);
return;
}
xmlNewDtd(ctxt->myDoc, name, ExternalID, SystemID);
/*
* plug some encoding conversion routines here. !!!
* make sure we won't destroy the main document context
*/
xmlPushInput(dtdCtxt, input);
enc = xmlDetectCharEncoding(dtdCtxt->input->cur);
xmlSwitchEncoding(dtdCtxt, enc);
oldinput = ctxt->input;
oldinputNr = ctxt->inputNr;
oldinputMax = ctxt->inputMax;
oldinputTab = ctxt->inputTab;
oldwellFormed = ctxt->wellFormed;
ctxt->inputTab = (xmlParserInputPtr *)
xmlMalloc(5 * sizeof(xmlParserInputPtr));
if (ctxt->inputTab == NULL) {
ctxt->errNo = XML_ERR_NO_MEMORY;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"externalSubset: out of memory\n");
ctxt->errNo = XML_ERR_NO_MEMORY;
ctxt->input = oldinput;
ctxt->inputNr = oldinputNr;
ctxt->inputMax = oldinputMax;
ctxt->inputTab = oldinputTab;
return;
}
ctxt->inputNr = 0;
ctxt->inputMax = 5;
ctxt->input = NULL;
xmlPushInput(ctxt, input);
/*
* On the fly encoding conversion if needed
*/
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
xmlSwitchEncoding(ctxt, enc);
if (input->filename == NULL)
input->filename = (char *) xmlStrdup(SystemID);
input->line = 1;
input->col = 1;
input->base = dtdCtxt->input->cur;
input->cur = dtdCtxt->input->cur;
input->base = ctxt->input->cur;
input->cur = ctxt->input->cur;
input->free = NULL;
/*
* let's parse that entity knowing it's an external subset.
*/
xmlParseExternalSubset(dtdCtxt, ExternalID, SystemID);
xmlParseExternalSubset(ctxt, ExternalID, SystemID);
if (dtdCtxt->myDoc != NULL) {
if (dtdCtxt->wellFormed) {
ret = dtdCtxt->myDoc->intSubset;
dtdCtxt->myDoc->intSubset = NULL;
} else {
ret = NULL;
}
xmlFreeDoc(dtdCtxt->myDoc);
dtdCtxt->myDoc = NULL;
}
xmlFreeParserCtxt(dtdCtxt);
/*
* Free up the external entities
*/
ctxt->myDoc->extSubset = ret;
while (ctxt->inputNr > 1)
xmlPopInput(ctxt);
xmlFreeInputStream(ctxt->input);
xmlFree(ctxt->inputTab);
/*
* Restore the parsing context of the main entity
*/
ctxt->input = oldinput;
ctxt->inputNr = oldinputNr;
ctxt->inputMax = oldinputMax;
ctxt->inputTab = oldinputTab;
/* ctxt->wellFormed = oldwellFormed; */
}
}
@ -316,13 +362,23 @@ entityDecl(void *ctx, const xmlChar *name, int type,
fprintf(stderr, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
name, type, publicId, systemId, content);
#endif
xmlAddDocEntity(ctxt->myDoc, name, type, publicId, systemId, content);
if (ctxt->inSubset == 1)
xmlAddDocEntity(ctxt->myDoc, name, type, publicId,
systemId, content);
else if (ctxt->inSubset == 2)
xmlAddDtdEntity(ctxt->myDoc, name, type, publicId,
systemId, content);
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt,
"SAX.entityDecl(%s) called while not in subset\n", name);
}
}
/**
* attributeDecl:
* @ctx: the user data (XML parser context)
* @name: the attribute name
* @fullname: the attribute name
* @type: the attribute type
* @publicId: The public ID of the attribute
* @systemId: The system ID of the attribute
@ -331,24 +387,40 @@ entityDecl(void *ctx, const xmlChar *name, int type,
* An attribute definition has been parsed
*/
void
attributeDecl(void *ctx, const xmlChar *elem, const xmlChar *name,
attributeDecl(void *ctx, const xmlChar *elem, const xmlChar *fullname,
int type, int def, const xmlChar *defaultValue,
xmlEnumerationPtr tree)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlAttributePtr attr;
xmlChar *name = NULL, *prefix = NULL;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
elem, name, type, def, defaultValue);
elem, fullname, type, def, defaultValue);
#endif
name = xmlSplitQName(ctxt, fullname, &prefix);
if (ctxt->inSubset == 1)
attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, elem,
name, type, def, defaultValue, tree);
name, prefix, type, def, defaultValue, tree);
else if (ctxt->inSubset == 2)
attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, elem,
name, prefix, type, def, defaultValue, tree);
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt,
"SAX.attributeDecl(%s) called while not in subset\n", name);
return;
}
if (attr == 0) ctxt->valid = 0;
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateAttributeDecl(&ctxt->vctxt, ctxt->myDoc,
attr);
if (prefix != NULL)
xmlFree(prefix);
if (name != NULL)
xmlFree(name);
}
/**
@ -367,16 +439,26 @@ elementDecl(void *ctx, const xmlChar *name, int type,
xmlElementContentPtr content)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlElementPtr elem;
xmlElementPtr elem = NULL;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.elementDecl(%s, %d, ...)\n",
name, type);
fullname, type);
#endif
if (ctxt->inSubset == 1)
elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->intSubset,
name, type, content);
if (elem == 0) ctxt->valid = 0;
else if (ctxt->inSubset == 2)
elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->extSubset,
name, type, content);
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt,
"SAX.elementDecl(%s) called while not in subset\n", name);
return;
}
if (elem == NULL) ctxt->valid = 0;
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateElementDecl(&ctxt->vctxt, ctxt->myDoc, elem);
@ -396,15 +478,25 @@ notationDecl(void *ctx, const xmlChar *name,
const xmlChar *publicId, const xmlChar *systemId)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlNotationPtr nota;
xmlNotationPtr nota = NULL;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.notationDecl(%s, %s, %s)\n", name, publicId, systemId);
#endif
if (ctxt->inSubset == 1)
nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name,
publicId, systemId);
if (nota == 0) ctxt->valid = 0;
else if (ctxt->inSubset == 2)
nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name,
publicId, systemId);
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt,
"SAX.notationDecl(%s) called while not in subset\n", name);
return;
}
if (nota == NULL) ctxt->valid = 0;
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateNotationDecl(&ctxt->vctxt, ctxt->myDoc,
@ -518,6 +610,7 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
xmlAttrPtr ret;
xmlChar *name;
xmlChar *ns;
xmlChar *nval;
xmlNsPtr namespace;
/****************
@ -528,7 +621,15 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
/*
* Split the full name into a namespace prefix and the tag name
*/
name = xmlSplitQName(fullname, &ns);
name = xmlSplitQName(ctxt, fullname, &ns);
/*
* Do the last stage of the attribute normalization
*/
nval = xmlValidNormalizeAttributeValue(ctxt->myDoc,
ctxt->node, fullname, value);
if (nval != NULL)
value = nval;
/*
* Check whether it's a namespace definition
@ -540,15 +641,28 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
xmlNewNs(ctxt->node, value, NULL);
if (name != NULL)
xmlFree(name);
if (nval != NULL)
xmlFree(nval);
return;
}
if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
(ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
/*
* Validate also for namespace decls, they are attributes from
* an XML-1.0 perspective
TODO ... doesn't map well with current API
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc,
ctxt->node, ret, value);
*/
/* a standard namespace definition */
xmlNewNs(ctxt->node, value, name);
xmlFree(ns);
if (name != NULL)
xmlFree(name);
if (nval != NULL)
xmlFree(nval);
return;
}
@ -562,17 +676,52 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
ret = xmlNewNsProp(ctxt->node, namespace, name, NULL);
if (ret != NULL) {
if ((ctxt->replaceEntities == 0) && (!ctxt->html))
ret->val = xmlStringGetNodeList(ctxt->myDoc, value);
else
ret->val = xmlNewDocText(ctxt->myDoc, value);
if ((ctxt->replaceEntities == 0) && (!ctxt->html)) {
xmlNodePtr tmp;
ret->children = xmlStringGetNodeList(ctxt->myDoc, value);
tmp = ret->children;
while (tmp != NULL) {
tmp->parent = (xmlNodePtr) ret;
if (tmp->next == NULL)
ret->last = tmp;
tmp = tmp->next;
}
} else {
ret->children = xmlNewDocText(ctxt->myDoc, value);
ret->last = ret->children;
if (ret->children != NULL)
ret->children->parent = (xmlNodePtr) ret;
}
}
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->myDoc && ctxt->myDoc->intSubset) {
/*
* If we don't substitute entities, the validation should be
* done on a value with replaced entities anyway.
*/
if (!ctxt->replaceEntities) {
xmlChar *val;
ctxt->depth++;
val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF,
0,0,0);
ctxt->depth--;
if (val == NULL)
ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt,
ctxt->myDoc, ctxt->node, ret, value);
else {
ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt,
ctxt->myDoc, ctxt->node, ret, val);
xmlFree(val);
}
} else {
ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc,
ctxt->node, ret, value);
else {
}
} else {
/*
* when validating, the ID registration is done at the attribute
* validation level. Otherwise we have to do specific handling here.
@ -583,6 +732,8 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
xmlAddRef(&ctxt->vctxt, ctxt->myDoc, value, ret);
}
if (nval != NULL)
xmlFree(nval);
if (name != NULL)
xmlFree(name);
if (ns != NULL)
@ -634,7 +785,7 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
/*
* Split the full name into a namespace prefix and the tag name
*/
name = xmlSplitQName(fullname, &prefix);
name = xmlSplitQName(ctxt, fullname, &prefix);
/*
@ -644,13 +795,13 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
*/
ret = xmlNewDocNode(ctxt->myDoc, NULL, name, NULL);
if (ret == NULL) return;
if (ctxt->myDoc->root == NULL) {
if (ctxt->myDoc->children == NULL) {
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "Setting %s as root\n", name);
#endif
ctxt->myDoc->root = ret;
xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
} else if (parent == NULL) {
parent = ctxt->myDoc->root;
parent = ctxt->myDoc->children;
}
/*
@ -679,6 +830,15 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
}
}
/*
* If it's the Document root, finish the Dtd validation and
* check the document root element for validity
*/
if ((ctxt->validate) && (ctxt->vctxt.finishDtd == 0)) {
ctxt->valid &= xmlValidateDtdFinal(&ctxt->vctxt, ctxt->myDoc);
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
ctxt->vctxt.finishDtd = 1;
}
/*
* process all the attributes whose name start with "xml"
*/
@ -790,6 +950,9 @@ reference(void *ctx, const xmlChar *name)
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.reference(%s)\n", name);
#endif
if (name[0] == '#')
ret = xmlNewCharRef(ctxt->myDoc, name);
else
ret = xmlNewReference(ctxt->myDoc, name);
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "add reference %s to %s \n", name, ctxt->node->name);
@ -884,32 +1047,36 @@ processingInstruction(void *ctx, const xmlChar *target,
ret = xmlNewPI(target, data);
if (ret == NULL) return;
ret->doc = ctxt->myDoc;
if (ctxt->myDoc->root == NULL) {
parent = ctxt->node;
if (ctxt->inSubset == 1) {
xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
return;
} else if (ctxt->inSubset == 2) {
xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret);
return;
}
if ((ctxt->myDoc->children == NULL) || (parent == NULL)) {
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "Setting PI %s as root\n", target);
#endif
ctxt->myDoc->root = ret;
} else if (parent == NULL) {
parent = ctxt->myDoc->root;
xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
return;
}
if (parent != NULL) {
if (parent->type == XML_ELEMENT_NODE) {
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "adding PI child %s to %s\n", target, parent->name);
fprintf(stderr, "adding PI %s child to %s\n", target, parent->name);
#endif
xmlAddChild(parent, ret);
} else {
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "adding PI sibling %s to ", target);
fprintf(stderr, "adding PI %s sibling to ", target);
xmlDebugDumpOneNode(stderr, parent, 0);
#endif
xmlAddSibling(parent, ret);
}
}
}
/**
* globalNamespace:
* @ctx: the user data (XML parser context)
@ -1064,15 +1231,20 @@ comment(void *ctx, const xmlChar *value)
ret = xmlNewDocComment(ctxt->myDoc, value);
if (ret == NULL) return;
if (ctxt->myDoc->root == NULL) {
if (ctxt->inSubset == 1) {
xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
return;
} else if (ctxt->inSubset == 2) {
xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret);
return;
}
if ((ctxt->myDoc->children == NULL) || (parent == NULL)) {
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "Setting comment as root\n");
#endif
ctxt->myDoc->root = ret;
} else if (parent == NULL) {
parent = ctxt->myDoc->root;
xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
return;
}
if (parent != NULL) {
if (parent->type == XML_ELEMENT_NODE) {
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "adding comment child to %s\n", parent->name);
@ -1086,7 +1258,6 @@ comment(void *ctx, const xmlChar *value)
xmlAddSibling(parent, ret);
}
}
}
/**
* cdataBlock:
@ -1148,6 +1319,7 @@ xmlSAXHandler xmlDefaultSAXHandler = {
xmlParserError,
getParameterEntity,
cdataBlock,
externalSubset,
};
/**
@ -1159,6 +1331,7 @@ void
xmlDefaultSAXHandlerInit(void)
{
xmlDefaultSAXHandler.internalSubset = internalSubset;
xmlDefaultSAXHandler.externalSubset = externalSubset;
xmlDefaultSAXHandler.isStandalone = isStandalone;
xmlDefaultSAXHandler.hasInternalSubset = hasInternalSubset;
xmlDefaultSAXHandler.hasExternalSubset = hasExternalSubset;
@ -1181,6 +1354,9 @@ xmlDefaultSAXHandlerInit(void)
xmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
xmlDefaultSAXHandler.processingInstruction = processingInstruction;
xmlDefaultSAXHandler.comment = comment;
if (xmlGetWarningsDefaultValue == 0)
xmlDefaultSAXHandler.warning = NULL;
else
xmlDefaultSAXHandler.warning = xmlParserWarning;
xmlDefaultSAXHandler.error = xmlParserError;
xmlDefaultSAXHandler.fatalError = xmlParserError;
@ -1216,6 +1392,7 @@ xmlSAXHandler htmlDefaultSAXHandler = {
xmlParserError,
getParameterEntity,
NULL,
NULL,
};
/**
@ -1227,6 +1404,7 @@ void
htmlDefaultSAXHandlerInit(void)
{
htmlDefaultSAXHandler.internalSubset = NULL;
htmlDefaultSAXHandler.externalSubset = NULL;
htmlDefaultSAXHandler.isStandalone = NULL;
htmlDefaultSAXHandler.hasInternalSubset = NULL;
htmlDefaultSAXHandler.hasExternalSubset = NULL;

View File

@ -3,9 +3,9 @@ AC_PREREQ(2.2)
AC_INIT(entities.h)
AM_CONFIG_HEADER(config.h)
LIBXML_MAJOR_VERSION=1
LIBXML_MINOR_VERSION=8
LIBXML_MICRO_VERSION=7
LIBXML_MAJOR_VERSION=2
LIBXML_MINOR_VERSION=0
LIBXML_MICRO_VERSION=0
LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
@ -15,7 +15,7 @@ AC_SUBST(LIBXML_MICRO_VERSION)
AC_SUBST(LIBXML_VERSION)
AC_SUBST(LIBXML_VERSION_INFO)
VERSION=$LIBXML_VERSION
VERSION=$LIBXML_VERSION-beta
AM_INIT_AUTOMAKE(libxml, $VERSION)

View File

@ -22,6 +22,7 @@
#include "xmlmemory.h"
#include "tree.h"
#include "parser.h"
#include "valid.h"
#include "debugXML.h"
#include "HTMLtree.h"
#include "HTMLparser.h"
@ -38,6 +39,315 @@ void xmlDebugDumpString(FILE *output, const xmlChar *str) {
fprintf(output, "...");
}
/**
 * xmlDebugDumpDtd:
 * @output:  the FILE * the dump is written to
 * @dtd:  the DTD node to describe (may be NULL)
 * @depth:  nesting depth; two spaces of indentation per level, capped
 *          at 25 levels
 *
 * Write a one-line summary of a DTD node (name, PUBLIC and SYSTEM
 * identifiers) to @output, followed by a "PBM:" line for each
 * inconsistency found in its parent, doc and sibling links.
 */
void xmlDebugDumpDtd(FILE *output, xmlDtdPtr dtd, int depth) {
    int i;
    char shift[100];

    for (i = 0;((i < depth) && (i < 25));i++)
        shift[2 * i] = shift[2 * i + 1] = ' ';
    shift[2 * i] = shift[2 * i + 1] = 0;

    /* shift is data, not a format string: emit it with fputs */
    fputs(shift, output);

    if (dtd == NULL) {
        fprintf(output, "DTD node is NULL\n");
        return;
    }

    if (dtd->type != XML_DTD_NODE) {
        fprintf(output, "PBM: not a DTD\n");
        return;
    }
    if (dtd->name != NULL)
        fprintf(output, "DTD(%s)", dtd->name);
    else
        fprintf(output, "DTD");
    if (dtd->ExternalID != NULL)
        fprintf(output, ", PUBLIC %s", dtd->ExternalID);
    if (dtd->SystemID != NULL)
        fprintf(output, ", SYSTEM %s", dtd->SystemID);
    fprintf(output, "\n");

    /*
     * Do a bit of checking
     */
    if (dtd->parent == NULL)
        fprintf(output, "PBM: Dtd has no parent\n");
    if (dtd->doc == NULL)
        fprintf(output, "PBM: Dtd has no doc\n");
    if ((dtd->parent != NULL) && (dtd->doc != dtd->parent->doc))
        fprintf(output, "PBM: Dtd doc differs from parent's one\n");
    if (dtd->prev == NULL) {
        /* no previous sibling: must be the parent's first child */
        if ((dtd->parent != NULL) && (dtd->parent->children != (xmlNodePtr)dtd))
            fprintf(output, "PBM: Dtd has no prev and not first of list\n");
    } else {
        if (dtd->prev->next != (xmlNodePtr) dtd)
            fprintf(output, "PBM: Dtd prev->next : back link wrong\n");
    }
    if (dtd->next == NULL) {
        /* no next sibling: must be the parent's last child */
        if ((dtd->parent != NULL) && (dtd->parent->last != (xmlNodePtr) dtd))
            fprintf(output, "PBM: Dtd has no next and not last of list\n");
    } else {
        if (dtd->next->prev != (xmlNodePtr) dtd)
            fprintf(output, "PBM: Dtd next->prev : forward link wrong\n");
    }
}
/**
 * xmlDebugDumpAttrDecl:
 * @output:  the FILE * the dump is written to
 * @attr:  the attribute declaration to describe (may be NULL)
 * @depth:  nesting depth; two spaces of indentation per level, capped
 *          at 25 levels
 *
 * Write a one-line summary of an attribute declaration to @output:
 * its name, owning element, attribute type, up to five enumeration
 * values, default kind and default value. The summary is followed by a
 * "PBM:" line for each inconsistency found in its parent, doc and
 * sibling links.
 */
void xmlDebugDumpAttrDecl(FILE *output, xmlAttributePtr attr, int depth) {
    int i;
    char shift[100];

    for (i = 0;((i < depth) && (i < 25));i++)
        shift[2 * i] = shift[2 * i + 1] = ' ';
    shift[2 * i] = shift[2 * i + 1] = 0;

    /* shift is data, not a format string: emit it with fputs */
    fputs(shift, output);

    if (attr == NULL) {
        fprintf(output, "Attribute declaration is NULL\n");
        return;
    }

    if (attr->type != XML_ATTRIBUTE_DECL) {
        fprintf(output, "PBM: not a Attr\n");
        return;
    }
    if (attr->name != NULL)
        fprintf(output, "ATTRDECL(%s)", attr->name);
    else
        fprintf(output, "PBM ATTRDECL noname!!!");
    if (attr->elem != NULL)
        fprintf(output, " for %s", attr->elem);
    else
        fprintf(output, " PBM noelem!!!");
    switch (attr->atype) {
        case XML_ATTRIBUTE_CDATA:
            fprintf(output, " CDATA");
            break;
        case XML_ATTRIBUTE_ID:
            fprintf(output, " ID");
            break;
        case XML_ATTRIBUTE_IDREF:
            fprintf(output, " IDREF");
            break;
        case XML_ATTRIBUTE_IDREFS:
            fprintf(output, " IDREFS");
            break;
        case XML_ATTRIBUTE_ENTITY:
            fprintf(output, " ENTITY");
            break;
        case XML_ATTRIBUTE_ENTITIES:
            fprintf(output, " ENTITIES");
            break;
        case XML_ATTRIBUTE_NMTOKEN:
            fprintf(output, " NMTOKEN");
            break;
        case XML_ATTRIBUTE_NMTOKENS:
            fprintf(output, " NMTOKENS");
            break;
        case XML_ATTRIBUTE_ENUMERATION:
            fprintf(output, " ENUMERATION");
            break;
        case XML_ATTRIBUTE_NOTATION:
            fprintf(output, " NOTATION ");
            break;
    }
    if (attr->tree != NULL) {
        xmlEnumerationPtr cur = attr->tree;

        /* list at most five values; "..." marks a truncated list */
        for (i = 0;i < 5; i++) {
            if (i != 0)
                fprintf(output, "|%s", cur->name);
            else
                fprintf(output, " (%s", cur->name);
            cur = cur->next;
            if (cur == NULL) break;
        }
        if (cur == NULL)
            fprintf(output, ")");
        else
            fprintf(output, "...)");
    }
    switch (attr->def) {
        case XML_ATTRIBUTE_NONE:
            break;
        case XML_ATTRIBUTE_REQUIRED:
            fprintf(output, " REQUIRED");
            break;
        case XML_ATTRIBUTE_IMPLIED:
            fprintf(output, " IMPLIED");
            break;
        case XML_ATTRIBUTE_FIXED:
            fprintf(output, " FIXED");
            break;
    }
    if (attr->defaultValue != NULL) {
        fprintf(output, "\"");
        xmlDebugDumpString(output, attr->defaultValue);
        fprintf(output, "\"");
    }
    /* terminate the summary line on @output, not on stdout */
    fprintf(output, "\n");

    /*
     * Do a bit of checking
     */
    if (attr->parent == NULL)
        fprintf(output, "PBM: Attr has no parent\n");
    if (attr->doc == NULL)
        fprintf(output, "PBM: Attr has no doc\n");
    if ((attr->parent != NULL) && (attr->doc != attr->parent->doc))
        fprintf(output, "PBM: Attr doc differs from parent's one\n");
    if (attr->prev == NULL) {
        /* no previous sibling: must be the parent's first child */
        if ((attr->parent != NULL) && (attr->parent->children != (xmlNodePtr)attr))
            fprintf(output, "PBM: Attr has no prev and not first of list\n");
    } else {
        if (attr->prev->next != (xmlNodePtr) attr)
            fprintf(output, "PBM: Attr prev->next : back link wrong\n");
    }
    if (attr->next == NULL) {
        /* no next sibling: must be the parent's last child */
        if ((attr->parent != NULL) && (attr->parent->last != (xmlNodePtr) attr))
            fprintf(output, "PBM: Attr has no next and not last of list\n");
    } else {
        if (attr->next->prev != (xmlNodePtr) attr)
            fprintf(output, "PBM: Attr next->prev : forward link wrong\n");
    }
}
/**
 * xmlDebugDumpElemDecl:
 * @output:  the FILE * the dump is written to
 * @elem:  the element declaration to describe (may be NULL)
 * @depth:  nesting depth; two spaces of indentation per level, capped
 *          at 25 levels
 *
 * Write a one-line summary of an element declaration (name, content
 * type and content model) to @output, followed by a "PBM:" line for
 * each inconsistency found in its parent, doc and sibling links.
 */
void xmlDebugDumpElemDecl(FILE *output, xmlElementPtr elem, int depth) {
    int i;
    char shift[100];

    for (i = 0;((i < depth) && (i < 25));i++)
        shift[2 * i] = shift[2 * i + 1] = ' ';
    shift[2 * i] = shift[2 * i + 1] = 0;

    /* shift is data, not a format string: emit it with fputs */
    fputs(shift, output);

    if (elem == NULL) {
        fprintf(output, "Element declaration is NULL\n");
        return;
    }

    if (elem->type != XML_ELEMENT_DECL) {
        fprintf(output, "PBM: not a Elem\n");
        return;
    }
    if (elem->name != NULL)
        fprintf(output, "ELEMDECL(%s)", elem->name);
    else
        fprintf(output, "PBM ELEMDECL noname!!!");
    switch (elem->etype) {
        case XML_ELEMENT_TYPE_EMPTY:
            fprintf(output, ", EMPTY");
            break;
        case XML_ELEMENT_TYPE_ANY:
            fprintf(output, ", ANY");
            break;
        case XML_ELEMENT_TYPE_MIXED:
            fprintf(output, ", MIXED ");
            break;
        case XML_ELEMENT_TYPE_ELEMENT:
            /* element-only content: label it as such, not as MIXED */
            fprintf(output, ", ELEMENT ");
            break;
    }
    if (elem->content != NULL) {
        char buf[1001];

        buf[0] = 0;
        /*
         * NOTE(review): no buffer size is passed to
         * xmlSprintfElementContent(); confirm it cannot write more than
         * buf can hold for large content models.
         */
        xmlSprintfElementContent(buf, elem->content, 1);
        buf[1000] = 0;
        fprintf(output, "%s", buf);
    }
    /* terminate the summary line on @output, not on stdout */
    fprintf(output, "\n");

    /*
     * Do a bit of checking
     */
    if (elem->parent == NULL)
        fprintf(output, "PBM: Elem has no parent\n");
    if (elem->doc == NULL)
        fprintf(output, "PBM: Elem has no doc\n");
    if ((elem->parent != NULL) && (elem->doc != elem->parent->doc))
        fprintf(output, "PBM: Elem doc differs from parent's one\n");
    if (elem->prev == NULL) {
        /* no previous sibling: must be the parent's first child */
        if ((elem->parent != NULL) && (elem->parent->children != (xmlNodePtr)elem))
            fprintf(output, "PBM: Elem has no prev and not first of list\n");
    } else {
        if (elem->prev->next != (xmlNodePtr) elem)
            fprintf(output, "PBM: Elem prev->next : back link wrong\n");
    }
    if (elem->next == NULL) {
        /* no next sibling: must be the parent's last child */
        if ((elem->parent != NULL) && (elem->parent->last != (xmlNodePtr) elem))
            fprintf(output, "PBM: Elem has no next and not last of list\n");
    } else {
        if (elem->next->prev != (xmlNodePtr) elem)
            fprintf(output, "PBM: Elem next->prev : forward link wrong\n");
    }
}
/**
 * xmlDebugDumpEntityDecl:
 * @output:  the FILE * the dump is written to
 * @ent:  the entity declaration to describe (may be NULL)
 * @depth:  nesting depth; two spaces of indentation per level, capped
 *          at 25 levels
 *
 * Write a summary of an entity declaration to @output: its name and
 * entity kind on the first line, then the ExternalID, SystemID and
 * content on separate indented lines when present. The summary is
 * followed by a "PBM:" line for each inconsistency found in its
 * parent, doc and sibling links.
 */
void xmlDebugDumpEntityDecl(FILE *output, xmlEntityPtr ent, int depth) {
    int i;
    char shift[100];

    for (i = 0;((i < depth) && (i < 25));i++)
        shift[2 * i] = shift[2 * i + 1] = ' ';
    shift[2 * i] = shift[2 * i + 1] = 0;

    /* shift is data, not a format string: emit it with fputs */
    fputs(shift, output);

    if (ent == NULL) {
        fprintf(output, "Entity declaration is NULL\n");
        return;
    }

    if (ent->type != XML_ENTITY_DECL) {
        fprintf(output, "PBM: not a Entity decl\n");
        return;
    }
    if (ent->name != NULL)
        fprintf(output, "ENTITYDECL(%s)", ent->name);
    else
        fprintf(output, "PBM ENTITYDECL noname!!!");
    switch (ent->etype) {
        case XML_INTERNAL_GENERAL_ENTITY:
            fprintf(output, ", internal\n");
            break;
        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
            fprintf(output, ", external parsed\n");
            break;
        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
            fprintf(output, ", unparsed\n");
            break;
        case XML_INTERNAL_PARAMETER_ENTITY:
            fprintf(output, ", parameter\n");
            break;
        case XML_EXTERNAL_PARAMETER_ENTITY:
            fprintf(output, ", external parameter\n");
            break;
        case XML_INTERNAL_PREDEFINED_ENTITY:
            fprintf(output, ", predefined\n");
            break;
    }
    if (ent->ExternalID) {
        fputs(shift, output);
        fprintf(output, "ExternalID=%s\n", ent->ExternalID);
    }
    if (ent->SystemID) {
        fputs(shift, output);
        fprintf(output, "SystemID=%s\n", ent->SystemID);
    }
    if (ent->content) {
        fputs(shift, output);
        fprintf(output, "content=");
        xmlDebugDumpString(output, ent->content);
        fprintf(output, "\n");
    }

    /*
     * Do a bit of checking
     */
    if (ent->parent == NULL)
        fprintf(output, "PBM: Ent has no parent\n");
    if (ent->doc == NULL)
        fprintf(output, "PBM: Ent has no doc\n");
    if ((ent->parent != NULL) && (ent->doc != ent->parent->doc))
        fprintf(output, "PBM: Ent doc differs from parent's one\n");
    if (ent->prev == NULL) {
        /* no previous sibling: must be the parent's first child */
        if ((ent->parent != NULL) && (ent->parent->children != (xmlNodePtr)ent))
            fprintf(output, "PBM: Ent has no prev and not first of list\n");
    } else {
        if (ent->prev->next != (xmlNodePtr) ent)
            fprintf(output, "PBM: Ent prev->next : back link wrong\n");
    }
    if (ent->next == NULL) {
        /* no next sibling: must be the parent's last child */
        if ((ent->parent != NULL) && (ent->parent->last != (xmlNodePtr) ent))
            fprintf(output, "PBM: Ent has no next and not last of list\n");
    } else {
        if (ent->next->prev != (xmlNodePtr) ent)
            fprintf(output, "PBM: Ent next->prev : forward link wrong\n");
    }
}
void xmlDebugDumpNamespace(FILE *output, xmlNsPtr ns, int depth) {
int i;
char shift[100];
@ -74,7 +384,7 @@ void xmlDebugDumpEntity(FILE *output, xmlEntityPtr ent, int depth) {
shift[2 * i] = shift[2 * i + 1] = 0;
fprintf(output, shift);
switch (ent->type) {
switch (ent->etype) {
case XML_INTERNAL_GENERAL_ENTITY:
fprintf(output, "INTERNAL_GENERAL_ENTITY ");
break;
@ -91,7 +401,7 @@ void xmlDebugDumpEntity(FILE *output, xmlEntityPtr ent, int depth) {
fprintf(output, "EXTERNAL_PARAMETER_ENTITY ");
break;
default:
fprintf(output, "ENTITY_%d ! ", ent->type);
fprintf(output, "ENTITY_%d ! ", ent->etype);
}
fprintf(output, "%s\n", ent->name);
if (ent->ExternalID) {
@ -119,9 +429,31 @@ void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth) {
shift[2 * i] = shift[2 * i + 1] = 0;
fprintf(output, shift);
fprintf(output, "ATTRIBUTE %s\n", attr->name);
if (attr->val != NULL)
xmlDebugDumpNodeList(output, attr->val, depth + 1);
if (attr->children != NULL)
xmlDebugDumpNodeList(output, attr->children, depth + 1);
/*
* Do a bit of checking
*/
if (attr->parent == NULL)
fprintf(output, "PBM: Attr has no parent\n");
if (attr->doc == NULL)
fprintf(output, "PBM: Attr has no doc\n");
if ((attr->parent != NULL) && (attr->doc != attr->parent->doc))
fprintf(output, "PBM: Attr doc differs from parent's one\n");
if (attr->prev == NULL) {
if ((attr->parent != NULL) && (attr->parent->properties != attr))
fprintf(output, "PBM: Attr has no prev and not first of list\n");
} else {
if (attr->prev->next != attr)
fprintf(output, "PBM: Attr prev->next : back link wrong\n");
}
if (attr->next != NULL) {
if (attr->next->prev != attr)
fprintf(output, "PBM: Attr next->prev : forward link wrong\n");
}
}
void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth) {
@ -139,9 +471,9 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
shift[2 * i] = shift[2 * i + 1] = ' ';
shift[2 * i] = shift[2 * i + 1] = 0;
fprintf(output, shift);
switch (node->type) {
case XML_ELEMENT_NODE:
fprintf(output, shift);
fprintf(output, "ELEMENT ");
if (node->ns != NULL)
fprintf(output, "%s:%s\n", node->ns->prefix, node->name);
@ -149,40 +481,63 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
fprintf(output, "%s\n", node->name);
break;
case XML_ATTRIBUTE_NODE:
fprintf(output, shift);
fprintf(output, "Error, ATTRIBUTE found here\n");
break;
case XML_TEXT_NODE:
fprintf(output, shift);
fprintf(output, "TEXT\n");
break;
case XML_CDATA_SECTION_NODE:
fprintf(output, shift);
fprintf(output, "CDATA_SECTION\n");
break;
case XML_ENTITY_REF_NODE:
fprintf(output, "ENTITY_REF\n");
fprintf(output, shift);
fprintf(output, "ENTITY_REF(%s)\n", node->name);
break;
case XML_ENTITY_NODE:
fprintf(output, shift);
fprintf(output, "ENTITY\n");
break;
case XML_PI_NODE:
fprintf(output, shift);
fprintf(output, "PI %s\n", node->name);
break;
case XML_COMMENT_NODE:
fprintf(output, shift);
fprintf(output, "COMMENT\n");
break;
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
fprintf(output, shift);
fprintf(output, "Error, DOCUMENT found here\n");
break;
case XML_DOCUMENT_TYPE_NODE:
fprintf(output, shift);
fprintf(output, "DOCUMENT_TYPE\n");
break;
case XML_DOCUMENT_FRAG_NODE:
fprintf(output, shift);
fprintf(output, "DOCUMENT_FRAG\n");
break;
case XML_NOTATION_NODE:
fprintf(output, "NOTATION\n");
break;
case XML_DTD_NODE:
xmlDebugDumpDtd(output, (xmlDtdPtr) node, depth);
return;
case XML_ELEMENT_DECL:
xmlDebugDumpElemDecl(output, (xmlElementPtr) node, depth);
return;
case XML_ATTRIBUTE_DECL:
xmlDebugDumpAttrDecl(output, (xmlAttributePtr) node, depth);
return;
case XML_ENTITY_DECL:
xmlDebugDumpEntityDecl(output, (xmlEntityPtr) node, depth);
return;
default:
fprintf(output, shift);
fprintf(output, "NODE_%d\n", node->type);
}
if (node->doc == NULL) {
@ -210,12 +565,35 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
if (ent != NULL)
xmlDebugDumpEntity(output, ent, depth + 1);
}
/*
* Do a bit of checking
*/
if (node->parent == NULL)
fprintf(output, "PBM: Node has no parent\n");
if (node->doc == NULL)
fprintf(output, "PBM: Node has no doc\n");
if ((node->parent != NULL) && (node->doc != node->parent->doc))
fprintf(output, "PBM: Node doc differs from parent's one\n");
if (node->prev == NULL) {
if ((node->parent != NULL) && (node->parent->children != node))
fprintf(output, "PBM: Node has no prev and not first of list\n");
} else {
if (node->prev->next != node)
fprintf(output, "PBM: Node prev->next : back link wrong\n");
}
if (node->next == NULL) {
if ((node->parent != NULL) && (node->parent->last != node))
fprintf(output, "PBM: Node has no next and not last of list\n");
} else {
if (node->next->prev != node)
fprintf(output, "PBM: Node next->prev : forward link wrong\n");
}
}
void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth) {
xmlDebugDumpOneNode(output, node, depth);
if (node->childs != NULL)
xmlDebugDumpNodeList(output, node->childs, depth + 1);
if (node->children != NULL)
xmlDebugDumpNodeList(output, node->children, depth + 1);
}
void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth) {
@ -306,8 +684,8 @@ void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) {
xmlDebugDumpDocumentHead(output, doc);
if (((doc->type == XML_DOCUMENT_NODE) ||
(doc->type == XML_HTML_DOCUMENT_NODE)) &&
(doc->root != NULL))
xmlDebugDumpNodeList(output, doc->root, 1);
(doc->children != NULL))
xmlDebugDumpNodeList(output, doc->children, 1);
}
void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
@ -368,27 +746,27 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
doc->intSubset->entities;
fprintf(output, "Entities in internal subset\n");
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
cur = table->table[i];
fprintf(output, "%d : %s : ", i, cur->name);
switch (cur->type) {
switch (cur->etype) {
case XML_INTERNAL_GENERAL_ENTITY:
fprintf(output, "INTERNAL GENERAL");
fprintf(output, "INTERNAL GENERAL, ");
break;
case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
fprintf(output, "EXTERNAL PARSED");
fprintf(output, "EXTERNAL PARSED, ");
break;
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
fprintf(output, "EXTERNAL UNPARSED");
fprintf(output, "EXTERNAL UNPARSED, ");
break;
case XML_INTERNAL_PARAMETER_ENTITY:
fprintf(output, "INTERNAL PARAMETER");
fprintf(output, "INTERNAL PARAMETER, ");
break;
case XML_EXTERNAL_PARAMETER_ENTITY:
fprintf(output, "EXTERNAL PARAMETER");
fprintf(output, "EXTERNAL PARAMETER, ");
break;
default:
fprintf(output, "UNKNOWN TYPE %d",
cur->type);
cur->etype);
}
if (cur->ExternalID != NULL)
fprintf(output, "ID \"%s\"", cur->ExternalID);
@ -407,27 +785,27 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
doc->extSubset->entities;
fprintf(output, "Entities in external subset\n");
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
cur = table->table[i];
fprintf(output, "%d : %s : ", i, cur->name);
switch (cur->type) {
switch (cur->etype) {
case XML_INTERNAL_GENERAL_ENTITY:
fprintf(output, "INTERNAL GENERAL");
fprintf(output, "INTERNAL GENERAL, ");
break;
case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
fprintf(output, "EXTERNAL PARSED");
fprintf(output, "EXTERNAL PARSED, ");
break;
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
fprintf(output, "EXTERNAL UNPARSED");
fprintf(output, "EXTERNAL UNPARSED, ");
break;
case XML_INTERNAL_PARAMETER_ENTITY:
fprintf(output, "INTERNAL PARAMETER");
fprintf(output, "INTERNAL PARAMETER, ");
break;
case XML_EXTERNAL_PARAMETER_ENTITY:
fprintf(output, "EXTERNAL PARAMETER");
fprintf(output, "EXTERNAL PARAMETER, ");
break;
default:
fprintf(output, "UNKNOWN TYPE %d",
cur->type);
cur->etype);
}
if (cur->ExternalID != NULL)
fprintf(output, "ID \"%s\"", cur->ExternalID);
@ -449,14 +827,14 @@ static int xmlLsCountNode(xmlNodePtr node) {
switch (node->type) {
case XML_ELEMENT_NODE:
list = node->childs;
list = node->children;
break;
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
list = ((xmlDocPtr) node)->root;
list = ((xmlDocPtr) node)->children;
break;
case XML_ATTRIBUTE_NODE:
list = ((xmlAttrPtr) node)->val;
list = ((xmlAttrPtr) node)->children;
break;
case XML_TEXT_NODE:
case XML_CDATA_SECTION_NODE:
@ -475,6 +853,10 @@ static int xmlLsCountNode(xmlNodePtr node) {
case XML_ENTITY_NODE:
case XML_DOCUMENT_FRAG_NODE:
case XML_NOTATION_NODE:
case XML_DTD_NODE:
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
ret = 1;
break;
}
@ -621,9 +1003,9 @@ xmlShellList(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node,
if ((node->type == XML_DOCUMENT_NODE) ||
(node->type == XML_HTML_DOCUMENT_NODE)) {
cur = ((xmlDocPtr) node)->root;
} else if (node->childs != NULL) {
cur = node->childs;
cur = ((xmlDocPtr) node)->children;
} else if (node->children != NULL) {
cur = node->children;
} else {
xmlLsOneNode(stdout, node);
return(0);
@ -910,10 +1292,10 @@ xmlShellDu(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr tree,
if ((node->type == XML_DOCUMENT_NODE) ||
(node->type == XML_HTML_DOCUMENT_NODE)) {
node = ((xmlDocPtr) node)->root;
} else if (node->childs != NULL) {
node = ((xmlDocPtr) node)->children;
} else if (node->children != NULL) {
/* deep first */
node = node->childs;
node = node->children;
indent++;
} else if ((node != tree) && (node->next != NULL)) {
/* then siblings */
@ -1008,7 +1390,7 @@ xmlShellPwd(xmlShellCtxtPtr ctxt, char *buffer, xmlNodePtr node,
} else if (cur->type == XML_ATTRIBUTE_NODE) {
sep = '@';
name = (const char *) (((xmlAttrPtr) cur)->name);
next = ((xmlAttrPtr) cur)->node;
next = ((xmlAttrPtr) cur)->parent;
} else {
next = cur->parent;
}

View File

@ -35,14 +35,11 @@
#include <stdlib.h>
#endif
#include "encoding.h"
#ifdef HAVE_UNICODE_H
#include <unicode.h>
#endif
#include "xmlmemory.h"
#ifdef HAVE_UNICODE_H
xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
#else /* ! HAVE_UNICODE_H */
/*
* From rfc2044: encoding of the Unicode values on UTF-8:
*
@ -54,6 +51,50 @@
* I hope we won't use values > 0xFFFF anytime soon !
*/
/**
 * xmlCheckUTF8: Check utf-8 string for legality.
 * @utf: Pointer to putative utf-8 encoded string.
 *
 * Checks @utf for being valid utf-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer utf-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * The accepted byte patterns are:
 *    0xxxxxxx                               1-byte code
 *    110xxxxx 10xxxxxx                      2-byte code
 *    1110xxxx 10xxxxxx 10xxxxxx             3-byte code
 *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx    4-byte code
 * Any other lead byte (in particular a stray 10xxxxxx continuation
 * byte) makes the string invalid. A NULL @utf is reported as invalid.
 *
 * Return value: true if @utf is valid.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    int ix;
    unsigned char c;

    if (utf == NULL)
        return(0);

    /*
     * The continuation bytes are tested left to right with short-circuit
     * evaluation: a terminating 0 is never a continuation byte, so the
     * scan can not read past the end of the string.
     */
    for (ix = 0; (c = utf[ix]);) {
        if ((c & 0x80) == 0x00) {
            /* 1-byte code */
            ix++;
        } else if ((c & 0xe0) == 0xc0) {
            /* 2-byte code */
            if ((utf[ix + 1] & 0xc0) != 0x80)
                return(0);
            ix += 2;
        } else if ((c & 0xf0) == 0xe0) {
            /* 3-byte code */
            if (((utf[ix + 1] & 0xc0) != 0x80) ||
                ((utf[ix + 2] & 0xc0) != 0x80))
                return(0);
            ix += 3;
        } else if ((c & 0xf8) == 0xf0) {
            /* 4-byte code */
            if (((utf[ix + 1] & 0xc0) != 0x80) ||
                ((utf[ix + 2] & 0xc0) != 0x80) ||
                ((utf[ix + 3] & 0xc0) != 0x80))
                return(0);
            ix += 4;
        } else {
            /* continuation byte in lead position, or a 5+ byte lead */
            return(0);
        }
    }
    return(1);
}
/**
* isolat1ToUTF8:
* @out: a pointer to an array of bytes to store the result
@ -66,27 +107,27 @@
* Returns the number of byte written, or -1 by lack of space.
*/
int
isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen)
{
isolat1ToUTF8(unsigned char* out, int outlen,
const unsigned char* in, int *inlen) {
unsigned char* outstart= out;
unsigned char* outend= out+outlen;
unsigned char* inend= in+inlen;
const unsigned char* inend= in+*inlen;
unsigned char c;
while (in < inend) {
c= *in++;
if (c < 0x80) {
if (out >= outend) return -1;
if (out >= outend) return(-1);
*out++ = c;
}
else {
if (out >= outend) return -1;
if (out >= outend) return(-1);
*out++ = 0xC0 | (c >> 6);
if (out >= outend) return -1;
if (out >= outend) return(-1);
*out++ = 0x80 | (0x3F & c);
}
}
return out-outstart;
return(out-outstart);
}
/**
@ -101,138 +142,398 @@ isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen)
* TODO: UTF8Toisolat1 need a fallback mechanism ...
*
* Returns the number of byte written, or -1 by lack of space, or -2
* if the transcoding failed.
* if the transcoding fails (because *in is not a valid UTF-8 string or
* the result of the transformation can't fit into the target encoding).
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
*/
int
UTF8Toisolat1(unsigned char* out, int outlen, unsigned char* in, int inlen)
{
UTF8Toisolat1(unsigned char* out, int outlen,
const unsigned char* in, int *inlen) {
unsigned char* outstart= out;
unsigned char* outend= out+outlen;
unsigned char* inend= in+inlen;
const unsigned char* inend= in+*inlen;
unsigned char c;
while (in < inend) {
c= *in++;
if (c < 0x80) {
if (out >= outend) return -1;
if (out >= outend) return(-1);
*out++= c;
}
else if (((c & 0xFE) == 0xC2) && in<inend) {
if (out >= outend) return -1;
else if (in == inend) {
*inlen -= 1;
break;
}
else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) {
/* a two byte utf-8 and can be encoding as isolate1 */
*out++= ((c & 0x03) << 6) | (*in++ & 0x3F);
}
else return -2;
else
return(-2);
/* TODO : some should be represent as "&#x____;" */
}
return out-outstart;
return(out-outstart);
}
/**
* UTF16ToUTF8:
* UTF16LEToUTF8:
* @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out
* @in: a pointer to an array of UTF-16 chars (array of unsigned shorts)
* @inlen: the length of @in
* @inb: a pointer to an array of UTF-16LE passed as a byte array
* @inlenb: a pointer to the length of @inb in bytes
*
* Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
* block of chars out.
* Returns the number of byte written, or -1 by lack of space.
* Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
* block of chars out. This function assumes the endianness
* is the same between the native type of this machine and the
* input one.
*
* Returns the number of bytes written, or -1 by lack of space, or -2
* if the transcoding fails (because *in is not a valid UTF-16 string).
* The value of *inlenb after return is the number of octets consumed
* if the return value is positive, else unpredictable.
*/
int
UTF16ToUTF8(unsigned char* out, int outlen, unsigned short* in, int inlen)
UTF16LEToUTF8(unsigned char* out, int outlen,
const unsigned char* inb, int *inlenb)
{
unsigned char* outstart= out;
unsigned char* outend= out+outlen;
unsigned short* inend= in+inlen;
unsigned int c, d;
unsigned short* in = (unsigned short*) inb;
unsigned short* inend;
unsigned int c, d, inlen;
unsigned char *tmp;
int bits;
if ((*inlenb % 2) == 1)
(*inlenb)--;
inlen = *inlenb / 2;
inend= in + inlen;
while (in < inend) {
#ifdef BIG_ENDIAN
tmp = (unsigned char *) in;
c = *tmp++;
c = c | (((unsigned int)*tmp) << 8);
in++;
#else /* BIG_ENDIAN */
c= *in++;
#endif /* BIG_ENDIAN */
if ((c & 0xFC00) == 0xD800) { /* surrogates */
if ((in<inend) && (((d=*in++) & 0xFC00) == 0xDC00)) {
if (in >= inend) { /* (in > inend) shouldn't happens */
(*inlenb) -= 2;
break;
}
#ifdef BIG_ENDIAN
tmp = (unsigned char *) in;
d = *tmp++;
d = d | (((unsigned int)*tmp) << 8);
in++;
#else /* BIG_ENDIAN */
d = *in++;
#endif /* BIG_ENDIAN */
if ((d & 0xFC00) == 0xDC00) {
c &= 0x03FF;
c <<= 10;
c |= d & 0x03FF;
c += 0x10000;
}
else return -1;
else
return(-2);
}
/* assertion: c is a single UTF-4 value */
if (out >= outend) return -1;
if (out >= outend)
return(-1);
if (c < 0x80) { *out++= c; bits= -6; }
else if (c < 0x800) { *out++= (c >> 6) | 0xC0; bits= 0; }
else if (c < 0x10000) { *out++= (c >> 12) | 0xE0; bits= 6; }
else { *out++= (c >> 18) | 0xF0; bits= 12; }
else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
for ( ; bits > 0; bits-= 6) {
if (out >= outend) return -1;
*out++= (c >> bits) & 0x3F;
for ( ; bits >= 0; bits-= 6) {
if (out >= outend)
return(-1);
*out++= ((c >> bits) & 0x3F) | 0x80;
}
}
return out-outstart;
return(out-outstart);
}
/**
* UTF8ToUTF16:
* @out: a pointer to an array of shorts to store the result
* @outlen: the length of @out (number of shorts)
* UTF8ToUTF16LE:
* @outb: a pointer to an array of bytes to store the result
* @outlen: the length of @outb
* @in: a pointer to an array of UTF-8 chars
* @inlen: the length of @in
*
* Take a block of UTF-8 chars in and try to convert it to an UTF-16
* Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
* block of chars out.
* TODO: UTF8ToUTF16 need a fallback mechanism ...
* TODO: UTF8ToUTF16LE need a fallback mechanism ...
*
* Returns the number of byte written, or -1 by lack of space, or -2
* if the transcoding failed.
*/
int
UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int inlen)
UTF8ToUTF16LE(unsigned char* outb, int outlen,
const unsigned char* in, int *inlen)
{
unsigned short* out = (unsigned short*) outb;
unsigned short* outstart= out;
unsigned short* outend= out+outlen;
unsigned char* inend= in+inlen;
unsigned short* outend;
const unsigned char* inend= in+*inlen;
unsigned int c, d, trailing;
#ifdef BIG_ENDIAN
unsigned char *tmp;
unsigned short tmp1, tmp2;
#endif /* BIG_ENDIAN */
outlen /= 2; /* convert in short length */
outend = out + outlen;
while (in < inend) {
d= *in++;
if (d < 0x80) { c= d; trailing= 0; }
else if (d < 0xC0) return -2; /* trailing byte in leading position */
else if (d < 0xC0)
return(-2); /* trailing byte in leading position */
else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
else return -2; /* no chance for this in UTF-16 */
else
return(-2); /* no chance for this in UTF-16 */
if (inend - in < trailing) {
*inlen -= (inend - in);
break;
}
for ( ; trailing; trailing--) {
if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) return -1;
if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
return(-1);
c <<= 6;
c |= d & 0x3F;
}
/* assertion: c is a single UTF-4 value */
if (c < 0x10000) {
if (out >= outend) return -1;
if (out >= outend)
return(-1);
#ifdef BIG_ENDIAN
tmp = (unsigned char *) out;
*tmp = c ;
*(tmp + 1) = c >> 8 ;
out++;
#else /* BIG_ENDIAN */
*out++ = c;
#endif /* BIG_ENDIAN */
}
else if (c < 0x110000) {
if (out+1 >= outend) return -1;
if (out+1 >= outend)
return(-1);
c -= 0x10000;
#ifdef BIG_ENDIAN
tmp1 = 0xD800 | (c >> 10);
tmp = (unsigned char *) out;
*tmp = tmp1;
*(tmp + 1) = tmp1 >> 8;
out++;
tmp2 = 0xDC00 | (c & 0x03FF);
tmp = (unsigned char *) out;
*tmp = tmp2;
*(tmp + 1) = tmp2 >> 8;
out++;
#else /* BIG_ENDIAN */
*out++ = 0xD800 | (c >> 10);
*out++ = 0xDC00 | (c & 0x03FF);
#endif /* BIG_ENDIAN */
}
else return -1;
else
return(-1);
}
return out-outstart;
return(out-outstart);
}
#endif /* ! HAVE_UNICODE_H */
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  a pointer to the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is always read as big-endian
 * byte pairs, one byte at a time, so the result does not depend on the
 * byte order of the machine nor on the alignment of @inb.
 *
 * An odd trailing byte is ignored (and removed from *inlenb). A high
 * surrogate which is the last complete unit of the input is left
 * unconsumed: *inlenb is decreased by 2 and the conversion stops.
 *
 * Returns the number of byte written, or -1 by lack of space, or -2
 *     if the transcoding fails (a high surrogate is not followed by a
 *     low surrogate).
 * The value of *inlenb after return is the number of octets consumed
 *     if the return value is positive, else unpredictable.
 */
int
UTF16BEToUTF8(unsigned char* out, int outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    /* only whole 16 bits units are converted */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while (in < inend) {
        /* big-endian: most significant byte first */
        c = (((unsigned int) in[0]) << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* (in > inend) shouldn't happens */
                /* the second half is not there yet, keep the first one */
                (*inlenb) -= 2;
                break;
            }
            d = (((unsigned int) in[0]) << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) == 0xDC00) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
            else
                return(-2);
        }

        /* assertion: c is a single UTF-4 value */
        if (out >= outend)
            return(-1);
        /* lead byte, bits is the shift of the first continuation byte */
        if      (c <    0x80) {  *out++=  c;                       bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6) {
            if (out >= outend)
                return(-1);
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
    }
    return(out-outstart);
}
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  a pointer to the length of @in in bytes
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written one byte at a time, most
 * significant byte first, so the result does not depend on the byte
 * order of the machine nor on the alignment of @outb.
 * TODO: UTF8ToUTF16BE need a fallback mechanism ...
 *
 * If @in ends in the middle of a multi-byte sequence, the conversion
 * stops before that sequence and the whole sequence (lead byte
 * included) is reported as not consumed.
 *
 * Returns the number of byte written, or -1 by lack of space, or -2
 *     if the transcoding failed (invalid lead or continuation byte, or
 *     a value which can't be expressed in UTF-16).
 * The value of *inlen after a non negative return is the number of
 *     octets consumed, else unpredictable.
 */
int
UTF8ToUTF16BE(unsigned char* outb, int outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* processed = in; /* end of the last full char */
    const unsigned char* inend = in + *inlen;
    unsigned int c, d, trailing;

    outlen /= 2; /* only whole 16 bits units can be stored */
    outend = outb + 2 * outlen;

    while (in < inend) {
        d= *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)
            return(-2);    /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else
            return(-2);    /* no chance for this in UTF-16 */

        /* incomplete sequence at the end of the block, keep it whole */
        if ((unsigned int) (inend - in) < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                return(-2);    /* not a continuation byte */
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                return(-1);
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        }
        else if (c < 0x110000) {
            unsigned int high, low;

            /* a surrogate pair needs two 16 bits units */
            if (outend - out < 4)
                return(-1);
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (high >> 8);
            *out++ = (unsigned char) (high & 0xFF);
            *out++ = (unsigned char) (low >> 8);
            *out++ = (unsigned char) (low & 0xFF);
        }
        else
            return(-2);    /* beyond the range of UTF-16 */
        processed = in;
    }
    *inlen = (int) (processed - instart);
    return((int) (out - outb));
}
/**
* xmlDetectCharEncoding:
* @in: a pointer to the first bytes of the XML entity, must be at least
* 4 bytes long.
* @len: pointer to the length of the buffer
*
* Guess the encoding of the entity using the first bytes of the entity content
* accordingly of the non-normative appendix F of the XML-1.0 recommendation.
@ -240,8 +541,9 @@ UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int inlen)
* Returns one of the XML_CHAR_ENCODING_... values.
*/
xmlCharEncoding
xmlDetectCharEncoding(const unsigned char* in)
xmlDetectCharEncoding(const unsigned char* in, int len)
{
if (len >= 4) {
if ((in[0] == 0x00) && (in[1] == 0x00) &&
(in[2] == 0x00) && (in[3] == 0x3C))
return(XML_CHAR_ENCODING_UCS4BE);
@ -254,16 +556,19 @@ xmlDetectCharEncoding(const unsigned char* in)
if ((in[0] == 0x00) && (in[1] == 0x3C) &&
(in[2] == 0x00) && (in[3] == 0x00))
return(XML_CHAR_ENCODING_UCS4_3412);
if ((in[0] == 0xFE) && (in[1] == 0xFF))
return(XML_CHAR_ENCODING_UTF16BE);
if ((in[0] == 0xFF) && (in[1] == 0xFE))
return(XML_CHAR_ENCODING_UTF16LE);
if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
(in[2] == 0xA7) && (in[3] == 0x94))
return(XML_CHAR_ENCODING_EBCDIC);
if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
(in[2] == 0x78) && (in[3] == 0x6D))
return(XML_CHAR_ENCODING_UTF8);
}
if (len >= 2) {
if ((in[0] == 0xFE) && (in[1] == 0xFF))
return(XML_CHAR_ENCODING_UTF16BE);
if ((in[0] == 0xFF) && (in[1] == 0xFE))
return(XML_CHAR_ENCODING_UTF16LE);
}
return(XML_CHAR_ENCODING_NONE);
}
@ -364,7 +669,8 @@ static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
* Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
*/
xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input,
xmlNewCharEncodingHandler(const char *name,
xmlCharEncodingInputFunc input,
xmlCharEncodingOutputFunc output) {
xmlCharEncodingHandlerPtr handler;
char upper[500];
@ -429,11 +735,11 @@ xmlInitCharEncodingHandlers(void) {
return;
}
xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
#ifdef HAVE_UNICODE_H
#else
/* xmlNewCharEncodingHandler("UTF-16", UTF16ToUTF8, UTF8ToUTF16); */
xmlUTF16LEHandler =
xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
xmlUTF16BEHandler =
xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
#endif
}
/**
@ -493,7 +799,52 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
    /* the handlers are registered on first use */
    if (handlers == NULL)
        xmlInitCharEncodingHandlers();

    /*
     * Only the two UTF-16 variants are mapped to a handler here, every
     * other value of the enumeration gives NULL.
     */
    switch (enc) {
        case XML_CHAR_ENCODING_UTF16LE:
            return(xmlUTF16LEHandler);
        case XML_CHAR_ENCODING_UTF16BE:
            return(xmlUTF16BEHandler);
        case XML_CHAR_ENCODING_ERROR:
        case XML_CHAR_ENCODING_NONE:
        case XML_CHAR_ENCODING_UTF8:
        case XML_CHAR_ENCODING_EBCDIC:
        case XML_CHAR_ENCODING_UCS4LE:
        case XML_CHAR_ENCODING_UCS4BE:
        case XML_CHAR_ENCODING_UCS4_2143:
        case XML_CHAR_ENCODING_UCS4_3412:
        case XML_CHAR_ENCODING_UCS2:
        case XML_CHAR_ENCODING_8859_1:
        case XML_CHAR_ENCODING_8859_2:
        case XML_CHAR_ENCODING_8859_3:
        case XML_CHAR_ENCODING_8859_4:
        case XML_CHAR_ENCODING_8859_5:
        case XML_CHAR_ENCODING_8859_6:
        case XML_CHAR_ENCODING_8859_7:
        case XML_CHAR_ENCODING_8859_8:
        case XML_CHAR_ENCODING_8859_9:
        case XML_CHAR_ENCODING_2022_JP:
        case XML_CHAR_ENCODING_SHIFT_JIS:
        case XML_CHAR_ENCODING_EUC_JP:
            break;
    }
    return(NULL);
}

View File

@ -67,11 +67,11 @@ typedef enum {
* Returns the number of byte written, or -1 by lack of space.
*/
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
unsigned char* in, int inlen);
const unsigned char* in, int *inlen);
/**
* xmlCharEncodingInputFunc:
* xmlCharEncodingOutputFunc:
* @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out
* @in: a pointer to an array of UTF-8 chars
@ -84,7 +84,7 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
* if the transcoding failed.
*/
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
unsigned char* in, int inlen);
const unsigned char* in, int *inlen);
/*
* Block defining the handlers for non UTF-8 encodings.
@ -101,10 +101,12 @@ struct _xmlCharEncodingHandler {
void xmlInitCharEncodingHandlers (void);
void xmlCleanupCharEncodingHandlers (void);
void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);
xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in);
xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in,
int len);
xmlCharEncoding xmlParseCharEncoding (const char* name);
xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
int xmlCheckUTF8 (const unsigned char *utf);
#ifdef __cplusplus

View File

@ -21,6 +21,8 @@
#include "entities.h"
#include "parser.h"
#define DEBUG_ENT_REF /* debugging of cross entities dependancies */
/*
* The XML predefined entities.
*/
@ -45,6 +47,8 @@ xmlEntitiesTablePtr xmlPredefinedEntities = NULL;
void xmlFreeEntity(xmlEntityPtr entity) {
if (entity == NULL) return;
if (entity->children)
xmlFreeNodeList(entity->children);
if (entity->name != NULL)
xmlFree((char *) entity->name);
if (entity->ExternalID != NULL)
@ -55,22 +59,31 @@ void xmlFreeEntity(xmlEntityPtr entity) {
xmlFree((char *) entity->content);
if (entity->orig != NULL)
xmlFree((char *) entity->orig);
#ifdef WITH_EXTRA_ENT_DETECT
if (entity->entTab != NULL) {
int i;
for (i = 0; i < entity->entNr; i++)
xmlFree(entity->entTab[i]);
xmlFree(entity->entTab);
}
#endif
memset(entity, -1, sizeof(xmlEntity));
xmlFree(entity);
}
/*
* xmlAddEntity : register a new entity for an entities table.
*/
static void
static xmlEntityPtr
xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) {
int i;
xmlEntityPtr cur;
int len;
xmlEntityPtr ret;
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
if (!xmlStrcmp(cur->name, name)) {
ret = table->table[i];
if (!xmlStrcmp(ret->name, name)) {
/*
* The entity is already defined in this Dtd, the spec says to NOT
* override it ... Is it worth a Warning ??? !!!
@ -78,15 +91,15 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
*/
if (((type == XML_INTERNAL_PARAMETER_ENTITY) ||
(type == XML_EXTERNAL_PARAMETER_ENTITY)) &&
((cur->type == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->type == XML_EXTERNAL_PARAMETER_ENTITY)))
return;
((ret->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(ret->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
return(NULL);
else
if (((type != XML_INTERNAL_PARAMETER_ENTITY) &&
(type != XML_EXTERNAL_PARAMETER_ENTITY)) &&
((cur->type != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->type != XML_EXTERNAL_PARAMETER_ENTITY)))
return;
((ret->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
(ret->etype != XML_EXTERNAL_PARAMETER_ENTITY)))
return(NULL);
}
}
if (table->nb_entities >= table->max_entities) {
@ -94,35 +107,43 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
* need more elements.
*/
table->max_entities *= 2;
table->table = (xmlEntityPtr)
xmlRealloc(table->table, table->max_entities * sizeof(xmlEntity));
table->table = (xmlEntityPtr *)
xmlRealloc(table->table,
table->max_entities * sizeof(xmlEntityPtr));
if (table->table == NULL) {
perror("realloc failed");
return;
return(NULL);
}
}
cur = &table->table[table->nb_entities];
cur->name = xmlStrdup(name);
for (len = 0;name[0] != 0;name++)len++;
cur->len = len;
cur->type = type;
ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
if (ret == NULL) {
fprintf(stderr, "xmlAddEntity: out of memory\n");
return(NULL);
}
memset(ret, 0, sizeof(xmlEntity));
ret->type = XML_ENTITY_DECL;
table->table[table->nb_entities] = ret;
/*
* fill the structure.
*/
ret->name = xmlStrdup(name);
ret->etype = type;
if (ExternalID != NULL)
cur->ExternalID = xmlStrdup(ExternalID);
else
cur->ExternalID = NULL;
ret->ExternalID = xmlStrdup(ExternalID);
if (SystemID != NULL)
cur->SystemID = xmlStrdup(SystemID);
else
cur->SystemID = NULL;
ret->SystemID = xmlStrdup(SystemID);
if (content != NULL) {
cur->length = xmlStrlen(content);
cur->content = xmlStrndup(content, cur->length);
ret->length = xmlStrlen(content);
ret->content = xmlStrndup(content, ret->length);
} else {
cur->length = 0;
cur->content = NULL;
ret->length = 0;
ret->content = NULL;
}
cur->orig = NULL;
ret->orig = NULL;
table->nb_entities++;
return(ret);
}
/**
@ -182,7 +203,7 @@ xmlGetPredefinedEntity(const xmlChar *name) {
if (xmlPredefinedEntities == NULL)
xmlInitializePredefinedEntities();
for (i = 0;i < xmlPredefinedEntities->nb_entities;i++) {
cur = &xmlPredefinedEntities->table[i];
cur = xmlPredefinedEntities->table[i];
if (!xmlStrcmp(cur->name, name)) return(cur);
}
return(NULL);
@ -197,24 +218,50 @@ xmlGetPredefinedEntity(const xmlChar *name) {
* @SystemID: the entity system ID if available
* @content: the entity content
*
* Register a new entity for this document DTD.
* Register a new entity for this document DTD external subset.
*
* Returns a pointer to the entity or NULL in case of error
*/
void
xmlEntityPtr
xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) {
const xmlChar *ExternalID, const xmlChar *SystemID,
const xmlChar *content) {
xmlEntitiesTablePtr table;
xmlEntityPtr ret;
xmlDtdPtr dtd;
if (doc == NULL) {
fprintf(stderr,
"xmlAddDtdEntity: doc == NULL !\n");
return(NULL);
}
if (doc->extSubset == NULL) {
fprintf(stderr,
"xmlAddDtdEntity: document without external subset !\n");
return;
return(NULL);
}
table = (xmlEntitiesTablePtr) doc->extSubset->entities;
dtd = doc->extSubset;
table = (xmlEntitiesTablePtr) dtd->entities;
if (table == NULL) {
table = xmlCreateEntitiesTable();
doc->extSubset->entities = table;
dtd->entities = table;
}
xmlAddEntity(table, name, type, ExternalID, SystemID, content);
ret = xmlAddEntity(table, name, type, ExternalID, SystemID, content);
if (ret == NULL) return(NULL);
/*
* Link it to the Dtd
*/
ret->parent = dtd;
ret->doc = dtd->doc;
if (dtd->last == NULL) {
dtd->children = dtd->last = (xmlNodePtr) ret;
} else {
dtd->last->next = (xmlNodePtr) ret;
ret->prev = dtd->last;
dtd->last = (xmlNodePtr) ret;
}
return(ret);
}
/**
@ -227,29 +274,186 @@ xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
* @content: the entity content
*
* Register a new entity for this document.
*
* Returns a pointer to the entity or NULL in case of error
*/
void
xmlEntityPtr
xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) {
const xmlChar *ExternalID, const xmlChar *SystemID,
const xmlChar *content) {
xmlEntitiesTablePtr table;
xmlEntityPtr ret;
xmlDtdPtr dtd;
if (doc == NULL) {
fprintf(stderr,
"xmlAddDocEntity: document is NULL !\n");
return;
return(NULL);
}
if (doc->intSubset == NULL) {
fprintf(stderr,
"xmlAddDtdEntity: document without internal subset !\n");
return;
return(NULL);
}
dtd = doc->intSubset;
table = (xmlEntitiesTablePtr) doc->intSubset->entities;
if (table == NULL) {
table = xmlCreateEntitiesTable();
doc->intSubset->entities = table;
}
xmlAddEntity(table, name, type, ExternalID, SystemID, content);
ret = xmlAddEntity(table, name, type, ExternalID, SystemID, content);
if (ret == NULL) return(NULL);
/*
* Link it to the Dtd
*/
ret->parent = dtd;
ret->doc = dtd->doc;
if (dtd->last == NULL) {
dtd->children = dtd->last = (xmlNodePtr) ret;
} else {
dtd->last->next = (xmlNodePtr) ret;
ret->prev = dtd->last;
dtd->last = (xmlNodePtr) ret;
}
return(ret);
}
#ifdef WITH_EXTRA_ENT_DETECT
/**
 * xmlEntityCheckReference:
 * @ent:  an existing entity
 * @to:  the entity name it's referencing
 *
 * Walk the names recorded in @ent's reference table (entTab) and, through
 * them, the reference tables of the entities they designate, looking for
 * the name @to. Finding it means that a chain of references leads back to
 * @to, i.e. a cycle (a well formedness error).
 *
 * Returns: 0 if Okay, -1 in case of general error (@ent, @to or the
 *          document of @ent is NULL), 1 in case of loop detection.
 */
int
xmlEntityCheckReference(xmlEntityPtr ent, const xmlChar *to) {
    xmlDocPtr doc;
    int idx;

    if ((ent == NULL) || (to == NULL))
        return(-1);
    doc = ent->doc;
    if (doc == NULL)
        return(-1);

#ifdef DEBUG_ENT_REF
    printf("xmlEntityCheckReference(%s to %s)\n", ent->name, to);
#endif

    for (idx = 0; idx < ent->entNr; idx++) {
        const xmlChar *ref = ent->entTab[idx];
        xmlEntityPtr target = NULL;

        /* Direct hit: @ent itself already references @to. */
        if (xmlStrcmp(to, ref) == 0)
            return(1);

        /*
         * Otherwise resolve the referenced name and search its own table.
         * NOTE(review): parameter entities are resolved with
         * xmlGetDtdEntity(); confirm that this lookup can actually return
         * parameter entities, otherwise indirect loops between them are
         * never followed.
         */
        if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
            (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
            target = xmlGetDocEntity(doc, ref);
        } else if ((ent->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
                   (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
            target = xmlGetDtdEntity(doc, ref);
        }
        /* predefined and unparsed entities are not followed */

        if ((target != NULL) &&
            (xmlEntityCheckReference(target, to) == 1))
            return(1);
    }
    return(0);
}
/**
 * xmlEntityAddReference:
 * @ent:  an existing entity
 * @to:  the entity name it's referencing
 *
 * Function to register reuse of an existing entity from a (new) one
 * Used to keep track of references and detect cycles (well formedness
 * errors !).
 *
 * The name @to is duplicated and appended to the reference table of @ent
 * unless it is already recorded there. Nothing is recorded when a loop is
 * detected or when an allocation fails; in the latter case the existing
 * table is left untouched.
 *
 * Returns: 0 if Okay, -1 in case of general error (NULL argument, entity
 *          without document, out of memory), 1 in case of loop detection.
 */
int
xmlEntityAddReference(xmlEntityPtr ent, const xmlChar *to) {
    int i;
    xmlDocPtr doc;
    xmlEntityPtr indir = NULL;
    xmlChar *copy;

    if (ent == NULL) return(-1);
    if (to == NULL) return(-1);
    doc = ent->doc;
    if (doc == NULL) return(-1);

#ifdef DEBUG_ENT_REF
    printf("xmlEntityAddReference(%s to %s)\n", ent->name, to);
#endif

    /*
     * An entity referencing itself is the shortest possible loop. It has
     * to be caught here: the recursive check below only inspects names
     * already recorded, so it would miss it, and once recorded a self
     * reference would make xmlEntityCheckReference() recurse forever.
     */
    if ((ent->name != NULL) && (!xmlStrcmp(to, ent->name)))
        return(1);

    if (ent->entTab == NULL) {
        ent->entNr = 0;
        ent->entMax = 5;
        ent->entTab = (xmlChar **) xmlMalloc(ent->entMax * sizeof(xmlChar *));
        if (ent->entTab == NULL) {
            fprintf(stderr, "xmlEntityAddReference: out of memory !\n");
            return(-1);
        }
    }

    /* Already known reference, nothing to do. */
    for (i = 0;i < ent->entNr;i++) {
        if (!xmlStrcmp(to, ent->entTab[i]))
            return(0);
    }

    /*
     * Do a recursive checking: if the entity named @to already leads
     * back to @ent, adding this reference would close a cycle.
     */
    switch (ent->etype) {
        case XML_INTERNAL_GENERAL_ENTITY:
        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
            indir = xmlGetDocEntity(doc, to);
            break;
        case XML_INTERNAL_PARAMETER_ENTITY:
        case XML_EXTERNAL_PARAMETER_ENTITY:
            indir = xmlGetDtdEntity(doc, to);
            break;
        case XML_INTERNAL_PREDEFINED_ENTITY:
        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
            break;
    }
    if ((indir != NULL) &&
        (xmlEntityCheckReference(indir, ent->name) == 1))
        return(1);

    /*
     * Add this to the list. The table is grown through a temporary so
     * that a failed reallocation neither leaks the old block nor leaves
     * entTab NULL while entNr is still non zero.
     */
    if (ent->entMax <= ent->entNr) {
        int newMax = (ent->entMax > 0) ? ent->entMax * 2 : 5;
        xmlChar **tmp;

        tmp = (xmlChar **) xmlRealloc(ent->entTab,
                                      newMax * sizeof(xmlChar *));
        if (tmp == NULL) {
            fprintf(stderr, "xmlEntityAddReference: out of memory !\n");
            return(-1);
        }
        ent->entTab = tmp;
        ent->entMax = newMax;
    }
    copy = xmlStrdup(to);
    if (copy == NULL) {
        fprintf(stderr, "xmlEntityAddReference: out of memory !\n");
        return(-1);
    }
    ent->entTab[ent->entNr++] = copy;
    return(0);
}
#endif
/**
* xmlGetParameterEntity:
@ -270,27 +474,27 @@ xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->intSubset->entities;
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
if (((cur->type == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->type == XML_EXTERNAL_PARAMETER_ENTITY)) &&
cur = table->table[i];
if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) &&
(!xmlStrcmp(cur->name, name))) return(cur);
}
}
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities;
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
if (((cur->type == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->type == XML_EXTERNAL_PARAMETER_ENTITY)) &&
cur = table->table[i];
if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) &&
(!xmlStrcmp(cur->name, name))) return(cur);
}
}
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities;
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
if (((cur->type == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->type == XML_EXTERNAL_PARAMETER_ENTITY)) &&
cur = table->table[i];
if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) &&
(!xmlStrcmp(cur->name, name))) return(cur);
}
}
@ -316,9 +520,9 @@ xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities;
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
if ((cur->type != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->type != XML_EXTERNAL_PARAMETER_ENTITY) &&
cur = table->table[i];
if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
(!xmlStrcmp(cur->name, name))) return(cur);
}
}
@ -345,18 +549,18 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->intSubset->entities;
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
if ((cur->type != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->type != XML_EXTERNAL_PARAMETER_ENTITY) &&
cur = table->table[i];
if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
(!xmlStrcmp(cur->name, name))) return(cur);
}
}
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities;
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
if ((cur->type != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->type != XML_EXTERNAL_PARAMETER_ENTITY) &&
cur = table->table[i];
if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
(!xmlStrcmp(cur->name, name))) return(cur);
}
}
@ -364,9 +568,9 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
xmlInitializePredefinedEntities();
table = xmlPredefinedEntities;
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
if ((cur->type != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->type != XML_EXTERNAL_PARAMETER_ENTITY) &&
cur = table->table[i];
if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
(cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
(!xmlStrcmp(cur->name, name))) return(cur);
}
@ -612,6 +816,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
*out++ = 'o';
*out++ = 't';
*out++ = ';';
#if 0
} else if ((*cur == '\'') && (!html)) {
*out++ = '&';
*out++ = 'a';
@ -619,15 +824,20 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
*out++ = 'o';
*out++ = 's';
*out++ = ';';
#endif
} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
(*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
/*
* default case, just copy !
*/
*out++ = *cur;
#ifndef USE_UTF_8
} else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
char buf[10], *ptr;
} else if (*cur >= 0x80) {
if (html) {
char buf[15], *ptr;
/*
* TODO: improve by searching in html40EntitiesTable
*/
#ifdef HAVE_SNPRINTF
snprintf(buf, 9, "&#%d;", *cur);
#else
@ -635,7 +845,80 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
#endif
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
} else if (doc->encoding != NULL) {
/*
* TODO !!!
*/
*out++ = *cur;
} else {
/*
* We assume we have UTF-8 input.
*/
char buf[10], *ptr;
int val = 0, l = 1;
if (*cur < 0xC0) {
fprintf(stderr,
"xmlEncodeEntitiesReentrant : input not UTF-8\n");
doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
#ifdef HAVE_SNPRINTF
snprintf(buf, 9, "&#%d;", *cur);
#else
sprintf(buf, "&#%d;", *cur);
#endif
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
continue;
} else if (*cur < 0xE0) {
val = (cur[0]) & 0x1F;
val <<= 6;
val |= (cur[1]) & 0x3F;
l = 2;
} else if (*cur < 0xF0) {
val = (cur[0]) & 0x0F;
val <<= 6;
val |= (cur[1]) & 0x3F;
val <<= 6;
val |= (cur[2]) & 0x3F;
l = 3;
} else if (*cur < 0xF8) {
val = (cur[0]) & 0x07;
val <<= 6;
val |= (cur[1]) & 0x3F;
val <<= 6;
val |= (cur[2]) & 0x3F;
val <<= 6;
val |= (cur[3]) & 0x3F;
l = 4;
}
if ((l == 1) || (!IS_CHAR(val))) {
fprintf(stderr,
"xmlEncodeEntitiesReentrant : char out of range\n");
doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
#ifdef HAVE_SNPRINTF
snprintf(buf, 9, "&#%d;", *cur);
#else
sprintf(buf, "&#%d;", *cur);
#endif
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
cur++;
continue;
}
/*
* We could do multiple things here. Just save as a char ref
*/
#ifdef HAVE_SNPRINTF
snprintf(buf, 14, "&#x%X;", val);
#else
sprintf(buf, "&#x%X;", val);
#endif
buf[14] = 0;
ptr = buf;
while (*ptr != 0) *out++ = *ptr++;
cur += l;
continue;
}
} else if (IS_CHAR(*cur)) {
char buf[10], *ptr;
@ -682,11 +965,11 @@ xmlCreateEntitiesTable(void) {
}
ret->max_entities = XML_MIN_ENTITIES_TABLE;
ret->nb_entities = 0;
ret->table = (xmlEntityPtr )
xmlMalloc(ret->max_entities * sizeof(xmlEntity));
ret->table = (xmlEntityPtr *)
xmlMalloc(ret->max_entities * sizeof(xmlEntityPtr));
if (ret == NULL) {
fprintf(stderr, "xmlCreateEntitiesTable : xmlMalloc(%ld) failed\n",
ret->max_entities * (long)sizeof(xmlEntity));
ret->max_entities * (long)sizeof(xmlEntityPtr));
xmlFree(ret);
return(NULL);
}
@ -706,7 +989,7 @@ xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
if (table == NULL) return;
for (i = 0;i < table->nb_entities;i++) {
xmlFreeEntity(&table->table[i]);
xmlFreeEntity(table->table[i]);
}
xmlFree(table->table);
xmlFree(table);
@ -731,8 +1014,8 @@ xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n");
return(NULL);
}
ret->table = (xmlEntityPtr) xmlMalloc(table->max_entities *
sizeof(xmlEntity));
ret->table = (xmlEntityPtr *) xmlMalloc(table->max_entities *
sizeof(xmlEntityPtr));
if (ret->table == NULL) {
fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n");
xmlFree(ret);
@ -741,34 +1024,119 @@ xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
ret->max_entities = table->max_entities;
ret->nb_entities = table->nb_entities;
for (i = 0;i < ret->nb_entities;i++) {
cur = &ret->table[i];
ent = &table->table[i];
cur->len = ent->len;
cur->type = ent->type;
cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
if (cur == NULL) {
fprintf(stderr, "xmlCopyEntityTable: out of memory !\n");
xmlFree(ret);
xmlFree(ret->table);
return(NULL);
}
memset(cur, 0, sizeof(xmlEntity));
cur->type = XML_ELEMENT_DECL;
ret->table[i] = cur;
ent = table->table[i];
cur->etype = ent->etype;
if (ent->name != NULL)
cur->name = xmlStrdup(ent->name);
else
cur->name = NULL;
if (ent->ExternalID != NULL)
cur->ExternalID = xmlStrdup(ent->ExternalID);
else
cur->ExternalID = NULL;
if (ent->SystemID != NULL)
cur->SystemID = xmlStrdup(ent->SystemID);
else
cur->SystemID = NULL;
if (ent->content != NULL)
cur->content = xmlStrdup(ent->content);
else
cur->content = NULL;
if (ent->orig != NULL)
cur->orig = xmlStrdup(ent->orig);
else
cur->orig = NULL;
}
return(ret);
}
/*
 * xmlDumpEntityExternalID:
 * @buf:  An XML buffer.
 * @ent:  An entity (not NULL)
 *
 * Write the external identifier part of an entity declaration:
 * ` PUBLIC "ExternalID" "SystemID"` when an ExternalID is set,
 * ` SYSTEM "SystemID"` otherwise.
 */
static void
xmlDumpEntityExternalID(xmlBufferPtr buf, xmlEntityPtr ent) {
    if (ent->ExternalID != NULL) {
        xmlBufferWriteChar(buf, " PUBLIC ");
        xmlBufferWriteQuotedString(buf, ent->ExternalID);
        xmlBufferWriteChar(buf, " ");
        xmlBufferWriteQuotedString(buf, ent->SystemID);
    } else {
        xmlBufferWriteChar(buf, " SYSTEM ");
        xmlBufferWriteQuotedString(buf, ent->SystemID);
    }
}

/**
 * xmlDumpEntityDecl:
 * @buf:  An XML buffer.
 * @ent:  An entity
 *
 * This will dump the content of the entity as an XML DTD definition
 * (one `<!ENTITY ...>` declaration followed by a newline). A NULL @ent
 * is ignored. Entity types without a declaration form here (e.g. the
 * predefined ones) are reported on stderr and produce no output in @buf.
 */
void
xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
    if (ent == NULL) return;

    switch (ent->etype) {
        case XML_INTERNAL_GENERAL_ENTITY:
            xmlBufferWriteChar(buf, "<!ENTITY ");
            xmlBufferWriteCHAR(buf, ent->name);
            xmlBufferWriteChar(buf, " ");
            /* prefer the value as written, before reference substitution */
            if (ent->orig != NULL)
                xmlBufferWriteQuotedString(buf, ent->orig);
            else
                xmlBufferWriteQuotedString(buf, ent->content);
            xmlBufferWriteChar(buf, ">\n");
            break;
        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
            xmlBufferWriteChar(buf, "<!ENTITY ");
            xmlBufferWriteCHAR(buf, ent->name);
            xmlDumpEntityExternalID(buf, ent);
            xmlBufferWriteChar(buf, ">\n");
            break;
        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
            xmlBufferWriteChar(buf, "<!ENTITY ");
            xmlBufferWriteCHAR(buf, ent->name);
            xmlDumpEntityExternalID(buf, ent);
            /* for unparsed entities content holds the notation name */
            if (ent->content != NULL) { /* Should be true ! */
                xmlBufferWriteChar(buf, " NDATA ");
                if (ent->orig != NULL)
                    xmlBufferWriteCHAR(buf, ent->orig);
                else
                    xmlBufferWriteCHAR(buf, ent->content);
            }
            xmlBufferWriteChar(buf, ">\n");
            break;
        case XML_INTERNAL_PARAMETER_ENTITY:
            xmlBufferWriteChar(buf, "<!ENTITY % ");
            xmlBufferWriteCHAR(buf, ent->name);
            xmlBufferWriteChar(buf, " ");
            if (ent->orig == NULL)
                xmlBufferWriteQuotedString(buf, ent->content);
            else
                xmlBufferWriteQuotedString(buf, ent->orig);
            xmlBufferWriteChar(buf, ">\n");
            break;
        case XML_EXTERNAL_PARAMETER_ENTITY:
            xmlBufferWriteChar(buf, "<!ENTITY % ");
            xmlBufferWriteCHAR(buf, ent->name);
            xmlDumpEntityExternalID(buf, ent);
            xmlBufferWriteChar(buf, ">\n");
            break;
        default:
            fprintf(stderr,
                "xmlDumpEntityDecl: internal: unknown type %d\n",
                (int) ent->etype);
    }
}
/**
* xmlDumpEntitiesTable:
* @buf: An XML buffer.
@ -784,81 +1152,7 @@ xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
if (table == NULL) return;
for (i = 0;i < table->nb_entities;i++) {
cur = &table->table[i];
switch (cur->type) {
case XML_INTERNAL_GENERAL_ENTITY:
xmlBufferWriteChar(buf, "<!ENTITY ");
xmlBufferWriteCHAR(buf, cur->name);
xmlBufferWriteChar(buf, " ");
if (cur->orig != NULL)
xmlBufferWriteQuotedString(buf, cur->orig);
else
xmlBufferWriteQuotedString(buf, cur->content);
xmlBufferWriteChar(buf, ">\n");
break;
case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
xmlBufferWriteChar(buf, "<!ENTITY ");
xmlBufferWriteCHAR(buf, cur->name);
if (cur->ExternalID != NULL) {
xmlBufferWriteChar(buf, " PUBLIC ");
xmlBufferWriteQuotedString(buf, cur->ExternalID);
xmlBufferWriteChar(buf, " ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
} else {
xmlBufferWriteChar(buf, " SYSTEM ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
}
xmlBufferWriteChar(buf, ">\n");
break;
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
xmlBufferWriteChar(buf, "<!ENTITY ");
xmlBufferWriteCHAR(buf, cur->name);
if (cur->ExternalID != NULL) {
xmlBufferWriteChar(buf, " PUBLIC ");
xmlBufferWriteQuotedString(buf, cur->ExternalID);
xmlBufferWriteChar(buf, " ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
} else {
xmlBufferWriteChar(buf, " SYSTEM ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
}
if (cur->content != NULL) { /* Should be true ! */
xmlBufferWriteChar(buf, " NDATA ");
if (cur->orig != NULL)
xmlBufferWriteCHAR(buf, cur->orig);
else
xmlBufferWriteCHAR(buf, cur->content);
}
xmlBufferWriteChar(buf, ">\n");
break;
case XML_INTERNAL_PARAMETER_ENTITY:
xmlBufferWriteChar(buf, "<!ENTITY % ");
xmlBufferWriteCHAR(buf, cur->name);
xmlBufferWriteChar(buf, " ");
if (cur->orig == NULL)
xmlBufferWriteQuotedString(buf, cur->content);
else
xmlBufferWriteQuotedString(buf, cur->orig);
xmlBufferWriteChar(buf, ">\n");
break;
case XML_EXTERNAL_PARAMETER_ENTITY:
xmlBufferWriteChar(buf, "<!ENTITY % ");
xmlBufferWriteCHAR(buf, cur->name);
if (cur->ExternalID != NULL) {
xmlBufferWriteChar(buf, " PUBLIC ");
xmlBufferWriteQuotedString(buf, cur->ExternalID);
xmlBufferWriteChar(buf, " ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
} else {
xmlBufferWriteChar(buf, " SYSTEM ");
xmlBufferWriteQuotedString(buf, cur->SystemID);
}
xmlBufferWriteChar(buf, ">\n");
break;
default:
fprintf(stderr,
"xmlDumpEntitiesTable: internal: unknown type %d\n",
cur->type);
}
cur = table->table[i];
xmlDumpEntityDecl(buf, cur);
}
}

View File

@ -15,12 +15,17 @@
extern "C" {
#endif
#define XML_INTERNAL_GENERAL_ENTITY 1
#define XML_EXTERNAL_GENERAL_PARSED_ENTITY 2
#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3
#define XML_INTERNAL_PARAMETER_ENTITY 4
#define XML_EXTERNAL_PARAMETER_ENTITY 5
#define XML_INTERNAL_PREDEFINED_ENTITY 6
/*
* The different valid entity types
*/
typedef enum {
XML_INTERNAL_GENERAL_ENTITY = 1,
XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2,
XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3,
XML_INTERNAL_PARAMETER_ENTITY = 4,
XML_EXTERNAL_PARAMETER_ENTITY = 5,
XML_INTERNAL_PREDEFINED_ENTITY = 6
} xmlEntityType;
/*
* An unit of storage for an entity, contains the string, the value
@ -30,14 +35,32 @@ extern "C" {
typedef struct _xmlEntity xmlEntity;
typedef xmlEntity *xmlEntityPtr;
struct _xmlEntity {
int type; /* The entity type */
int len; /* The lenght of the name */
const xmlChar *name; /* Name of the entity */
const xmlChar *ExternalID; /* External identifier for PUBLIC Entity */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
xmlChar *content; /* The entity content or ndata if unparsed */
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_ENTITY_DECL, must be second ! */
const xmlChar *name; /* Attribute name */
struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlChar *orig; /* content without ref substitution */
xmlChar *content; /* content or ndata if unparsed */
int length; /* the content length */
xmlChar *orig; /* The entity cont without ref substitution */
xmlEntityType etype; /* The entity type */
const xmlChar *ExternalID; /* External identifier for PUBLIC */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
#ifdef WITH_EXTRA_ENT_DETECT
/* Referenced entities name stack */
xmlChar *ent; /* Current parsed Node */
int entNr; /* Depth of the parsing stack */
int entMax; /* Max depth of the parsing stack */
xmlChar * *entTab; /* array of nodes */
#endif
};
/*
@ -52,7 +75,7 @@ typedef xmlEntitiesTable *xmlEntitiesTablePtr;
struct _xmlEntitiesTable {
int nb_entities; /* number of elements stored */
int max_entities; /* maximum number of elements */
xmlEntityPtr table; /* the table of entities */
xmlEntityPtr *table; /* the table of entities */
};
@ -60,13 +83,13 @@ struct _xmlEntitiesTable {
* External functions :
*/
void xmlAddDocEntity (xmlDocPtr doc,
xmlEntityPtr xmlAddDocEntity (xmlDocPtr doc,
const xmlChar *name,
int type,
const xmlChar *ExternalID,
const xmlChar *SystemID,
const xmlChar *content);
void xmlAddDtdEntity (xmlDocPtr doc,
xmlEntityPtr xmlAddDtdEntity (xmlDocPtr doc,
const xmlChar *name,
int type,
const xmlChar *ExternalID,
@ -88,9 +111,16 @@ xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table);
void xmlFreeEntitiesTable (xmlEntitiesTablePtr table);
void xmlDumpEntitiesTable (xmlBufferPtr buf,
xmlEntitiesTablePtr table);
void xmlDumpEntityDecl (xmlBufferPtr buf,
xmlEntityPtr ent);
xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table);
void xmlCleanupPredefinedEntities(void);
#ifdef WITH_EXTRA_ENT_DETECT
int xmlEntityAddReference (xmlEntityPtr ent,
const xmlChar *to);
#endif
#ifdef __cplusplus
}
#endif

View File

@ -67,11 +67,11 @@ typedef enum {
* Returns the number of byte written, or -1 by lack of space.
*/
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
unsigned char* in, int inlen);
const unsigned char* in, int *inlen);
/**
* xmlCharEncodingInputFunc:
* xmlCharEncodingOutputFunc:
* @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out
* @in: a pointer to an array of UTF-8 chars
@ -84,7 +84,7 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
* if the transcoding failed.
*/
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
unsigned char* in, int inlen);
const unsigned char* in, int *inlen);
/*
* Block defining the handlers for non UTF-8 encodings.
@ -101,10 +101,12 @@ struct _xmlCharEncodingHandler {
void xmlInitCharEncodingHandlers (void);
void xmlCleanupCharEncodingHandlers (void);
void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);
xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in);
xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in,
int len);
xmlCharEncoding xmlParseCharEncoding (const char* name);
xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
int xmlCheckUTF8 (const unsigned char *utf);
#ifdef __cplusplus

View File

@ -15,12 +15,17 @@
extern "C" {
#endif
#define XML_INTERNAL_GENERAL_ENTITY 1
#define XML_EXTERNAL_GENERAL_PARSED_ENTITY 2
#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3
#define XML_INTERNAL_PARAMETER_ENTITY 4
#define XML_EXTERNAL_PARAMETER_ENTITY 5
#define XML_INTERNAL_PREDEFINED_ENTITY 6
/*
* The different valid entity types
*/
typedef enum {
XML_INTERNAL_GENERAL_ENTITY = 1,
XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2,
XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3,
XML_INTERNAL_PARAMETER_ENTITY = 4,
XML_EXTERNAL_PARAMETER_ENTITY = 5,
XML_INTERNAL_PREDEFINED_ENTITY = 6
} xmlEntityType;
/*
* An unit of storage for an entity, contains the string, the value
@ -30,14 +35,32 @@ extern "C" {
typedef struct _xmlEntity xmlEntity;
typedef xmlEntity *xmlEntityPtr;
struct _xmlEntity {
int type; /* The entity type */
int len; /* The lenght of the name */
const xmlChar *name; /* Name of the entity */
const xmlChar *ExternalID; /* External identifier for PUBLIC Entity */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
xmlChar *content; /* The entity content or ndata if unparsed */
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_ENTITY_DECL, must be second ! */
const xmlChar *name; /* Attribute name */
struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlChar *orig; /* content without ref substitution */
xmlChar *content; /* content or ndata if unparsed */
int length; /* the content length */
xmlChar *orig; /* The entity cont without ref substitution */
xmlEntityType etype; /* The entity type */
const xmlChar *ExternalID; /* External identifier for PUBLIC */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
#ifdef WITH_EXTRA_ENT_DETECT
/* Referenced entities name stack */
xmlChar *ent; /* Current parsed Node */
int entNr; /* Depth of the parsing stack */
int entMax; /* Max depth of the parsing stack */
xmlChar * *entTab; /* array of nodes */
#endif
};
/*
@ -52,7 +75,7 @@ typedef xmlEntitiesTable *xmlEntitiesTablePtr;
struct _xmlEntitiesTable {
int nb_entities; /* number of elements stored */
int max_entities; /* maximum number of elements */
xmlEntityPtr table; /* the table of entities */
xmlEntityPtr *table; /* the table of entities */
};
@ -60,13 +83,13 @@ struct _xmlEntitiesTable {
* External functions :
*/
void xmlAddDocEntity (xmlDocPtr doc,
xmlEntityPtr xmlAddDocEntity (xmlDocPtr doc,
const xmlChar *name,
int type,
const xmlChar *ExternalID,
const xmlChar *SystemID,
const xmlChar *content);
void xmlAddDtdEntity (xmlDocPtr doc,
xmlEntityPtr xmlAddDtdEntity (xmlDocPtr doc,
const xmlChar *name,
int type,
const xmlChar *ExternalID,
@ -88,9 +111,16 @@ xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table);
void xmlFreeEntitiesTable (xmlEntitiesTablePtr table);
void xmlDumpEntitiesTable (xmlBufferPtr buf,
xmlEntitiesTablePtr table);
void xmlDumpEntityDecl (xmlBufferPtr buf,
xmlEntityPtr ent);
xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table);
void xmlCleanupPredefinedEntities(void);
#ifdef WITH_EXTRA_ENT_DETECT
int xmlEntityAddReference (xmlEntityPtr ent,
const xmlChar *to);
#endif
#ifdef __cplusplus
}
#endif

View File

@ -49,6 +49,9 @@ struct _xmlParserInput {
int col; /* Current column */
int consumed; /* How many xmlChars already consumed */
xmlParserInputDeallocate free; /* function to deallocate the base */
const xmlChar *encoding; /* the encoding string for entity */
const xmlChar *version; /* the version string for entity */
int standalone; /* Was that entity marked standalone */
};
/**
@ -95,6 +98,7 @@ typedef enum {
XML_PARSER_ENTITY_DECL, /* within an entity declaration */
XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */
XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */
XML_PARSER_EPILOG /* the Misc* after the last end tag */
} xmlParserInputState;
@ -151,7 +155,7 @@ struct _xmlParserCtxt {
char *directory; /* the data directory */
/* Node name stack only used for HTML parsing */
/* Node name stack */
xmlChar *name; /* Current parsed Node */
int nameNr; /* Depth of the parsing stack */
int nameMax; /* Max depth of the parsing stack */
@ -160,6 +164,20 @@ struct _xmlParserCtxt {
long nbChars; /* number of xmlChar processed */
long checkIndex; /* used by progressive parsing lookup */
int keepBlanks; /* ugly but ... */
int disableSAX; /* SAX callbacks are disabled */
int inSubset; /* Parsing is in int 1/ext 2 subset */
xmlChar * intSubName; /* name of subset */
xmlChar * extSubURI; /* URI of external subset */
xmlChar * extSubSystem; /* SYSTEM ID of external subset */
/* xml:space values */
int * space; /* Should the parser preserve spaces */
int spaceNr; /* Depth of the parsing stack */
int spaceMax; /* Max depth of the parsing stack */
int * spaceTab; /* array of space infos */
int depth; /* to prevent entity substitution loops */
xmlParserInputPtr entity; /* used to check entities boundaries */
};
/**
@ -183,6 +201,8 @@ typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
const xmlChar *publicId, const xmlChar *systemId);
typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID);
typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID);
typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
const xmlChar *name);
typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
@ -254,6 +274,7 @@ struct _xmlSAXHandler {
fatalErrorSAXFunc fatalError;
getParameterEntitySAXFunc getParameterEntity;
cdataBlockSAXFunc cdataBlock;
externalSubsetSAXFunc externalSubset;
};
/**
@ -278,7 +299,7 @@ extern xmlSAXHandler htmlDefaultSAXHandler;
*/
extern int xmlSubstituteEntitiesDefaultValue;
extern int xmlGetWarningsDefaultValue;
/**
@ -363,6 +384,20 @@ xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
const xmlChar *ExternalID,
const xmlChar *SystemID);
int xmlParseBalancedChunkMemory(xmlDocPtr doc,
xmlSAXHandlerPtr sax,
void *user_data,
int depth,
const xmlChar *string,
xmlNodePtr *list);
int xmlParseExternalEntity (xmlDocPtr doc,
xmlSAXHandlerPtr sax,
void *user_data,
int depth,
const xmlChar *URL,
const xmlChar *ID,
xmlNodePtr *list);
/**
* SAX initialization routines
*/

View File

@ -17,31 +17,6 @@ extern "C" {
#define XML_MAX_NAMELEN 1000
/**
* A few macros needed to help building the parser.
*/
/* #define UNICODE */
#ifdef UNICODE
typedef unsigned long CHARVAL;
#define NEXTCHARVAL(p) (unsigned long) \
((*(p) == 0) ? (unsigned long) 0 : \
((*(p) < 0x80) ? (unsigned long) (*(p)++) : \
(*(p) < 0xC0) ? (unsigned long) 0 : \
(*(p) < 0xE0) ? ((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) : \
(*(p) < 0xF0) ? (((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 + \
(*(p)++ & 0x3F)) : \
(*(p) < 0xF8) ? ((((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 + \
(*(p)++ & 0x3F)) << 6 + (*(p)++ & 0x3F)) : 0))
#else
typedef unsigned char CHARVAL;
#define NEXTCHARVAL(p) (unsigned long) *(p);
#define SKIPCHARVAL(p) (p)++;
#endif
#ifdef UNICODE
/************************************************************************
* *
* UNICODE version of the macros. *
@ -404,7 +379,7 @@ typedef unsigned char CHARVAL;
#define IS_EXTENDER(c) \
(((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
((c) == 0xec6) || ((c) == 0x3005) \
((c) == 0xec6) || ((c) == 0x3005) || \
(((c) >= 0x3031) && ((c) <= 0x3035)) || \
(((c) >= 0x309b) && ((c) <= 0x309e)) || \
(((c) >= 0x30fc) && ((c) <= 0x30fe)))
@ -423,65 +398,6 @@ typedef unsigned char CHARVAL;
*/
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
#else
/************************************************************************
* *
* 8bits / ISO-Latin version of the macros. *
* *
************************************************************************/
/*
* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
* | [#x10000-#x10FFFF]
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/
#define IS_CHAR(c) \
((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
(((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
(((c) >= 0x10000) && ((c) <= 0x10FFFF)))
/*
* [85] BaseChar ::= ... long list see REC ...
*/
#define IS_BASECHAR(c) \
((((c) >= 0x0041) && ((c) <= 0x005A)) || \
(((c) >= 0x0061) && ((c) <= 0x007A)) || \
(((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
(((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
(((c) >= 0x00F8) && ((c) <= 0x00FF)))
/*
* [88] Digit ::= ... long list see REC ...
*/
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
/*
* [84] Letter ::= BaseChar | Ideographic
*/
#define IS_LETTER(c) IS_BASECHAR(c)
/*
* [87] CombiningChar ::= ... long list see REC ...
*/
#define IS_COMBINING(c) 0
/*
* [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
* #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
* [#x309D-#x309E] | [#x30FC-#x30FE]
*/
#define IS_EXTENDER(c) ((c) == 0xb7)
#endif /* !UNICODE */
/*
* Blank chars.
*
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
*/
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
((c) == 0x0D))
/*
* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
@ -502,10 +418,10 @@ typedef unsigned char CHARVAL;
if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }
#define MOVETO_ENDTAG(p) \
while (IS_CHAR(*p) && (*(p) != '>')) (p)++
while ((*p) && (*(p) != '>')) (p)++
#define MOVETO_STARTTAG(p) \
while (IS_CHAR(*p) && (*(p) != '<')) (p)++
while ((*p) && (*(p) != '<')) (p)++
/**
* Parser context
@ -514,10 +430,13 @@ xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename);
xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer,
int size);
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
xmlParserCtxtPtr xmlNewParserCtxt (void);
xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
const xmlChar *ID,
const xmlChar *base);
void xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
xmlCharEncoding enc);
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
/**
* Entities
@ -540,7 +459,8 @@ xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
/**
* Namespaces.
*/
xmlChar * xmlSplitQName (const xmlChar *name,
xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt,
const xmlChar *name,
xmlChar **prefix);
xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
@ -606,6 +526,7 @@ xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
void xmlParseMisc (xmlParserCtxtPtr ctxt);
void xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
const xmlChar *ExternalID,
@ -624,6 +545,12 @@ xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt,
xmlChar end,
xmlChar end2,
xmlChar end3);
xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
const xmlChar *str,
int what,
xmlChar end,
xmlChar end2,
xmlChar end3);
/*
* Generated by MACROS on top of parser.c c.f. PUSH_AND_POP

View File

@ -36,24 +36,22 @@ typedef enum {
XML_DOCUMENT_TYPE_NODE= 10,
XML_DOCUMENT_FRAG_NODE= 11,
XML_NOTATION_NODE= 12,
XML_HTML_DOCUMENT_NODE= 13
XML_HTML_DOCUMENT_NODE= 13,
XML_DTD_NODE= 14,
XML_ELEMENT_DECL= 15,
XML_ATTRIBUTE_DECL= 16,
XML_ENTITY_DECL= 17
} xmlElementType;
/*
* Size of an internal character representation.
*
* Currently we use 8bit chars internal representation for memory efficiency,
* but the parser is not tied to that, just define UNICODE to switch to
* a 16 bits internal representation. Note that with 8 bits wide
* xmlChars one can still use UTF-8 to handle correctly non ISO-Latin
* input.
* We use 8bit chars internal representation for memory efficiency,
* Note that with 8 bits wide xmlChars one can still use UTF-8 to handle
* correctly non ISO-Latin input.
*/
#ifdef UNICODE
typedef unsigned short xmlChar;
#else
typedef unsigned char xmlChar;
#endif
#ifndef WIN32
#ifndef CHAR
@ -109,14 +107,25 @@ struct _xmlEnumeration {
typedef struct _xmlAttribute xmlAttribute;
typedef xmlAttribute *xmlAttributePtr;
struct _xmlAttribute {
const xmlChar *elem; /* Element holding the attribute */
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_ATTRIBUTE_DECL, must be second ! */
const xmlChar *name; /* Attribute name */
struct _xmlAttribute *next; /* list of attributes of an element */
xmlAttributeType type; /* The type */
struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
struct _xmlAttribute *nexth; /* next in hash table */
xmlAttributeType atype; /* The attribute type */
xmlAttributeDefault def; /* the default */
const xmlChar *defaultValue; /* or the default value */
xmlEnumerationPtr tree; /* or the enumeration tree if any */
const xmlChar *prefix; /* the namespace prefix if any */
const xmlChar *elem; /* Element holding the attribute */
};
/*
@ -156,8 +165,19 @@ typedef enum {
typedef struct _xmlElement xmlElement;
typedef xmlElement *xmlElementPtr;
struct _xmlElement {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_ELEMENT_DECL, must be second ! */
const xmlChar *name; /* Element name */
xmlElementTypeVal type; /* The type */
struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlElementTypeVal etype; /* The type */
xmlElementContentPtr content; /* the allowed element content */
xmlAttributePtr attributes; /* List of the declared attributes */
};
@ -188,14 +208,25 @@ struct _xmlNs {
typedef struct _xmlDtd xmlDtd;
typedef xmlDtd *xmlDtdPtr;
struct _xmlDtd {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_DTD_NODE, must be second ! */
const xmlChar *name; /* Name of the DTD */
const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */
struct _xmlNode *children; /* the value of the property link */
struct _xmlNode *last; /* last child link */
struct _xmlDoc *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
/* End of common part */
void *notations; /* Hash table for notations if any */
void *elements; /* Hash table for elements if any */
void *attributes; /* Hash table for attributes if any */
void *entities; /* Hash table for entities if any */
/* struct xmlDtd *next; * next link for this document */
const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */
};
/*
@ -206,14 +237,17 @@ typedef xmlAttr *xmlAttrPtr;
struct _xmlAttr {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif
xmlElementType type; /* XML_ATTRIBUTE_NODE, must be third ! */
struct _xmlNode *node; /* attr->node link */
struct _xmlAttr *next; /* attribute list link */
xmlElementType type; /* XML_ATTRIBUTE_NODE, must be second ! */
const xmlChar *name; /* the name of the property */
struct _xmlNode *val; /* the value of the property */
struct _xmlNode *children; /* the value of the property */
struct _xmlNode *last; /* NULL */
struct _xmlNode *parent; /* child->parent link */
struct _xmlAttr *next; /* next sibling link */
struct _xmlAttr *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlNs *ns; /* pointer to the associated namespace */
xmlAttributeType atype; /* the attribute type if validating */
};
/*
@ -266,24 +300,25 @@ typedef xmlNode *xmlNodePtr;
struct _xmlNode {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif
xmlElementType type; /* type number in the DTD, must be third ! */
struct _xmlDoc *doc; /* the containing document */
xmlElementType type; /* type number, must be second ! */
const xmlChar *name; /* the name of the node, or the entity */
struct _xmlNode *children; /* parent->childs link */
struct _xmlNode *last; /* last child link */
struct _xmlNode *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlNode *childs; /* parent->childs link */
struct _xmlNode *last; /* last child link */
struct _xmlAttr *properties;/* properties list */
const xmlChar *name; /* the name of the node, or the entity */
struct _xmlDoc *doc; /* the containing document */
xmlNs *ns; /* pointer to the associated namespace */
xmlNs *nsDef; /* namespace definitions on this node */
#ifndef XML_USE_BUFFER_CONTENT
xmlChar *content; /* the content */
#else
xmlBufferPtr content; /* the content in a buffer */
#endif
/* End of common part */
struct _xmlAttr *properties;/* properties list */
xmlNs *nsDef; /* namespace definitions on this node */
};
/*
@ -294,20 +329,27 @@ typedef xmlDoc *xmlDocPtr;
struct _xmlDoc {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif
xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! */
char *name; /* name/filename/URI of the document */
const xmlChar *version; /* the XML version string */
const xmlChar *encoding; /* encoding, if any */
struct _xmlNode *children; /* the document tree */
struct _xmlNode *last; /* last child link */
struct _xmlNode *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* autoreference to itself */
/* End of common part */
int compression;/* level of zlib compression */
int standalone; /* standalone document (no external refs) */
struct _xmlDtd *intSubset; /* the document internal subset */
struct _xmlDtd *extSubset; /* the document external subset */
struct _xmlNs *oldNs; /* Global namespace, the old way */
struct _xmlNode *root; /* the document tree */
const xmlChar *version; /* the XML version string */
const xmlChar *encoding; /* encoding, if any */
void *ids; /* Hash table for ID attributes if any */
void *refs; /* Hash table for IDREFs attributes if any */
const xmlChar *URL; /* The URI for that document */
};
/*
@ -422,6 +464,8 @@ xmlNodePtr xmlNewComment (const xmlChar *content);
xmlNodePtr xmlNewCDataBlock (xmlDocPtr doc,
const xmlChar *content,
int len);
xmlNodePtr xmlNewCharRef (xmlDocPtr doc,
const xmlChar *name);
xmlNodePtr xmlNewReference (xmlDocPtr doc,
const xmlChar *name);
xmlNodePtr xmlCopyNode (xmlNodePtr node,
@ -513,13 +557,14 @@ xmlChar * xmlNodeGetContent (xmlNodePtr cur);
xmlChar * xmlNodeGetLang (xmlNodePtr cur);
void xmlNodeSetLang (xmlNodePtr cur,
const xmlChar *lang);
int xmlNodeGetSpacePreserve (xmlNodePtr cur);
xmlChar * xmlNodeGetBase (xmlDocPtr doc,
xmlNodePtr cur);
/*
* Removing content.
*/
int xmlRemoveProp (xmlAttrPtr attr); /* TODO */
int xmlRemoveProp (xmlAttrPtr attr);
int xmlRemoveNode (xmlNodePtr node); /* TODO */
/*
@ -532,6 +577,12 @@ void xmlBufferWriteChar (xmlBufferPtr buf,
void xmlBufferWriteQuotedString(xmlBufferPtr buf,
const xmlChar *string);
/*
* Namespace handling
*/
int xmlReconciliateNs (xmlDocPtr doc,
xmlNodePtr tree);
/*
* Saving
*/

View File

@ -29,6 +29,14 @@ struct _xmlValidCtxt {
void *userData; /* user specific data block */
xmlValidityErrorFunc error; /* the callback in case of errors */
xmlValidityWarningFunc warning; /* the callback in case of warning */
/* Node analysis stack used when validating within entities */
xmlNodePtr node; /* Current parsed Node */
int nodeNr; /* Depth of the parsing stack */
int nodeMax; /* Max depth of the parsing stack */
xmlNodePtr *nodeTab; /* array of nodes */
int finishDtd; /* finished validating the Dtd ? */
};
/*
@ -114,6 +122,8 @@ xmlNotationPtr xmlAddNotationDecl (xmlValidCtxtPtr ctxt,
const xmlChar *SystemID);
xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table);
void xmlFreeNotationTable(xmlNotationTablePtr table);
void xmlDumpNotationDecl (xmlBufferPtr buf,
xmlNotationPtr nota);
void xmlDumpNotationTable(xmlBufferPtr buf,
xmlNotationTablePtr table);
@ -122,6 +132,9 @@ xmlElementContentPtr xmlNewElementContent (xmlChar *name,
xmlElementContentType type);
xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content);
void xmlFreeElementContent(xmlElementContentPtr cur);
void xmlSprintfElementContent(char *buf,
xmlElementContentPtr content,
int glob);
/* Element */
xmlElementPtr xmlAddElementDecl (xmlValidCtxtPtr ctxt,
@ -133,6 +146,8 @@ xmlElementTablePtr xmlCopyElementTable (xmlElementTablePtr table);
void xmlFreeElementTable (xmlElementTablePtr table);
void xmlDumpElementTable (xmlBufferPtr buf,
xmlElementTablePtr table);
void xmlDumpElementDecl (xmlBufferPtr buf,
xmlElementPtr elem);
/* Enumeration */
xmlEnumerationPtr xmlCreateEnumeration (xmlChar *name);
@ -144,6 +159,7 @@ xmlAttributePtr xmlAddAttributeDecl (xmlValidCtxtPtr ctxt,
xmlDtdPtr dtd,
const xmlChar *elem,
const xmlChar *name,
const xmlChar *prefix,
xmlAttributeType type,
xmlAttributeDefault def,
const xmlChar *defaultValue,
@ -152,6 +168,8 @@ xmlAttributeTablePtr xmlCopyAttributeTable (xmlAttributeTablePtr table);
void xmlFreeAttributeTable (xmlAttributeTablePtr table);
void xmlDumpAttributeTable (xmlBufferPtr buf,
xmlAttributeTablePtr table);
void xmlDumpAttributeDecl (xmlBufferPtr buf,
xmlAttributePtr attr);
/* IDs */
xmlIDPtr xmlAddID (xmlValidCtxtPtr ctxt,
@ -188,6 +206,10 @@ int xmlValidateRoot (xmlValidCtxtPtr ctxt,
int xmlValidateElementDecl (xmlValidCtxtPtr ctxt,
xmlDocPtr doc,
xmlElementPtr elem);
xmlChar * xmlValidNormalizeAttributeValue(xmlDocPtr doc,
xmlNodePtr elem,
const xmlChar *name,
const xmlChar *value);
int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt,
xmlDocPtr doc,
xmlAttributePtr attr);
@ -199,6 +221,8 @@ int xmlValidateNotationDecl (xmlValidCtxtPtr ctxt,
int xmlValidateDtd (xmlValidCtxtPtr ctxt,
xmlDocPtr doc,
xmlDtdPtr dtd);
int xmlValidateDtdFinal (xmlValidCtxtPtr ctxt,
xmlDocPtr doc);
int xmlValidateDocument (xmlValidCtxtPtr ctxt,
xmlDocPtr doc);
int xmlValidateElement (xmlValidCtxtPtr ctxt,

View File

@ -8,7 +8,7 @@
#ifndef _DEBUG_MEMORY_ALLOC_
#define _DEBUG_MEMORY_ALLOC_
#define NO_DEBUG_MEMORY
/* #define NO_DEBUG_MEMORY */
#ifdef NO_DEBUG_MEMORY
#ifdef HAVE_MALLOC_H

View File

@ -3,14 +3,13 @@
%define prefix /usr
Summary: libXML library
Name: libxml
Name: libxml2
Version: %ver
Release: 1
Copyright: LGPL
Group: X11/Libraries
Source: ftp://ftp.gnome.org/pub/GNOME/sources/libxml/libxml-%{ver}.tar.gz
BuildRoot: /var/tmp/libxml-%{PACKAGE_VERSION}-root
Provides: libxml.so.0
URL: http://rpmfind.net/veillard/XML/
Prereq: /sbin/install-info

View File

@ -869,10 +869,11 @@ xmlNanoFTPConnect(void *ctx) {
else
#ifndef HAVE_SNPRINTF
len = sprintf(buf, "PASS libxml@%s\r\n",
hostname);
#else /* HAVE_SNPRINTF */
len = snprintf(buf, sizeof(buf), "PASS libxml@%s\r\n",
#endif /* HAVE_SNPRINTF */
hostname);
#endif /* HAVE_SNPRINTF */
#ifdef DEBUG_FTP
printf(buf);
#endif
@ -1226,11 +1227,13 @@ xmlNanoFTPGetConnection(void *ctx) {
portp = (unsigned char *) &dataAddr.sin_port;
#ifndef HAVE_SNPRINTF
len = sprintf(buf, "PORT %d,%d,%d,%d,%d,%d\r\n",
#else /* HAVE_SNPRINTF */
len = snprintf(buf, sizeof(buf), "PORT %d,%d,%d,%d,%d,%d\r\n",
#endif /* HAVE_SNPRINTF */
adp[0] & 0xff, adp[1] & 0xff, adp[2] & 0xff, adp[3] & 0xff,
portp[0] & 0xff, portp[1] & 0xff);
#else /* HAVE_SNPRINTF */
len = snprintf(buf, sizeof(buf), "PORT %d,%d,%d,%d,%d,%d\r\n",
adp[0] & 0xff, adp[1] & 0xff, adp[2] & 0xff, adp[3] & 0xff,
portp[0] & 0xff, portp[1] & 0xff);
#endif /* HAVE_SNPRINTF */
buf[sizeof(buf) - 1] = 0;
#ifdef DEBUG_FTP
printf(buf);
@ -1264,13 +1267,34 @@ int
xmlNanoFTPCloseConnection(void *ctx) {
xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx;
int res;
fd_set rfd, efd;
struct timeval tv;
close(ctxt->dataFd); ctxt->dataFd = -1;
tv.tv_sec = 15;
tv.tv_usec = 0;
FD_ZERO(&rfd);
FD_SET(ctxt->controlFd, &rfd);
FD_ZERO(&efd);
FD_SET(ctxt->controlFd, &efd);
res = select(ctxt->controlFd + 1, &rfd, NULL, &efd, &tv);
if (res < 0) {
#ifdef DEBUG_FTP
perror("select");
#endif
close(ctxt->controlFd); ctxt->controlFd = -1;
return(-1);
}
if (res == 0) {
fprintf(stderr, "xmlNanoFTPCloseConnection: timeout\n");
close(ctxt->controlFd); ctxt->controlFd = -1;
} else {
res = xmlNanoFTPGetResponse(ctxt);
if (res != 2) {
close(ctxt->controlFd); ctxt->controlFd = -1;
return(-1);
}
}
return(0);
}

View File

@ -753,7 +753,7 @@ retry:
}
ctxt->fd = ret;
if (proxy) {
#ifdef have_snprintf
#ifdef HAVE_SNPRINTF
if (ctxt->port != 80)
snprintf(buf, sizeof(buf),
"GET http://%s:%d%s HTTP/1.0\r\nHost: %s\r\n\r\n",

3399
parser.c

File diff suppressed because it is too large Load Diff

View File

@ -49,6 +49,9 @@ struct _xmlParserInput {
int col; /* Current column */
int consumed; /* How many xmlChars already consumed */
xmlParserInputDeallocate free; /* function to deallocate the base */
const xmlChar *encoding; /* the encoding string for entity */
const xmlChar *version; /* the version string for entity */
int standalone; /* Was that entity marked standalone */
};
/**
@ -95,6 +98,7 @@ typedef enum {
XML_PARSER_ENTITY_DECL, /* within an entity declaration */
XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */
XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */
XML_PARSER_EPILOG /* the Misc* after the last end tag */
} xmlParserInputState;
@ -151,7 +155,7 @@ struct _xmlParserCtxt {
char *directory; /* the data directory */
/* Node name stack only used for HTML parsing */
/* Node name stack */
xmlChar *name; /* Current parsed Node */
int nameNr; /* Depth of the parsing stack */
int nameMax; /* Max depth of the parsing stack */
@ -160,6 +164,20 @@ struct _xmlParserCtxt {
long nbChars; /* number of xmlChar processed */
long checkIndex; /* used by progressive parsing lookup */
int keepBlanks; /* ugly but ... */
int disableSAX; /* SAX callbacks are disabled */
int inSubset; /* Parsing is in int 1/ext 2 subset */
xmlChar * intSubName; /* name of subset */
xmlChar * extSubURI; /* URI of external subset */
xmlChar * extSubSystem; /* SYSTEM ID of external subset */
/* xml:space values */
int * space; /* Should the parser preserve spaces */
int spaceNr; /* Depth of the parsing stack */
int spaceMax; /* Max depth of the parsing stack */
int * spaceTab; /* array of space infos */
int depth; /* to prevent entity substitution loops */
xmlParserInputPtr entity; /* used to check entities boundaries */
};
/**
@ -183,6 +201,8 @@ typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
const xmlChar *publicId, const xmlChar *systemId);
typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID);
typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID);
typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
const xmlChar *name);
typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
@ -254,6 +274,7 @@ struct _xmlSAXHandler {
fatalErrorSAXFunc fatalError;
getParameterEntitySAXFunc getParameterEntity;
cdataBlockSAXFunc cdataBlock;
externalSubsetSAXFunc externalSubset;
};
/**
@ -278,7 +299,7 @@ extern xmlSAXHandler htmlDefaultSAXHandler;
*/
extern int xmlSubstituteEntitiesDefaultValue;
extern int xmlGetWarningsDefaultValue;
/**
@ -363,6 +384,20 @@ xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
const xmlChar *ExternalID,
const xmlChar *SystemID);
int xmlParseBalancedChunkMemory(xmlDocPtr doc,
xmlSAXHandlerPtr sax,
void *user_data,
int depth,
const xmlChar *string,
xmlNodePtr *list);
int xmlParseExternalEntity (xmlDocPtr doc,
xmlSAXHandlerPtr sax,
void *user_data,
int depth,
const xmlChar *URL,
const xmlChar *ID,
xmlNodePtr *list);
/**
* SAX initialization routines
*/

View File

@ -17,31 +17,6 @@ extern "C" {
#define XML_MAX_NAMELEN 1000
/**
* A few macros needed to help building the parser.
*/
/* #define UNICODE */
#ifdef UNICODE
typedef unsigned long CHARVAL;
#define NEXTCHARVAL(p) (unsigned long) \
((*(p) == 0) ? (unsigned long) 0 : \
((*(p) < 0x80) ? (unsigned long) (*(p)++) : \
(*(p) < 0xC0) ? (unsigned long) 0 : \
(*(p) < 0xE0) ? ((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) : \
(*(p) < 0xF0) ? (((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 + \
(*(p)++ & 0x3F)) : \
(*(p) < 0xF8) ? ((((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 + \
(*(p)++ & 0x3F)) << 6 + (*(p)++ & 0x3F)) : 0))
#else
typedef unsigned char CHARVAL;
#define NEXTCHARVAL(p) (unsigned long) *(p);
#define SKIPCHARVAL(p) (p)++;
#endif
#ifdef UNICODE
/************************************************************************
* *
* UNICODE version of the macros. *
@ -404,7 +379,7 @@ typedef unsigned char CHARVAL;
#define IS_EXTENDER(c) \
(((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
((c) == 0xec6) || ((c) == 0x3005) \
((c) == 0xec6) || ((c) == 0x3005) || \
(((c) >= 0x3031) && ((c) <= 0x3035)) || \
(((c) >= 0x309b) && ((c) <= 0x309e)) || \
(((c) >= 0x30fc) && ((c) <= 0x30fe)))
@ -423,65 +398,6 @@ typedef unsigned char CHARVAL;
*/
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
#else
/************************************************************************
* *
* 8bits / ISO-Latin version of the macros. *
* *
************************************************************************/
/*
* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
* | [#x10000-#x10FFFF]
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/
#define IS_CHAR(c) \
((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
(((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
(((c) >= 0x10000) && ((c) <= 0x10FFFF)))
/*
* [85] BaseChar ::= ... long list see REC ...
*/
#define IS_BASECHAR(c) \
((((c) >= 0x0041) && ((c) <= 0x005A)) || \
(((c) >= 0x0061) && ((c) <= 0x007A)) || \
(((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
(((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
(((c) >= 0x00F8) && ((c) <= 0x00FF)))
/*
* [88] Digit ::= ... long list see REC ...
*/
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
/*
* [84] Letter ::= BaseChar | Ideographic
*/
#define IS_LETTER(c) IS_BASECHAR(c)
/*
* [87] CombiningChar ::= ... long list see REC ...
*/
#define IS_COMBINING(c) 0
/*
* [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
* #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
* [#x309D-#x309E] | [#x30FC-#x30FE]
*/
#define IS_EXTENDER(c) ((c) == 0xb7)
#endif /* !UNICODE */
/*
* Blank chars.
*
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
*/
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
((c) == 0x0D))
/*
* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
@ -502,10 +418,10 @@ typedef unsigned char CHARVAL;
if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }
#define MOVETO_ENDTAG(p) \
while (IS_CHAR(*p) && (*(p) != '>')) (p)++
while ((*p) && (*(p) != '>')) (p)++
#define MOVETO_STARTTAG(p) \
while (IS_CHAR(*p) && (*(p) != '<')) (p)++
while ((*p) && (*(p) != '<')) (p)++
/**
* Parser context
@ -514,10 +430,13 @@ xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename);
xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer,
int size);
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
xmlParserCtxtPtr xmlNewParserCtxt (void);
xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
const xmlChar *ID,
const xmlChar *base);
void xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
xmlCharEncoding enc);
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
/**
* Entities
@ -540,7 +459,8 @@ xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
/**
* Namespaces.
*/
xmlChar * xmlSplitQName (const xmlChar *name,
xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt,
const xmlChar *name,
xmlChar **prefix);
xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
@ -606,6 +526,7 @@ xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
void xmlParseMisc (xmlParserCtxtPtr ctxt);
void xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
const xmlChar *ExternalID,
@ -624,6 +545,12 @@ xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt,
xmlChar end,
xmlChar end2,
xmlChar end3);
xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
const xmlChar *str,
int what,
xmlChar end,
xmlChar end2,
xmlChar end3);
/*
* Generated by MACROS on top of parser.c c.f. PUSH_AND_POP

View File

@ -1,6 +1,5 @@
<?xml version="1.0"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd">
<!--DOCTYPE svg SYSTEM "svg-19990412.dtd"-->
<svg width="4in" height="3in">
<title>Kona Lavadome mountain bike
</title>

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@ -1,3 +1,6 @@
./test/VC/OneID:4: validity error: Element doc has too may ID attributes defined : id
<!ATTLIST doc id ID #IMPLIED>
^
./test/VC/OneID:4: validity error: Element doc has 2 ID attribute defined in the internal subset : id
<!ATTLIST doc id ID #IMPLIED>
^

View File

@ -1,3 +1,6 @@
./test/VC/OneID2:3: validity error: Element doc has 2 ID attribute defined in the internal subset : id
<!ATTLIST doc id ID #IMPLIED>
^
./test/VC/OneID2:4: validity error: Element doc has too may ID attributes defined : val
<!ELEMENT doc (#PCDATA)>
^

View File

@ -1,3 +1,3 @@
./test/VC/OneID3:2: validity error: Element doc has ID attribute defined in the external subset : id
<!ATTLIST doc id ID #IMPLIED>
dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val
<!ATTLIST doc val ID #IMPLIED>
^

View File

@ -1,3 +1,3 @@
./test/VC/UniqueElementTypeDeclaration:3: validity error: Redefinition of element a
dtds/a.dtd:1: validity error: Redefinition of element a
<!ELEMENT a (#PCDATA | b | c)*>
^

View File

@ -1,3 +1,4 @@
<?xml version="1.0"?>
<!DOCTYPE MEMO PUBLIC "-//SGMLSOURCE//DTD MEMO//EN" "http://www.sgmlsource.com/dtds/memo.dtd">
<MEMO/>
<MEMO>
</MEMO>

View File

@ -1,6 +1,6 @@
<?xml version="1.0"?>
<!DOCTYPE doc [
<!ENTITY % YN '"Yes"'>
<!ENTITY WhatHeSaid "He said %YN;">
<!ENTITY YN '"Yes"'>
<!ENTITY WhatHeSaid "He said &YN;">
]>
<doc>&WhatHeSaid;</doc>

View File

@ -0,0 +1,7 @@
<?xml version="1.0"?>
<!-- comment before the DTD -->
<!DOCTYPE doc [
<!ELEMENT doc ANY>
]>
<!-- comment after the DTD -->
<doc/>

View File

@ -1,5 +1,5 @@
<?xml version="1.0"?>
<EXAMPLE>
This is an inverted exclamation sign &#161;
This is an inverted exclamation sign &#xA1;
This is a space
</EXAMPLE>

View File

@ -6,5 +6,5 @@
<!ELEMENT para (#PCDATA)>
]>
<item>
<para>&apos;they called me &sampleEnt;&apos;</para>
<para>'they called me &sampleEnt;'</para>
</item>

View File

@ -4,7 +4,7 @@
<!ENTITY test2 "test 2">
]>
<doc>
<Content>Reten&#231;&#227;o</Content>
<Content>Reten&#xE7;&#xE3;o</Content>
<Content>&lt;&gt;</Content>
<Content>&test1;&test2;</Content>
</doc>

View File

@ -2,4 +2,5 @@
<!DOCTYPE spec PUBLIC "-//testspec//" "dtds/eve.dtd" [
<!ENTITY iso6.doc.date "29-May-1999">
]>
<spec/>
<spec>
</spec>

View File

@ -1,3 +1,4 @@
<?xml version="1.0"?>
<!DOCTYPE MEMO PUBLIC "-//SGMLSOURCE//DTD MEMO//EN" "http://www.sgmlsource.com/dtds/memo.dtd">
<MEMO/>
<MEMO>
</MEMO>

View File

@ -1,6 +1,6 @@
<?xml version="1.0"?>
<!DOCTYPE doc [
<!ENTITY % YN '"Yes"'>
<!ENTITY WhatHeSaid "He said %YN;">
<!ENTITY YN '"Yes"'>
<!ENTITY WhatHeSaid "He said &YN;">
]>
<doc>He said &quot;Yes&quot;</doc>
<doc>He said &amp;YN;</doc>

View File

@ -1,5 +1,5 @@
<?xml version="1.0"?>
<EXAMPLE>
This is an inverted exclamation sign &#161;
This is an inverted exclamation sign &#xA1;
This is a space
</EXAMPLE>

View File

@ -6,5 +6,5 @@
<!ELEMENT para (#PCDATA)>
]>
<item>
<para>&apos;they called me the hyacinth girl&apos;</para>
<para>'they called me the hyacinth girl'</para>
</item>

View File

@ -4,7 +4,7 @@
<!ENTITY test2 "test 2">
]>
<doc>
<Content>Reten&#231;&#227;o</Content>
<Content>Reten&#xE7;&#xE3;o</Content>
<Content>&lt;&gt;</Content>
<Content>test 1test 2</Content>
</doc>

View File

@ -2,4 +2,5 @@
<!DOCTYPE spec PUBLIC "-//testspec//" "dtds/eve.dtd" [
<!ENTITY iso6.doc.date "29-May-1999">
]>
<spec/>
<spec>
</spec>

View File

@ -2,14 +2,12 @@
<RDF:RDF xmlns:RDF="http://www.w3.org/TR/WD-rdf-syntax#" p3p="http//www.w3.org/TR/1998/WD-P3P10-syntax#proposal.DTD">
<PROP realm="http://www.CoolCatalog.com/catalogue/" entity="CoolCatalog" agreeID="94df1293a3e519bb" assurance="http://www.TrustUs.org">
<USES>
<STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you&apos;d appreciate.">
<WITH>
<PREFIX name="User.">
<STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you'd appreciate.">
<WITH><PREFIX name="User.">
<REF name="Name.First"/>
<REF name="Bdate.Year" optional="1"/>
<REF name="Gender"/>
</PREFIX>
</WITH>
</PREFIX></WITH>
</STATEMENT>
</USES>
<USES>
@ -18,5 +16,4 @@
</STATEMENT>
</USES>
<DISCLOSURE discURI="http://www.CoolCatalog.com/PrivacyPractice.html" access="3" other="0,1"/>
</PROP>
</RDF:RDF>
</PROP></RDF:RDF>

View File

@ -11,11 +11,11 @@
<RPM:Packager>Till Bubeck &lt;bubeck@delix.de&gt;, Ngo Than &lt;than@delix.de&gt;</RPM:Packager>
<RPM:Group>Libraries</RPM:Group>
<RPM:Summary>Bibliothek zur Ansteuerung von Terminals</RPM:Summary>
<RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#228;ngige
Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#252;gung, die
<RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#xE4;ngige
Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#xFC;gung, die
speziell optimiert sind.
Diese Version ist die &apos;new curses&apos; (ncurses) Variante und ist der
anerkannte Ersatz f&#252;r die klassische Curses-Library, die nicht mehr
Diese Version ist die 'new curses' (ncurses) Variante und ist der
anerkannte Ersatz f&#xFC;r die klassische Curses-Library, die nicht mehr
weiterentwickelt wird.</RPM:Description>
<RPM:Copyright>GPL</RPM:Copyright>
<RPM:Sources>ncurses4-4.2-3.src.rpm</RPM:Sources>

View File

@ -1,51 +1,63 @@
<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/">
<channel>
<title>Slashdot:News for Nerds. Stuff that Matters.</title>
<link>http://slashdot.org/</link>
<description>News for Nerds. Stuff that Matters</description>
</channel>
<image>
<title>Slashdot</title>
<url>http://slashdot.org/images/slashdotlg.gif</url>
<link>http://slashdot.org</link>
</image>
<item>
<title>100 Mbit/s on Fibre to the home</title>
<link>http://slashdot.org/articles/99/06/06/1440211.shtml</link>
</item>
<item>
<title>Gimp 1.2 Preview</title>
<link>http://slashdot.org/articles/99/06/06/1438246.shtml</link>
</item>
<item>
<title>Sony&apos;s AIBO robot Sold Out</title>
<title>Sony's AIBO robot Sold Out</title>
<link>http://slashdot.org/articles/99/06/06/1432256.shtml</link>
</item>
<item>
<title>Ask Slashdot: Another Word for &quot;Hacker&quot;?</title>
<link>http://slashdot.org/askslashdot/99/06/05/1815225.shtml</link>
</item>
<item>
<title>Corel Linux FAQ</title>
<link>http://slashdot.org/articles/99/06/05/1842218.shtml</link>
</item>
<item>
<title>Upside downsides MP3.COM.</title>
<link>http://slashdot.org/articles/99/06/05/1558210.shtml</link>
</item>
<item>
<title>2 Terabits of Bandwidth</title>
<link>http://slashdot.org/articles/99/06/05/1554258.shtml</link>
</item>
<item>
<title>Suppression of cold fusion research?</title>
<link>http://slashdot.org/articles/99/06/04/2313200.shtml</link>
</item>
<item>
<title>California Gov. Halts Wage Info Sale</title>
<link>http://slashdot.org/articles/99/06/04/235256.shtml</link>
</item>
<item>
<title>Red Hat Announces IPO</title>
<link>http://slashdot.org/articles/99/06/04/0849207.shtml</link>

View File

@ -5,7 +5,7 @@
<url>http://slashdot.org/articles/99/06/06/1440211.shtml</url>
<time>1999-06-06 14:39:59</time>
<author>CmdrTaco</author>
<department>wouldn&apos;t-it-be-nice</department>
<department>wouldn't-it-be-nice</department>
<topic>internet</topic>
<comments>20</comments>
<section>articles</section>
@ -23,7 +23,7 @@
<image>topicgimp.gif</image>
</story>
<story>
<title>Sony&apos;s AIBO robot Sold Out</title>
<title>Sony's AIBO robot Sold Out</title>
<url>http://slashdot.org/articles/99/06/06/1432256.shtml</url>
<time>1999-06-06 14:32:51</time>
<author>CmdrTaco</author>

View File

@ -1,7 +1,8 @@
<?xml version="1.0" standalone="yes"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd">
<svg width="242px" height="383px">
<g style="stroke: #000000"/>
<g style="stroke: #000000">
</g>
<g style="fill: #f2cc99">
<polyline verts=" 69,18 82,8 99,3 118,5 135,12 149,21 156,13 165,9 177,13 183,28 180,50 164,91 155,107 154,114 151,121 141,127 139,136 155,206 157,251 126,342 133,357 128,376 83,376 75,368 67,350 61,350 53,369 4,369 2,361 5,354 12,342 16,321 4,257 4,244 7,218 9,179 26,127 43,93 32,77 30,70 24,67 16,49 17,35 18,23 30,12 40,7 53,7 62,12 69,18 69,18 69,18"/>
</g>
@ -157,5 +158,4 @@
<polyline verts=" 147,338 142,341 143,345 141,354 147,343 147,338 147,338 147,338"/>
<polyline verts=" 157,342 156,349 150,356 157,353 163,346 162,342 157,342 157,342 157,342"/>
<polyline verts=" 99,265 96,284 92,299 73,339 73,333 87,300 99,265 99,265 99,265"/>
</g>
</svg>
</g></svg>

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -8,7 +8,8 @@
<g style="stroke: #800040">
<polyline verts=" 32,100 72,50 90,82 73,16 120,64 152,9 177,107"/>
</g>
<g style="stroke: #000000"/>
<g style="stroke: #000000">
</g>
<g style="stroke: #0000ff">
<rect x="30" y="101" width="51" height="33"/>
</g>
@ -38,11 +39,13 @@
<g style="stroke: #008080">
<text x="176" y="85">sadfsadfsad</text>
</g>
<g style="stroke: #000000"/>
<g style="stroke: #000000">
</g>
<g style="fill: #800040">
<ellipse cx="208" cy="180" major="45" minor="31" angle="0"/>
</g>
<g style="stroke: #000000"/>
<g style="stroke: #000000">
</g>
<g style="fill: #ffffff">
<g>
<desc> Java Font definition:Dialog 700</desc>
@ -50,5 +53,4 @@
<g>
<desc> Java Font definition:Dialog 700</desc>
</g>
</g>
</svg>
</g></svg>

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

View File

@ -1,8 +1,8 @@
<?xml version="1.0"?>
<!DOCTYPE test [
<!ELEMENT test (#PCDATA)>
<!ENTITY % xx "&#37;zz;">
<!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >'>
<!ENTITY tricky "error-prone">
<!ELEMENT test (#PCDATA)>
]>
<test>This sample shows a error-prone method.</test>

View File

@ -2,14 +2,12 @@
<RDF:RDF xmlns:RDF="http://www.w3.org/TR/WD-rdf-syntax#" p3p="http//www.w3.org/TR/1998/WD-P3P10-syntax#proposal.DTD">
<PROP realm="http://www.CoolCatalog.com/catalogue/" entity="CoolCatalog" agreeID="94df1293a3e519bb" assurance="http://www.TrustUs.org">
<USES>
<STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you&apos;d appreciate.">
<WITH>
<PREFIX name="User.">
<STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you'd appreciate.">
<WITH><PREFIX name="User.">
<REF name="Name.First"/>
<REF name="Bdate.Year" optional="1"/>
<REF name="Gender"/>
</PREFIX>
</WITH>
</PREFIX></WITH>
</STATEMENT>
</USES>
<USES>
@ -18,5 +16,4 @@
</STATEMENT>
</USES>
<DISCLOSURE discURI="http://www.CoolCatalog.com/PrivacyPractice.html" access="3" other="0,1"/>
</PROP>
</RDF:RDF>
</PROP></RDF:RDF>

View File

@ -11,11 +11,11 @@
<RPM:Packager>Till Bubeck &lt;bubeck@delix.de&gt;, Ngo Than &lt;than@delix.de&gt;</RPM:Packager>
<RPM:Group>Libraries</RPM:Group>
<RPM:Summary>Bibliothek zur Ansteuerung von Terminals</RPM:Summary>
<RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#228;ngige
Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#252;gung, die
<RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#xE4;ngige
Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#xFC;gung, die
speziell optimiert sind.
Diese Version ist die &apos;new curses&apos; (ncurses) Variante und ist der
anerkannte Ersatz f&#252;r die klassische Curses-Library, die nicht mehr
Diese Version ist die 'new curses' (ncurses) Variante und ist der
anerkannte Ersatz f&#xFC;r die klassische Curses-Library, die nicht mehr
weiterentwickelt wird.</RPM:Description>
<RPM:Copyright>GPL</RPM:Copyright>
<RPM:Sources>ncurses4-4.2-3.src.rpm</RPM:Sources>

View File

@ -1,51 +1,63 @@
<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/">
<channel>
<title>Slashdot:News for Nerds. Stuff that Matters.</title>
<link>http://slashdot.org/</link>
<description>News for Nerds. Stuff that Matters</description>
</channel>
<image>
<title>Slashdot</title>
<url>http://slashdot.org/images/slashdotlg.gif</url>
<link>http://slashdot.org</link>
</image>
<item>
<title>100 Mbit/s on Fibre to the home</title>
<link>http://slashdot.org/articles/99/06/06/1440211.shtml</link>
</item>
<item>
<title>Gimp 1.2 Preview</title>
<link>http://slashdot.org/articles/99/06/06/1438246.shtml</link>
</item>
<item>
<title>Sony&apos;s AIBO robot Sold Out</title>
<title>Sony's AIBO robot Sold Out</title>
<link>http://slashdot.org/articles/99/06/06/1432256.shtml</link>
</item>
<item>
<title>Ask Slashdot: Another Word for &quot;Hacker&quot;?</title>
<link>http://slashdot.org/askslashdot/99/06/05/1815225.shtml</link>
</item>
<item>
<title>Corel Linux FAQ</title>
<link>http://slashdot.org/articles/99/06/05/1842218.shtml</link>
</item>
<item>
<title>Upside downsides MP3.COM.</title>
<link>http://slashdot.org/articles/99/06/05/1558210.shtml</link>
</item>
<item>
<title>2 Terabits of Bandwidth</title>
<link>http://slashdot.org/articles/99/06/05/1554258.shtml</link>
</item>
<item>
<title>Suppression of cold fusion research?</title>
<link>http://slashdot.org/articles/99/06/04/2313200.shtml</link>
</item>
<item>
<title>California Gov. Halts Wage Info Sale</title>
<link>http://slashdot.org/articles/99/06/04/235256.shtml</link>
</item>
<item>
<title>Red Hat Announces IPO</title>
<link>http://slashdot.org/articles/99/06/04/0849207.shtml</link>

View File

@ -5,7 +5,7 @@
<url>http://slashdot.org/articles/99/06/06/1440211.shtml</url>
<time>1999-06-06 14:39:59</time>
<author>CmdrTaco</author>
<department>wouldn&apos;t-it-be-nice</department>
<department>wouldn't-it-be-nice</department>
<topic>internet</topic>
<comments>20</comments>
<section>articles</section>
@ -23,7 +23,7 @@
<image>topicgimp.gif</image>
</story>
<story>
<title>Sony&apos;s AIBO robot Sold Out</title>
<title>Sony's AIBO robot Sold Out</title>
<url>http://slashdot.org/articles/99/06/06/1432256.shtml</url>
<time>1999-06-06 14:32:51</time>
<author>CmdrTaco</author>

View File

@ -1,7 +1,8 @@
<?xml version="1.0" standalone="yes"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd">
<svg width="242px" height="383px">
<g style="stroke: #000000"/>
<g style="stroke: #000000">
</g>
<g style="fill: #f2cc99">
<polyline verts=" 69,18 82,8 99,3 118,5 135,12 149,21 156,13 165,9 177,13 183,28 180,50 164,91 155,107 154,114 151,121 141,127 139,136 155,206 157,251 126,342 133,357 128,376 83,376 75,368 67,350 61,350 53,369 4,369 2,361 5,354 12,342 16,321 4,257 4,244 7,218 9,179 26,127 43,93 32,77 30,70 24,67 16,49 17,35 18,23 30,12 40,7 53,7 62,12 69,18 69,18 69,18"/>
</g>
@ -157,5 +158,4 @@
<polyline verts=" 147,338 142,341 143,345 141,354 147,343 147,338 147,338 147,338"/>
<polyline verts=" 157,342 156,349 150,356 157,353 163,346 162,342 157,342 157,342 157,342"/>
<polyline verts=" 99,265 96,284 92,299 73,339 73,333 87,300 99,265 99,265 99,265"/>
</g>
</svg>
</g></svg>

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -8,7 +8,8 @@
<g style="stroke: #800040">
<polyline verts=" 32,100 72,50 90,82 73,16 120,64 152,9 177,107"/>
</g>
<g style="stroke: #000000"/>
<g style="stroke: #000000">
</g>
<g style="stroke: #0000ff">
<rect x="30" y="101" width="51" height="33"/>
</g>
@ -38,11 +39,13 @@
<g style="stroke: #008080">
<text x="176" y="85">sadfsadfsad</text>
</g>
<g style="stroke: #000000"/>
<g style="stroke: #000000">
</g>
<g style="fill: #800040">
<ellipse cx="208" cy="180" major="45" minor="31" angle="0"/>
</g>
<g style="stroke: #000000"/>
<g style="stroke: #000000">
</g>
<g style="fill: #ffffff">
<g>
<desc> Java Font definition:Dialog 700</desc>
@ -50,5 +53,4 @@
<g>
<desc> Java Font definition:Dialog 700</desc>
</g>
</g>
</svg>
</g></svg>

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

View File

@ -1,6 +1,8 @@
<?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>
<!DOCTYPE spec SYSTEM "dtds/spec.dtd" [
<!ENTITY XML.version "1.0">
<!-- LAST TOUCHED BY: Tim Bray, 8 February 1997 --><!-- The words 'FINAL EDIT' in comments mark places where changes
need to be made after approval of the document by the ERB, before
publication. --><!ENTITY XML.version "1.0">
<!ENTITY doc.date "10 February 1998">
<!ENTITY iso6.doc.date "19980210">
<!ENTITY w3c.doc.date "02-Feb-1998">
@ -15,27 +17,20 @@
<!ENTITY br "\n">
<!ENTITY cellback "#c0d9c0">
<!ENTITY mdash "--">
<!ENTITY com "--">
<!-- &#x2014, but nsgmls doesn't grok hex --><!ENTITY com "--">
<!ENTITY como "--">
<!ENTITY comc "--">
<!ENTITY hcro "&amp;#x">
<!ENTITY nbsp "&#160;">
<!-- <!ENTITY nbsp "<22>"> --><!ENTITY nbsp "&#160;">
<!ENTITY magicents "<code>amp</code>,
<code>lt</code>,
<code>gt</code>,
<code>apos</code>,
<code>quot</code>">
<!ENTITY doc.audience "public review and discussion">
<!-- audience and distribution status: for use at publication time --><!ENTITY doc.audience "public review and discussion">
<!ENTITY doc.distribution "may be distributed freely, as long as
all text and legal notices remain intact">
]>
<!-- LAST TOUCHED BY: Tim Bray, 8 February 1997 -->
<!-- The words 'FINAL EDIT' in comments mark places where changes
need to be made after approval of the document by the ERB, before
publication. -->
<!-- &#x2014, but nsgmls doesn't grok hex -->
<!-- <!ENTITY nbsp "<22>"> -->
<!-- audience and distribution status: for use at publication time -->
<!-- for Panorama *-->
<?VERBATIM "eg" ?>
<spec>
@ -110,7 +105,7 @@ HTML.</p>
other interested parties and has been endorsed by the
Director as a W3C Recommendation. It is a stable
document and may be used as reference material or cited
as a normative reference from another document. W3C&apos;s
as a normative reference from another document. W3C's
role in making the Recommendation is to draw attention
to the specification and to promote its widespread
deployment. This enhances the functionality and
@ -155,24 +150,24 @@ entify hard-coded document date in pubdate element,
change expansion of entity WebSGML,
update status description as per Dan Connolly (am not sure
about refernece to Berners-Lee et al.),
add &apos;The&apos; to abstract as per WG decision,
add 'The' to abstract as per WG decision,
move Relationship to Existing Standards to back matter and
combine with References,
re-order back matter so normative appendices come first,
re-tag back matter so informative appendices are tagged informdiv1,
remove XXX XXX from list of &apos;normative&apos; specs in prose,
remove XXX XXX from list of 'normative' specs in prose,
move some references from Other References to Normative References,
add RFC 1738, 1808, and 2141 to Other References (they are not
normative since we do not require the processor to enforce any
rules based on them),
add reference to &apos;Fielding draft&apos; (Berners-Lee et al.),
add reference to 'Fielding draft' (Berners-Lee et al.),
move notation section to end of body,
drop URIchar non-terminal and use SkipLit instead,
lose stray reference to defunct nonterminal &apos;markupdecls&apos;,
move reference to Aho et al. into appendix (Tim&apos;s right),
lose stray reference to defunct nonterminal 'markupdecls',
move reference to Aho et al. into appendix (Tim's right),
add prose note saying that hash marks and fragment identifiers are
NOT part of the URI formally speaking, and are NOT legal in
system identifiers (processor &apos;may&apos; signal an error).
system identifiers (processor 'may' signal an error).
Work through:
Tim Bray reacting to James Clark,
Tim Bray on his own,
@ -180,7 +175,7 @@ Eve Maler,
NOT DONE YET:
change binary / text to unparsed / parsed.
handle James&apos;s suggestion about &lt; in attriubte values
handle James's suggestion about &lt; in attriubte values
uppercase hex characters,
namechar list,
</sitem>
@ -193,7 +188,7 @@ drop SDD from EncodingDecl,
change text at version number 1.0,
drop misleading (wrong!) sentence about ignorables and extenders,
modify definition of PCData to make bar on msc grammatical,
change grammar&apos;s handling of internal subset (drop non-terminal markupdecls),
change grammar's handling of internal subset (drop non-terminal markupdecls),
change definition of includeSect to allow conditional sections,
add integral-declaration constraint on internal subset,
drop misleading / dangerous sentence about relationship of
@ -207,14 +202,14 @@ Unicode character database (needs further work!).
for PE appearance.</sitem>
<sitem>1997-10-01 : TB : Case-sensitive markup; cleaned up
element-type defs, lotsa little edits for style</sitem>
<sitem>1997-09-25 : TB : Change to elm&apos;s new DTD, with
<sitem>1997-09-25 : TB : Change to elm's new DTD, with
substantial detail cleanup as a side-effect</sitem>
<sitem>1997-07-24 : CMSMcQ : correct error (lost *) in definition
of ignoreSectContents (thanks to Makoto Murata)</sitem>
<sitem>Allow all empty elements to have end-tags, consistent with
SGML TC (as per JJC).</sitem>
<sitem>1997-07-23 : CMSMcQ : pre-emptive strike on pending corrections:
introduce the term &apos;empty-element tag&apos;, note that all empty elements
introduce the term 'empty-element tag', note that all empty elements
may use it, and elements declared EMPTY must use it.
Add WFC requiring encoding decl to come first in an entity.
Redefine notations to point to PIs as well as binary entities.
@ -225,15 +220,15 @@ mixed and element content.
</sitem>
<sitem>1997-06-30 : CMSMcQ : change date, some cosmetic changes,
changes to productions for choice, seq, Mixed, NotationType,
Enumeration. Follow James Clark&apos;s suggestion and prohibit
Enumeration. Follow James Clark's suggestion and prohibit
conditional sections in internal subset. TO DO: simplify
production for ignored sections as a result, since we don&apos;t
need to worry about parsers which don&apos;t expand PErefs finding
production for ignored sections as a result, since we don't
need to worry about parsers which don't expand PErefs finding
a conditional section.</sitem>
<sitem>1997-06-29 : TB : various edits</sitem>
<sitem>1997-06-29 : CMSMcQ : further changes:
Suppress old FINAL EDIT comments and some dead material.
Revise occurrences of % in grammar to exploit Henry Thompson&apos;s pun,
Revise occurrences of % in grammar to exploit Henry Thompson's pun,
especially markupdecl and attdef.
Remove RMD requirement relating to element content (?).
</sitem>
@ -249,8 +244,8 @@ Change def of %operator.
Change standard definitions of lt, gt, amp.
Strip leading zeros from #x00nn forms.</sitem>
<sitem>1997-04-02 : CMSMcQ : final corrections of editorial errors
found in last night&apos;s proofreading. Reverse course once more on
well-formed: Webster&apos;s Second hyphenates it, and that&apos;s enough
found in last night's proofreading. Reverse course once more on
well-formed: Webster's Second hyphenates it, and that's enough
for me.</sitem>
<sitem>1997-04-01 : CMSMcQ : corrections from JJC, EM, HT, and self</sitem>
<sitem>1997-03-31 : Tim Bray : many changes</sitem>
@ -265,11 +260,11 @@ Paul Grosso, and self. Among other things: give in on &quot;well formed&quot;
(Terry is right), tentatively rename QuotedCData as AttValue
and Literal as EntityValue to be more informative, since attribute
values are the <emph>only</emph> place QuotedCData was used, and
vice versa for entity text and Literal. (I&apos;d call it Entity Text,
vice versa for entity text and Literal. (I'd call it Entity Text,
but 8879 uses that name for both internal and external entities.)</sitem>
<sitem>1997-03-26 : CMSMcQ : resynch the two forks of this draft, reapply
my changes dated 03-20 and 03-21. Normalize old &apos;may not&apos; to &apos;must not&apos;
except in the one case where it meant &apos;may or may not&apos;.</sitem>
my changes dated 03-20 and 03-21. Normalize old 'may not' to 'must not'
except in the one case where it meant 'may or may not'.</sitem>
<sitem>1997-03-21 : TB : massive changes on plane flight from Chicago
to Vancouver</sitem>
<sitem>1997-03-21 : CMSMcQ : correct as many reported errors as possible.
@ -280,12 +275,12 @@ WWW conference April 1997: restore some of the internal entity
references (e.g. to docdate, etc.), change character xA0 to &amp;nbsp;
and define nbsp as &amp;#160;, and refill a lot of paragraphs for
legibility.</sitem>
<sitem>1996-11-12 : CMSMcQ : revise using Tim&apos;s edits:
<sitem>1996-11-12 : CMSMcQ : revise using Tim's edits:
Add list type of NUMBERED and change most lists either to
BULLETS or to NUMBERED.
Suppress QuotedNames, Names (not used).
Correct trivial-grammar doc type decl.
Rename &apos;marked section&apos; as &apos;CDATA section&apos; passim.
Rename 'marked section' as 'CDATA section' passim.
Also edits from James Clark:
Define the set of characters from which [^abc] subtracts.
Charref should use just [0-9] not Digit.
@ -293,9 +288,9 @@ Location info needs cleaner treatment: remove? (ERB
question).
One example of a PI has wrong pic.
Clarify discussion of encoding names.
Encoding failure should lead to unspecified results; don&apos;t
Encoding failure should lead to unspecified results; don't
prescribe error recovery.
Don&apos;t require exposure of entity boundaries.
Don't require exposure of entity boundaries.
Ignore white space in element content.
Reserve entity names of the form u-NNNN.
Clarify relative URLs.
@ -313,17 +308,17 @@ Finish stylistic revision.</sitem>
<sitem>1996-10-31 : TB : Add Entity Handling section</sitem>
<sitem>1996-10-30 : TB : Clean up term &amp; termdef. Slip in
ERB decision re EMPTY.</sitem>
<sitem>1996-10-28 : TB : Change DTD. Implement some of Michael&apos;s
<sitem>1996-10-28 : TB : Change DTD. Implement some of Michael's
suggestions. Change comments back to //. Introduce language for
XML namespace reservation. Add section on white-space handling.
Lots more cleanup.</sitem>
<sitem>1996-10-24 : CMSMcQ : quick tweaks, implement some ERB
decisions. Characters are not integers. Comments are /* */ not //.
Add bibliographic refs to 10646, HyTime, Unicode.
Rename old Cdata as MsData since it&apos;s <emph>only</emph> seen
Rename old Cdata as MsData since it's <emph>only</emph> seen
in marked sections. Call them attribute-value pairs not
name-value pairs, except once. Internal subset is optional, needs
&apos;?&apos;. Implied attributes should be signaled to the app, not
'?'. Implied attributes should be signaled to the app, not
have values supplied by processor.</sitem>
<sitem>1996-10-16 : TB : track down &amp; excise all DSD references;
introduce some EBNF for entity declarations.</sitem>
@ -340,10 +335,10 @@ Move old 2.2 XML Processors and Apps into intro.
Mention comments, PIs, and marked sections in discussion of
delimiter escaping.
Streamline discussion of doctype decl syntax.
Drop old section of &apos;PI syntax&apos; for doctype decl, and add
Drop old section of 'PI syntax' for doctype decl, and add
section on partial-DTD summary PIs to end of Logical Structures
section.
Revise DSD syntax section to use Tim&apos;s subset-in-a-PI
Revise DSD syntax section to use Tim's subset-in-a-PI
mechanism.</sitem>
<sitem>1996-10-10 : TB : eliminate name recognizers (and more?)</sitem>
<sitem>1996-10-09 : CMSMcQ : revise for style, consistency through 2.3
@ -381,7 +376,7 @@ Parsed data is made up of <termref def="dt-character">characters</termref>,
some
of which form <termref def="dt-chardata">character data</termref>,
and some of which form <termref def="dt-markup">markup</termref>.
Markup encodes a description of the document&apos;s storage layout and
Markup encodes a description of the document's storage layout and
logical structure. XML provides a mechanism to impose constraints on
the storage layout and logical structure.</p>
<p><termdef id="dt-xml-proc" term="XML Processor">A software module
@ -400,7 +395,7 @@ It was chaired by Jon Bosak of Sun
Microsystems with the active participation of an XML Special
Interest Group (previously known as the SGML Working Group) also
organized by the W3C. The membership of the XML Working Group is given
in an appendix. Dan Connolly served as the WG&apos;s contact with the W3C.
in an appendix. Dan Connolly served as the WG's contact with the W3C.
</p>
<p>The design goals for XML are:<olist><item><p>XML shall be straightforwardly usable over the
Internet.</p></item><item><p>XML shall support a wide variety of applications.</p></item><item><p>XML shall be compatible with SGML.</p></item><item><p>It shall be easy to write programs which process XML
@ -447,7 +442,7 @@ the processor may make unprocessed data from the document (with
intermingled character data and markup) available to the application.
Once a fatal error is detected, however, the processor must not
continue normal processing (i.e., it must not
continue to pass character data and information about the document&apos;s
continue to pass character data and information about the document's
logical structure to the application in the normal way).
</termdef></p></def></gitem><gitem><label>at user option</label><def><p>Conforming software may or must (depending on the modal verb in the
sentence) behave as described; if it does, it must
@ -608,7 +603,7 @@ beginning with a letter or one of a few punctuation characters, and continuing
with letters, digits, hyphens, underscores, colons, or full stops, together
known as name characters.</termdef>
Names beginning with the string &quot;<code>xml</code>&quot;, or any string
which would match <code>((&apos;X&apos;|&apos;x&apos;) (&apos;M&apos;|&apos;m&apos;) (&apos;L&apos;|&apos;l&apos;))</code>, are
which would match <code>(('X'|'x') ('M'|'m') ('L'|'l'))</code>, are
reserved for standardization in this or future versions of this
specification.
</p>
@ -629,9 +624,9 @@ should accept the colon as a name character.</p>
name characters.
<scrap lang="ebnf"><head>Names and Tokens</head><prod id="NT-NameChar"><lhs>NameChar</lhs><rhs><nt def="NT-Letter">Letter</nt>
| <nt def="NT-Digit">Digit</nt>
| &apos;.&apos; | &apos;-&apos; | &apos;_&apos; | &apos;:&apos;
| '.' | '-' | '_' | ':'
| <nt def="NT-CombiningChar">CombiningChar</nt>
| <nt def="NT-Extender">Extender</nt></rhs></prod><prod id="NT-Name"><lhs>Name</lhs><rhs>(<nt def="NT-Letter">Letter</nt> | &apos;_&apos; | &apos;:&apos;)
| <nt def="NT-Extender">Extender</nt></rhs></prod><prod id="NT-Name"><lhs>Name</lhs><rhs>(<nt def="NT-Letter">Letter</nt> | '_' | ':')
(<nt def="NT-NameChar">NameChar</nt>)*</rhs></prod><prod id="NT-Names"><lhs>Names</lhs><rhs><nt def="NT-Name">Name</nt>
(<nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>)*</rhs></prod><prod id="NT-Nmtoken"><lhs>Nmtoken</lhs><rhs>(<nt def="NT-NameChar">NameChar</nt>)+</rhs></prod><prod id="NT-Nmtokens"><lhs>Nmtokens</lhs><rhs><nt def="NT-Nmtoken">Nmtoken</nt> (<nt def="NT-S">S</nt> <nt def="NT-Nmtoken">Nmtoken</nt>)*</rhs></prod></scrap>
</p>
@ -645,30 +640,30 @@ and external identifiers
(<nt def="NT-SystemLiteral">SystemLiteral</nt>).
Note that a <nt def="NT-SystemLiteral">SystemLiteral</nt>
can be parsed without scanning for markup.
<scrap lang="ebnf"><head>Literals</head><prod id="NT-EntityValue"><lhs>EntityValue</lhs><rhs>&apos;&quot;&apos;
<scrap lang="ebnf"><head>Literals</head><prod id="NT-EntityValue"><lhs>EntityValue</lhs><rhs>'&quot;'
([^%&amp;&quot;]
| <nt def="NT-PEReference">PEReference</nt>
| <nt def="NT-Reference">Reference</nt>)*
&apos;&quot;&apos;
'&quot;'
</rhs><rhs>|&nbsp;
&quot;&apos;&quot;
([^%&amp;&apos;]
&quot;'&quot;
([^%&amp;']
| <nt def="NT-PEReference">PEReference</nt>
| <nt def="NT-Reference">Reference</nt>)*
&quot;&apos;&quot;</rhs></prod><prod id="NT-AttValue"><lhs>AttValue</lhs><rhs>&apos;&quot;&apos;
&quot;'&quot;</rhs></prod><prod id="NT-AttValue"><lhs>AttValue</lhs><rhs>'&quot;'
([^&lt;&amp;&quot;]
| <nt def="NT-Reference">Reference</nt>)*
&apos;&quot;&apos;
'&quot;'
</rhs><rhs>|&nbsp;
&quot;&apos;&quot;
([^&lt;&amp;&apos;]
&quot;'&quot;
([^&lt;&amp;']
| <nt def="NT-Reference">Reference</nt>)*
&quot;&apos;&quot;</rhs></prod><prod id="NT-SystemLiteral"><lhs>SystemLiteral</lhs><rhs>(&apos;&quot;&apos; [^&quot;]* &apos;&quot;&apos;) |&nbsp;(&quot;&apos;&quot; [^&apos;]* &quot;&apos;&quot;)
</rhs></prod><prod id="NT-PubidLiteral"><lhs>PubidLiteral</lhs><rhs>&apos;&quot;&apos; <nt def="NT-PubidChar">PubidChar</nt>*
&apos;&quot;&apos;
| &quot;&apos;&quot; (<nt def="NT-PubidChar">PubidChar</nt> - &quot;&apos;&quot;)* &quot;&apos;&quot;</rhs></prod><prod id="NT-PubidChar"><lhs>PubidChar</lhs><rhs>#x20 | #xD | #xA
&quot;'&quot;</rhs></prod><prod id="NT-SystemLiteral"><lhs>SystemLiteral</lhs><rhs>('&quot;' [^&quot;]* '&quot;') |&nbsp;(&quot;'&quot; [^']* &quot;'&quot;)
</rhs></prod><prod id="NT-PubidLiteral"><lhs>PubidLiteral</lhs><rhs>'&quot;' <nt def="NT-PubidChar">PubidChar</nt>*
'&quot;'
| &quot;'&quot; (<nt def="NT-PubidChar">PubidChar</nt> - &quot;'&quot;)* &quot;'&quot;</rhs></prod><prod id="NT-PubidChar"><lhs>PubidChar</lhs><rhs>#x20 | #xD | #xA
|&nbsp;[a-zA-Z0-9]
|&nbsp;[-&apos;()+,./:=?;!*#@$_%]</rhs></prod></scrap>
|&nbsp;[-'()+,./:=?;!*#@$_%]</rhs></prod></scrap>
</p>
</div2>
<div2 id="syntax">
@ -729,10 +724,10 @@ is any string of characters not including the CDATA-section-close
delimiter, &quot;<code>]]&gt;</code>&quot;.</p>
<p>
To allow attribute values to contain both single and double quotes, the
apostrophe or single-quote character (&apos;) may be represented as
apostrophe or single-quote character (') may be represented as
&quot;<code>&amp;apos;</code>&quot;, and the double-quote character (&quot;) as
&quot;<code>&amp;quot;</code>&quot;.
<scrap lang="ebnf"><head>Character Data</head><prod id="NT-CharData"><lhs>CharData</lhs><rhs>[^&lt;&amp;]* - ([^&lt;&amp;]* &apos;]]&gt;&apos; [^&lt;&amp;]*)</rhs></prod></scrap>
<scrap lang="ebnf"><head>Character Data</head><prod id="NT-CharData"><lhs>CharData</lhs><rhs>[^&lt;&amp;]* - ([^&lt;&amp;]* ']]&gt;' [^&lt;&amp;]*)</rhs></prod></scrap>
</p>
</div2>
<div2 id="sec-comments">
@ -743,17 +738,17 @@ appear anywhere in a document outside other
<termref def="dt-markup">markup</termref>; in addition,
they may appear within the document type declaration
at places allowed by the grammar.
They are not part of the document&apos;s <termref def="dt-chardata">character
They are not part of the document's <termref def="dt-chardata">character
data</termref>; an XML
processor may, but need not, make it possible for an application to
retrieve the text of comments.
<termref def="dt-compat">For compatibility</termref>, the string
&quot;<code>--</code>&quot; (double-hyphen) must not occur within
comments.
<scrap lang="ebnf"><head>Comments</head><prod id="NT-Comment"><lhs>Comment</lhs><rhs>&apos;&lt;!--&apos;
((<nt def="NT-Char">Char</nt> - &apos;-&apos;)
| (&apos;-&apos; (<nt def="NT-Char">Char</nt> - &apos;-&apos;)))*
&apos;--&gt;&apos;</rhs></prod></scrap>
<scrap lang="ebnf"><head>Comments</head><prod id="NT-Comment"><lhs>Comment</lhs><rhs>'&lt;!--'
((<nt def="NT-Char">Char</nt> - '-')
| ('-' (<nt def="NT-Char">Char</nt> - '-')))*
'--&gt;'</rhs></prod></scrap>
</termdef>
</p>
<p>An example of a comment:
@ -766,13 +761,13 @@ comments.
instructions</term> (PIs) allow documents to contain instructions
for applications.
<scrap lang="ebnf"><head>Processing Instructions</head><prod id="NT-PI"><lhs>PI</lhs><rhs>&apos;&lt;?&apos; <nt def="NT-PITarget">PITarget</nt>
<scrap lang="ebnf"><head>Processing Instructions</head><prod id="NT-PI"><lhs>PI</lhs><rhs>'&lt;?' <nt def="NT-PITarget">PITarget</nt>
(<nt def="NT-S">S</nt>
(<nt def="NT-Char">Char</nt>* -
(<nt def="NT-Char">Char</nt>* &pic; <nt def="NT-Char">Char</nt>*)))?
&pic;</rhs></prod><prod id="NT-PITarget"><lhs>PITarget</lhs><rhs><nt def="NT-Name">Name</nt> -
((&apos;X&apos; | &apos;x&apos;) (&apos;M&apos; | &apos;m&apos;) (&apos;L&apos; | &apos;l&apos;))</rhs></prod></scrap></termdef>
PIs are not part of the document&apos;s <termref def="dt-chardata">character
(('X' | 'x') ('M' | 'm') ('L' | 'l'))</rhs></prod></scrap></termdef>
PIs are not part of the document's <termref def="dt-chardata">character
data</termref>, but must be passed through to the application. The
PI begins with a target (<nt def="NT-PITarget">PITarget</nt>) used
to identify the application to which the instruction is directed.
@ -796,9 +791,9 @@ string &quot;<code>&lt;![CDATA[</code>&quot; and end with the string
&quot;<code>]]&gt;</code>&quot;:
<scrap lang="ebnf"><head>CDATA Sections</head><prod id="NT-CDSect"><lhs>CDSect</lhs><rhs><nt def="NT-CDStart">CDStart</nt>
<nt def="NT-CData">CData</nt>
<nt def="NT-CDEnd">CDEnd</nt></rhs></prod><prod id="NT-CDStart"><lhs>CDStart</lhs><rhs>&apos;&lt;![CDATA[&apos;</rhs></prod><prod id="NT-CData"><lhs>CData</lhs><rhs>(<nt def="NT-Char">Char</nt>* -
(<nt def="NT-Char">Char</nt>* &apos;]]&gt;&apos; <nt def="NT-Char">Char</nt>*))
</rhs></prod><prod id="NT-CDEnd"><lhs>CDEnd</lhs><rhs>&apos;]]&gt;&apos;</rhs></prod></scrap>
<nt def="NT-CDEnd">CDEnd</nt></rhs></prod><prod id="NT-CDStart"><lhs>CDStart</lhs><rhs>'&lt;![CDATA['</rhs></prod><prod id="NT-CData"><lhs>CData</lhs><rhs>(<nt def="NT-Char">Char</nt>* -
(<nt def="NT-Char">Char</nt>* ']]&gt;' <nt def="NT-Char">Char</nt>*))
</rhs></prod><prod id="NT-CDEnd"><lhs>CDEnd</lhs><rhs>']]&gt;'</rhs></prod></scrap>
Within a CDATA section, only the <nt def="NT-CDEnd">CDEnd</nt> string is
recognized as markup, so that left angle brackets and ampersands may occur in
@ -865,9 +860,9 @@ the first <termref def="dt-element">element</termref> in the document.
<nt def="NT-EncodingDecl">EncodingDecl</nt>?
<nt def="NT-SDDecl">SDDecl</nt>?
<nt def="NT-S">S</nt>?
&pic;</rhs></prod><prod id="NT-VersionInfo"><lhs>VersionInfo</lhs><rhs><nt def="NT-S">S</nt> &apos;version&apos; <nt def="NT-Eq">Eq</nt>
(&apos; <nt def="NT-VersionNum">VersionNum</nt> &apos;
| &quot; <nt def="NT-VersionNum">VersionNum</nt> &quot;)</rhs></prod><prod id="NT-Eq"><lhs>Eq</lhs><rhs><nt def="NT-S">S</nt>? &apos;=&apos; <nt def="NT-S">S</nt>?</rhs></prod><prod id="NT-VersionNum"><lhs>VersionNum</lhs><rhs>([a-zA-Z0-9_.:] | &apos;-&apos;)+</rhs></prod><prod id="NT-Misc"><lhs>Misc</lhs><rhs><nt def="NT-Comment">Comment</nt> | <nt def="NT-PI">PI</nt> |
&pic;</rhs></prod><prod id="NT-VersionInfo"><lhs>VersionInfo</lhs><rhs><nt def="NT-S">S</nt> 'version' <nt def="NT-Eq">Eq</nt>
(' <nt def="NT-VersionNum">VersionNum</nt> '
| &quot; <nt def="NT-VersionNum">VersionNum</nt> &quot;)</rhs></prod><prod id="NT-Eq"><lhs>Eq</lhs><rhs><nt def="NT-S">S</nt>? '=' <nt def="NT-S">S</nt>?</rhs></prod><prod id="NT-VersionNum"><lhs>VersionNum</lhs><rhs>([a-zA-Z0-9_.:] | '-')+</rhs></prod><prod id="NT-Misc"><lhs>Misc</lhs><rhs><nt def="NT-Comment">Comment</nt> | <nt def="NT-PI">PI</nt> |
<nt def="NT-S">S</nt></rhs></prod></prodgroup></scrap></p>
<p><termdef id="dt-doctype" term="Document Type Declaration">The XML
<term>document type declaration</term>
@ -903,15 +898,15 @@ For fuller information, see
<prodgroup pcw2="6" pcw4="17.5" pcw5="9">
<prod id="NT-doctypedecl">
<lhs>doctypedecl</lhs>
<rhs>&apos;&lt;!DOCTYPE&apos; <nt def="NT-S">S</nt>
<rhs>'&lt;!DOCTYPE' <nt def="NT-S">S</nt>
<nt def="NT-Name">Name</nt> (<nt def="NT-S">S</nt>
<nt def="NT-ExternalID">ExternalID</nt>)?
<nt def="NT-S">S</nt>? (&apos;[&apos;
<nt def="NT-S">S</nt>? ('['
(<nt def="NT-markupdecl">markupdecl</nt>
| <nt def="NT-PEReference">PEReference</nt>
| <nt def="NT-S">S</nt>)*
&apos;]&apos;
<nt def="NT-S">S</nt>?)? &apos;&gt;&apos;</rhs>
']'
<nt def="NT-S">S</nt>?)? '&gt;'</rhs>
<vc def="vc-roottype"/>
</prod>
<prod id="NT-markupdecl">
@ -1025,8 +1020,8 @@ whether or not there are such declarations which appear external to
the <termref def="dt-docent">document entity</termref>.
<scrap lang="ebnf" id="fulldtd"><head>Standalone Document Declaration</head><prodgroup pcw2="4" pcw4="19.5" pcw5="9"><prod id="NT-SDDecl"><lhs>SDDecl</lhs><rhs>
<nt def="NT-S">S</nt>
&apos;standalone&apos; <nt def="NT-Eq">Eq</nt>
((&quot;&apos;&quot; (&apos;yes&apos; | &apos;no&apos;) &quot;&apos;&quot;) | (&apos;&quot;&apos; (&apos;yes&apos; | &apos;no&apos;) &apos;&quot;&apos;))
'standalone' <nt def="NT-Eq">Eq</nt>
((&quot;'&quot; ('yes' | 'no') &quot;'&quot;) | ('&quot;' ('yes' | 'no') '&quot;'))
</rhs><vc def="vc-check-rmd"/></prod></prodgroup></scrap></p>
<p>
In a standalone document declaration, the value &quot;<code>yes</code>&quot; indicates
@ -1082,7 +1077,7 @@ directly within any instance of those types.
</item>
</ulist>
</vcnote>
<p>An example XML declaration with a standalone document declaration:<eg>&lt;?xml version=&quot;&XML.version;&quot; standalone=&apos;yes&apos;?&gt;</eg></p>
<p>An example XML declaration with a standalone document declaration:<eg>&lt;?xml version=&quot;&XML.version;&quot; standalone='yes'?&gt;</eg></p>
</div2>
<div2 id="sec-white-space">
<head>White Space Handling</head>
@ -1111,7 +1106,7 @@ When declared, it must be given as an
<termref def="dt-enumerated">enumerated type</termref> whose only
possible values are &quot;<code>default</code>&quot; and &quot;<code>preserve</code>&quot;.
For example:<eg><![CDATA[ <!ATTLIST poem xml:space (default|preserve) 'preserve'>]]></eg></p>
<p>The value &quot;<code>default</code>&quot; signals that applications&apos;
<p>The value &quot;<code>default</code>&quot; signals that applications'
default white-space processing modes are acceptable for this element; the
value &quot;<code>preserve</code>&quot; indicates the intent that applications preserve
all the white space.
@ -1158,9 +1153,9 @@ In valid documents, this attribute, like any other, must be
The values of the attribute are language identifiers as defined
by <bibref ref="RFC1766"/>, &quot;Tags for the Identification of Languages&quot;:
<scrap lang="ebnf"><head>Language Identification</head><prod id="NT-LanguageID"><lhs>LanguageID</lhs><rhs><nt def="NT-Langcode">Langcode</nt>
(&apos;-&apos; <nt def="NT-Subcode">Subcode</nt>)*</rhs></prod><prod id="NT-Langcode"><lhs>Langcode</lhs><rhs><nt def="NT-ISO639Code">ISO639Code</nt> |
('-' <nt def="NT-Subcode">Subcode</nt>)*</rhs></prod><prod id="NT-Langcode"><lhs>Langcode</lhs><rhs><nt def="NT-ISO639Code">ISO639Code</nt> |
<nt def="NT-IanaCode">IanaCode</nt> |
<nt def="NT-UserCode">UserCode</nt></rhs></prod><prod id="NT-ISO639Code"><lhs>ISO639Code</lhs><rhs>([a-z] | [A-Z]) ([a-z] | [A-Z])</rhs></prod><prod id="NT-IanaCode"><lhs>IanaCode</lhs><rhs>(&apos;i&apos; | &apos;I&apos;) &apos;-&apos; ([a-z] | [A-Z])+</rhs></prod><prod id="NT-UserCode"><lhs>UserCode</lhs><rhs>(&apos;x&apos; | &apos;X&apos;) &apos;-&apos; ([a-z] | [A-Z])+</rhs></prod><prod id="NT-Subcode"><lhs>Subcode</lhs><rhs>([a-z] | [A-Z])+</rhs></prod></scrap>
<nt def="NT-UserCode">UserCode</nt></rhs></prod><prod id="NT-ISO639Code"><lhs>ISO639Code</lhs><rhs>([a-z] | [A-Z]) ([a-z] | [A-Z])</rhs></prod><prod id="NT-IanaCode"><lhs>IanaCode</lhs><rhs>('i' | 'I') '-' ([a-z] | [A-Z])+</rhs></prod><prod id="NT-UserCode"><lhs>UserCode</lhs><rhs>('x' | 'X') '-' ([a-z] | [A-Z])+</rhs></prod><prod id="NT-Subcode"><lhs>Subcode</lhs><rhs>([a-z] | [A-Z])+</rhs></prod></scrap>
The <nt def="NT-Langcode">Langcode</nt> may be any of the following:
<ulist><item><p>a two-letter language code as defined by
<bibref ref="ISO639"/>, &quot;Codes
@ -1258,14 +1253,14 @@ has a <termref def="dt-attrname">name</termref> and a <termref def="dt-attrval">
</scrap>
<p>This specification does not constrain the semantics, use, or (beyond
syntax) names of the element types and attributes, except that names
beginning with a match to <code>((&apos;X&apos;|&apos;x&apos;)(&apos;M&apos;|&apos;m&apos;)(&apos;L&apos;|&apos;l&apos;))</code>
beginning with a match to <code>(('X'|'x')('M'|'m')('L'|'l'))</code>
are reserved for standardization in this or future versions of this
specification.
</p>
<wfcnote id="GIMatch">
<head>Element Type Match</head>
<p>
The <nt def="NT-Name">Name</nt> in an element&apos;s end-tag must match
The <nt def="NT-Name">Name</nt> in an element's end-tag must match
the element type in
the start-tag.
</p>
@ -1309,13 +1304,13 @@ been declared.</p>
<head>Start-Tags, End-Tags, and Empty-Element Tags</head>
<p><termdef id="dt-stag" term="Start-Tag">The beginning of every
non-empty XML element is marked by a <term>start-tag</term>.
<scrap lang="ebnf"><head>Start-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-STag"><lhs>STag</lhs><rhs>&apos;&lt;&apos; <nt def="NT-Name">Name</nt>
<scrap lang="ebnf"><head>Start-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-STag"><lhs>STag</lhs><rhs>'&lt;' <nt def="NT-Name">Name</nt>
(<nt def="NT-S">S</nt> <nt def="NT-Attribute">Attribute</nt>)*
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs><wfc def="uniqattspec"/></prod><prod id="NT-Attribute"><lhs>Attribute</lhs><rhs><nt def="NT-Name">Name</nt> <nt def="NT-Eq">Eq</nt>
<nt def="NT-S">S</nt>? '&gt;'</rhs><wfc def="uniqattspec"/></prod><prod id="NT-Attribute"><lhs>Attribute</lhs><rhs><nt def="NT-Name">Name</nt> <nt def="NT-Eq">Eq</nt>
<nt def="NT-AttValue">AttValue</nt></rhs><vc def="ValueType"/><wfc def="NoExternalRefs"/><wfc def="CleanAttrVals"/></prod></prodgroup></scrap>
The <nt def="NT-Name">Name</nt> in
the start- and end-tags gives the
element&apos;s <term>type</term>.</termdef>
element's <term>type</term>.</termdef>
<termdef id="dt-attr" term="Attribute">
The <nt def="NT-Name">Name</nt>-<nt def="NT-AttValue">AttValue</nt> pairs are
referred to as
@ -1325,7 +1320,7 @@ the <term>attribute specifications</term> of the element</termdef>,
referred to as the <term>attribute name</term></termdef> and
<termdef id="dt-attrval" term="Attribute Value">the content of the
<nt def="NT-AttValue">AttValue</nt> (the text between the
<code>&apos;</code> or <code>&quot;</code> delimiters)
<code>'</code> or <code>&quot;</code> delimiters)
as the <term>attribute value</term>.</termdef>
</p>
<wfcnote id="uniqattspec">
@ -1364,17 +1359,17 @@ a <code>&lt;</code>.
<termdef id="dt-etag" term="End Tag">The end of every element
that begins with a start-tag must
be marked by an <term>end-tag</term>
containing a name that echoes the element&apos;s type as given in the
containing a name that echoes the element's type as given in the
start-tag:
<scrap lang="ebnf"><head>End-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-ETag"><lhs>ETag</lhs><rhs>&apos;&lt;/&apos; <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod></prodgroup></scrap>
<scrap lang="ebnf"><head>End-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-ETag"><lhs>ETag</lhs><rhs>'&lt;/' <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt>? '&gt;'</rhs></prod></prodgroup></scrap>
</termdef>
</p>
<p>An example of an end-tag:<eg>&lt;/termdef&gt;</eg></p>
<p>
<termdef id="dt-content" term="Content">The
<termref def="dt-text">text</termref> between the start-tag and
end-tag is called the element&apos;s
end-tag is called the element's
<term>content</term>:
<scrap lang="ebnf"><head>Content of Elements</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-content"><lhs>content</lhs><rhs>(<nt def="NT-element">element</nt> | <nt def="NT-CharData">CharData</nt>
| <nt def="NT-Reference">Reference</nt> | <nt def="NT-CDSect">CDSect</nt>
@ -1386,9 +1381,9 @@ it must be represented either by a start-tag immediately followed
by an end-tag or by an empty-element tag.</termdef>
<termdef id="dt-eetag" term="empty-element tag">An
<term>empty-element tag</term> takes a special form:
<scrap lang="ebnf"><head>Tags for Empty Elements</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-EmptyElemTag"><lhs>EmptyElemTag</lhs><rhs>&apos;&lt;&apos; <nt def="NT-Name">Name</nt> (<nt def="NT-S">S</nt>
<scrap lang="ebnf"><head>Tags for Empty Elements</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-EmptyElemTag"><lhs>EmptyElemTag</lhs><rhs>'&lt;' <nt def="NT-Name">Name</nt> (<nt def="NT-S">S</nt>
<nt def="NT-Attribute">Attribute</nt>)* <nt def="NT-S">S</nt>?
&apos;/&gt;&apos;</rhs><wfc def="uniqattspec"/></prod></prodgroup></scrap>
'/&gt;'</rhs><wfc def="uniqattspec"/></prod></prodgroup></scrap>
</termdef></p>
<p>Empty-element tags may be used for any element which has no
content, whether or not it is declared using the keyword
@ -1409,7 +1404,7 @@ tag must be used, and can only be used, for elements which are
<termref def="dt-valid">validation</termref> purposes,
be constrained
using element type and attribute-list declarations.
An element type declaration constrains the element&apos;s
An element type declaration constrains the element's
<termref def="dt-content">content</termref>.
</p>
<p>Element type declarations often constrain which element types can
@ -1419,12 +1414,12 @@ when a declaration mentions an element type for which no declaration
is provided, but this is not an error.</p>
<p><termdef id="dt-eldecl" term="Element Type declaration">An <term>element
type declaration</term> takes the form:
<scrap lang="ebnf"><head>Element Type Declaration</head><prodgroup pcw2="5.5" pcw4="18" pcw5="9"><prod id="NT-elementdecl"><lhs>elementdecl</lhs><rhs>&apos;&lt;!ELEMENT&apos; <nt def="NT-S">S</nt>
<scrap lang="ebnf"><head>Element Type Declaration</head><prodgroup pcw2="5.5" pcw4="18" pcw5="9"><prod id="NT-elementdecl"><lhs>elementdecl</lhs><rhs>'&lt;!ELEMENT' <nt def="NT-S">S</nt>
<nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt>
<nt def="NT-contentspec">contentspec</nt>
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs><vc def="EDUnique"/></prod><prod id="NT-contentspec"><lhs>contentspec</lhs><rhs>&apos;EMPTY&apos;
| &apos;ANY&apos;
<nt def="NT-S">S</nt>? '&gt;'</rhs><vc def="EDUnique"/></prod><prod id="NT-contentspec"><lhs>contentspec</lhs><rhs>'EMPTY'
| 'ANY'
| <nt def="NT-Mixed">Mixed</nt>
| <nt def="NT-children">children</nt>
</rhs></prod></prodgroup></scrap>
@ -1461,14 +1456,14 @@ choice lists of content particles, or
sequence lists of content particles:
<scrap lang="ebnf"><head>Element-content Models</head><prodgroup pcw2="5.5" pcw4="16" pcw5="11"><prod id="NT-children"><lhs>children</lhs><rhs>(<nt def="NT-choice">choice</nt>
| <nt def="NT-seq">seq</nt>)
(&apos;?&apos; | &apos;*&apos; | &apos;+&apos;)?</rhs></prod><prod id="NT-cp"><lhs>cp</lhs><rhs>(<nt def="NT-Name">Name</nt>
('?' | '*' | '+')?</rhs></prod><prod id="NT-cp"><lhs>cp</lhs><rhs>(<nt def="NT-Name">Name</nt>
| <nt def="NT-choice">choice</nt>
| <nt def="NT-seq">seq</nt>)
(&apos;?&apos; | &apos;*&apos; | &apos;+&apos;)?</rhs></prod><prod id="NT-choice"><lhs>choice</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>? cp
( <nt def="NT-S">S</nt>? &apos;|&apos; <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )*
<nt def="NT-S">S</nt>? &apos;)&apos;</rhs><vc def="vc-PEinGroup"/></prod><prod id="NT-seq"><lhs>seq</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>? cp
( <nt def="NT-S">S</nt>? &apos;,&apos; <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )*
<nt def="NT-S">S</nt>? &apos;)&apos;</rhs><vc def="vc-PEinGroup"/></prod></prodgroup></scrap>
('?' | '*' | '+')?</rhs></prod><prod id="NT-choice"><lhs>choice</lhs><rhs>'(' <nt def="NT-S">S</nt>? cp
( <nt def="NT-S">S</nt>? '|' <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )*
<nt def="NT-S">S</nt>? ')'</rhs><vc def="vc-PEinGroup"/></prod><prod id="NT-seq"><lhs>seq</lhs><rhs>'(' <nt def="NT-S">S</nt>? cp
( <nt def="NT-S">S</nt>? ',' <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )*
<nt def="NT-S">S</nt>? ')'</rhs><vc def="vc-PEinGroup"/></prod></prodgroup></scrap>
where each <nt def="NT-Name">Name</nt> is the type of an element which may
appear as a <termref def="dt-parentchild">child</termref>.
Any content
@ -1532,14 +1527,14 @@ character data, optionally interspersed with
<termref def="dt-parentchild">child</termref> elements.</termdef>
In this case, the types of the child elements
may be constrained, but not their order or their number of occurrences:
<scrap lang="ebnf"><head>Mixed-content Declaration</head><prodgroup pcw2="5.5" pcw4="16" pcw5="11"><prod id="NT-Mixed"><lhs>Mixed</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>?
&apos;#PCDATA&apos;
<scrap lang="ebnf"><head>Mixed-content Declaration</head><prodgroup pcw2="5.5" pcw4="16" pcw5="11"><prod id="NT-Mixed"><lhs>Mixed</lhs><rhs>'(' <nt def="NT-S">S</nt>?
'#PCDATA'
(<nt def="NT-S">S</nt>?
&apos;|&apos;
'|'
<nt def="NT-S">S</nt>?
<nt def="NT-Name">Name</nt>)*
<nt def="NT-S">S</nt>?
&apos;)*&apos; </rhs><rhs>| &apos;(&apos; <nt def="NT-S">S</nt>? &apos;#PCDATA&apos; <nt def="NT-S">S</nt>? &apos;)&apos;
')*' </rhs><rhs>| '(' <nt def="NT-S">S</nt>? '#PCDATA' <nt def="NT-S">S</nt>? ')'
</rhs><vc def="vc-PEinGroup"/><vc def="vc-MixedChildrenUnique"/></prod></prodgroup></scrap>
where the <nt def="NT-Name">Name</nt>s give the types of elements
that may appear as children.
@ -1575,10 +1570,10 @@ for attributes.</p></item></ulist>
<termdef id="dt-attdecl" term="Attribute-List Declaration">
<term>Attribute-list declarations</term> specify the name, data type, and default
value (if any) of each attribute associated with a given element type:
<scrap lang="ebnf"><head>Attribute-list Declaration</head><prod id="NT-AttlistDecl"><lhs>AttlistDecl</lhs><rhs>&apos;&lt;!ATTLIST&apos; <nt def="NT-S">S</nt>
<scrap lang="ebnf"><head>Attribute-list Declaration</head><prod id="NT-AttlistDecl"><lhs>AttlistDecl</lhs><rhs>'&lt;!ATTLIST' <nt def="NT-S">S</nt>
<nt def="NT-Name">Name</nt>
<nt def="NT-AttDef">AttDef</nt>*
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod><prod id="NT-AttDef"><lhs>AttDef</lhs><rhs><nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt>? '&gt;'</rhs></prod><prod id="NT-AttDef"><lhs>AttDef</lhs><rhs><nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt> <nt def="NT-AttType">AttType</nt>
<nt def="NT-S">S</nt> <nt def="NT-DefaultDecl">DefaultDecl</nt></rhs></prod></scrap>
The <nt def="NT-Name">Name</nt> in the
@ -1615,7 +1610,7 @@ and semantic constraints, as noted:
<scrap lang="ebnf"><head>Attribute Types</head><prodgroup pcw4="14" pcw5="11.5"><prod id="NT-AttType"><lhs>AttType</lhs><rhs><nt def="NT-StringType">StringType</nt>
| <nt def="NT-TokenizedType">TokenizedType</nt>
| <nt def="NT-EnumeratedType">EnumeratedType</nt>
</rhs></prod><prod id="NT-StringType"><lhs>StringType</lhs><rhs>&apos;CDATA&apos;</rhs></prod><prod id="NT-TokenizedType"><lhs>TokenizedType</lhs><rhs>&apos;ID&apos;</rhs><vc def="id"/><vc def="one-id-per-el"/><vc def="id-default"/><rhs>| &apos;IDREF&apos;</rhs><vc def="idref"/><rhs>| &apos;IDREFS&apos;</rhs><vc def="idref"/><rhs>| &apos;ENTITY&apos;</rhs><vc def="entname"/><rhs>| &apos;ENTITIES&apos;</rhs><vc def="entname"/><rhs>| &apos;NMTOKEN&apos;</rhs><vc def="nmtok"/><rhs>| &apos;NMTOKENS&apos;</rhs><vc def="nmtok"/></prod></prodgroup></scrap>
</rhs></prod><prod id="NT-StringType"><lhs>StringType</lhs><rhs>'CDATA'</rhs></prod><prod id="NT-TokenizedType"><lhs>TokenizedType</lhs><rhs>'ID'</rhs><vc def="id"/><vc def="one-id-per-el"/><vc def="id-default"/><rhs>| 'IDREF'</rhs><vc def="idref"/><rhs>| 'IDREFS'</rhs><vc def="idref"/><rhs>| 'ENTITY'</rhs><vc def="entname"/><rhs>| 'ENTITIES'</rhs><vc def="entname"/><rhs>| 'NMTOKEN'</rhs><vc def="nmtok"/><rhs>| 'NMTOKENS'</rhs><vc def="nmtok"/></prod></prodgroup></scrap>
</p>
<vcnote id="id">
<head>ID</head>
@ -1679,21 +1674,21 @@ of a list of values provided in the declaration</termdef>. There are two
kinds of enumerated types:
<scrap lang="ebnf"><head>Enumerated Attribute Types</head><prod id="NT-EnumeratedType"><lhs>EnumeratedType</lhs><rhs><nt def="NT-NotationType">NotationType</nt>
| <nt def="NT-Enumeration">Enumeration</nt>
</rhs></prod><prod id="NT-NotationType"><lhs>NotationType</lhs><rhs>&apos;NOTATION&apos;
</rhs></prod><prod id="NT-NotationType"><lhs>NotationType</lhs><rhs>'NOTATION'
<nt def="NT-S">S</nt>
&apos;(&apos;
'('
<nt def="NT-S">S</nt>?
<nt def="NT-Name">Name</nt>
(<nt def="NT-S">S</nt>? &apos;|&apos; <nt def="NT-S">S</nt>?
(<nt def="NT-S">S</nt>? '|' <nt def="NT-S">S</nt>?
<nt def="NT-Name">Name</nt>)*
<nt def="NT-S">S</nt>? &apos;)&apos;
</rhs><vc def="notatn"/></prod><prod id="NT-Enumeration"><lhs>Enumeration</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>?
<nt def="NT-S">S</nt>? ')'
</rhs><vc def="notatn"/></prod><prod id="NT-Enumeration"><lhs>Enumeration</lhs><rhs>'(' <nt def="NT-S">S</nt>?
<nt def="NT-Nmtoken">Nmtoken</nt>
(<nt def="NT-S">S</nt>? &apos;|&apos;
(<nt def="NT-S">S</nt>? '|'
<nt def="NT-S">S</nt>?
<nt def="NT-Nmtoken">Nmtoken</nt>)*
<nt def="NT-S">S</nt>?
&apos;)&apos;</rhs><vc def="enum"/></prod></scrap>
')'</rhs><vc def="enum"/></prod></scrap>
A <kw>NOTATION</kw> attribute identifies a
<termref def="dt-notation">notation</termref>, declared in the
DTD with associated system and/or public identifiers, to
@ -1726,10 +1721,10 @@ enumerated attribute types of a single element type.
<head>Attribute Defaults</head>
<p>An <termref def="dt-attdecl">attribute declaration</termref> provides
information on whether
the attribute&apos;s presence is required, and if not, how an XML processor should
the attribute's presence is required, and if not, how an XML processor should
react if a declared attribute is absent in a document.
<scrap lang="ebnf"><head>Attribute Defaults</head><prodgroup pcw4="14" pcw5="11.5"><prod id="NT-DefaultDecl"><lhs>DefaultDecl</lhs><rhs>&apos;#REQUIRED&apos;
|&nbsp;&apos;#IMPLIED&apos; </rhs><rhs>| ((&apos;#FIXED&apos; S)? <nt def="NT-AttValue">AttValue</nt>)</rhs><vc def="RequiredAttr"/><vc def="defattrvalid"/><wfc def="CleanAttrVals"/><vc def="FixedAttr"/></prod></prodgroup></scrap>
<scrap lang="ebnf"><head>Attribute Defaults</head><prodgroup pcw4="14" pcw5="11.5"><prod id="NT-DefaultDecl"><lhs>DefaultDecl</lhs><rhs>'#REQUIRED'
|&nbsp;'#IMPLIED' </rhs><rhs>| (('#FIXED' S)? <nt def="NT-AttValue">AttValue</nt>)</rhs><vc def="RequiredAttr"/><vc def="defattrvalid"/><wfc def="CleanAttrVals"/><vc def="FixedAttr"/></prod></prodgroup></scrap>
</p>
<p>In an attribute declaration, <kw>#REQUIRED</kw> means that the
@ -1817,16 +1812,16 @@ included in, or excluded from, the logical structure of the DTD based on
the keyword which governs them.</termdef>
<scrap lang="ebnf"><head>Conditional Section</head><prodgroup pcw2="9" pcw4="14.5"><prod id="NT-conditionalSect"><lhs>conditionalSect</lhs><rhs><nt def="NT-includeSect">includeSect</nt>
| <nt def="NT-ignoreSect">ignoreSect</nt>
</rhs></prod><prod id="NT-includeSect"><lhs>includeSect</lhs><rhs>&apos;&lt;![&apos; S? &apos;INCLUDE&apos; S? &apos;[&apos;
</rhs></prod><prod id="NT-includeSect"><lhs>includeSect</lhs><rhs>'&lt;![' S? 'INCLUDE' S? '['
<nt def="NT-extSubsetDecl">extSubsetDecl</nt>
&apos;]]&gt;&apos;
</rhs></prod><prod id="NT-ignoreSect"><lhs>ignoreSect</lhs><rhs>&apos;&lt;![&apos; S? &apos;IGNORE&apos; S? &apos;[&apos;
']]&gt;'
</rhs></prod><prod id="NT-ignoreSect"><lhs>ignoreSect</lhs><rhs>'&lt;![' S? 'IGNORE' S? '['
<nt def="NT-ignoreSectContents">ignoreSectContents</nt>*
&apos;]]&gt;&apos;</rhs></prod><prod id="NT-ignoreSectContents"><lhs>ignoreSectContents</lhs><rhs><nt def="NT-Ignore">Ignore</nt>
(&apos;&lt;![&apos; <nt def="NT-ignoreSectContents">ignoreSectContents</nt> &apos;]]&gt;&apos;
']]&gt;'</rhs></prod><prod id="NT-ignoreSectContents"><lhs>ignoreSectContents</lhs><rhs><nt def="NT-Ignore">Ignore</nt>
('&lt;![' <nt def="NT-ignoreSectContents">ignoreSectContents</nt> ']]&gt;'
<nt def="NT-Ignore">Ignore</nt>)*</rhs></prod><prod id="NT-Ignore"><lhs>Ignore</lhs><rhs><nt def="NT-Char">Char</nt>* -
(<nt def="NT-Char">Char</nt>* (&apos;&lt;![&apos; | &apos;]]&gt;&apos;)
(<nt def="NT-Char">Char</nt>* ('&lt;![' | ']]&gt;')
<nt def="NT-Char">Char</nt>*)
</rhs></prod></prodgroup></scrap>
</p>
@ -1854,8 +1849,8 @@ parameter-entity reference, the parameter entity must be replaced by its
content before the processor decides whether to
include or ignore the conditional section.</p>
<p>An example:
<eg>&lt;!ENTITY % draft &apos;INCLUDE&apos; &gt;
&lt;!ENTITY % final &apos;IGNORE&apos; &gt;
<eg>&lt;!ENTITY % draft 'INCLUDE' &gt;
&lt;!ENTITY % final 'IGNORE' &gt;
&lt;![%draft;[
&lt;!ELEMENT book (comments*, title, body, supplements?)&gt;
@ -1903,7 +1898,7 @@ called the <termref def="dt-docent">document entity</termref>, which serves
as the starting point for the <termref def="dt-xml-proc">XML
processor</termref> and may contain the whole document.</p>
<p>Entities may be either parsed or unparsed.
<termdef id="dt-parsedent" term="Text Entity">A <term>parsed entity&apos;s</term>
<termdef id="dt-parsedent" term="Text Entity">A <term>parsed entity's</term>
contents are referred to as its
<termref def="dt-repltext">replacement text</termref>;
this <termref def="dt-text">text</termref> is considered an
@ -1942,15 +1937,15 @@ a general entity with the same name are two distinct entities.
A <term>character reference</term> refers to a specific character in the
ISO/IEC 10646 character set, for example one not directly accessible from
available input devices.
<scrap lang="ebnf"><head>Character Reference</head><prod id="NT-CharRef"><lhs>CharRef</lhs><rhs>&apos;&amp;#&apos; [0-9]+ &apos;;&apos; </rhs><rhs>| &apos;&hcro;&apos; [0-9a-fA-F]+ &apos;;&apos;</rhs><wfc def="wf-Legalchar"/></prod></scrap>
<scrap lang="ebnf"><head>Character Reference</head><prod id="NT-CharRef"><lhs>CharRef</lhs><rhs>'&amp;#' [0-9]+ ';' </rhs><rhs>| '&hcro;' [0-9a-fA-F]+ ';'</rhs><wfc def="wf-Legalchar"/></prod></scrap>
<wfcnote id="wf-Legalchar"><head>Legal Character</head><p>Characters referred to using character references must
match the production for
<nt def="NT-Char">Char</nt>.</p></wfcnote>
If the character reference begins with &quot;<code>&amp;#x</code>&quot;, the digits and
letters up to the terminating <code>;</code> provide a hexadecimal
representation of the character&apos;s code point in ISO/IEC 10646.
representation of the character's code point in ISO/IEC 10646.
If it begins just with &quot;<code>&amp;#</code>&quot;, the digits up to the terminating
<code>;</code> provide a decimal representation of the character&apos;s
<code>;</code> provide a decimal representation of the character's
code point.
</termdef>
</p>
@ -1974,7 +1969,7 @@ semicolon
</prod>
<prod id="NT-EntityRef">
<lhs>EntityRef</lhs>
<rhs>&apos;&amp;&apos; <nt def="NT-Name">Name</nt> &apos;;&apos;</rhs>
<rhs>'&amp;' <nt def="NT-Name">Name</nt> ';'</rhs>
<wfc def="wf-entdeclared"/>
<vc def="vc-entdeclared"/>
<wfc def="textent"/>
@ -1982,7 +1977,7 @@ semicolon
</prod>
<prod id="NT-PEReference">
<lhs>PEReference</lhs>
<rhs>&apos;%&apos; <nt def="NT-Name">Name</nt> &apos;;&apos;</rhs>
<rhs>'%' <nt def="NT-Name">Name</nt> ';'</rhs>
<vc def="vc-entdeclared"/>
<wfc def="norecursion"/>
<wfc def="indtd"/>
@ -1992,7 +1987,7 @@ semicolon
<head>Entity Declared</head>
<p>In a document without any DTD, a document with only an internal
DTD subset which contains no parameter entity references, or a document with
&quot;<code>standalone=&apos;yes&apos;</code>&quot;,
&quot;<code>standalone='yes'</code>&quot;,
the <nt def="NT-Name">Name</nt> given in the entity reference must
<termref def="dt-match">match</termref> that in an
<titleref href="sec-entity-decl">entity declaration</titleref>, except that
@ -2007,12 +2002,12 @@ external parameter entities, a non-validating processor is
<titleref href="include-if-valid">not obligated to</titleref> read
and process their declarations; for such documents, the rule that
an entity must be declared is a well-formedness constraint only
if <titleref href="sec-rmd">standalone=&apos;yes&apos;</titleref>.</p>
if <titleref href="sec-rmd">standalone='yes'</titleref>.</p>
</wfcnote>
<vcnote id="vc-entdeclared">
<head>Entity Declared</head>
<p>In a document with an external subset or external parameter
entities with &quot;<code>standalone=&apos;no&apos;</code>&quot;,
entities with &quot;<code>standalone='no'</code>&quot;,
the <nt def="NT-Name">Name</nt> given in the entity reference must <termref def="dt-match">match</termref> that in an
<titleref href="sec-entity-decl">entity declaration</titleref>.
For interoperability, valid documents should declare the entities
@ -2062,11 +2057,11 @@ is classified &amp;security-level;.</eg></p>
<p><termdef id="dt-entdecl" term="entity declaration">
Entities are declared thus:
<scrap lang="ebnf"><head>Entity Declaration</head><prodgroup pcw2="5" pcw4="18.5"><prod id="NT-EntityDecl"><lhs>EntityDecl</lhs><rhs><nt def="NT-GEDecl">GEDecl</nt><!--</rhs><com>General entities</com>
<rhs>--> | <nt def="NT-PEDecl">PEDecl</nt></rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-GEDecl"><lhs>GEDecl</lhs><rhs>&apos;&lt;!ENTITY&apos; <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>
<rhs>--> | <nt def="NT-PEDecl">PEDecl</nt></rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-GEDecl"><lhs>GEDecl</lhs><rhs>'&lt;!ENTITY' <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt> <nt def="NT-EntityDef">EntityDef</nt>
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod><prod id="NT-PEDecl"><lhs>PEDecl</lhs><rhs>&apos;&lt;!ENTITY&apos; <nt def="NT-S">S</nt> &apos;%&apos; <nt def="NT-S">S</nt>
<nt def="NT-S">S</nt>? '&gt;'</rhs></prod><prod id="NT-PEDecl"><lhs>PEDecl</lhs><rhs>'&lt;!ENTITY' <nt def="NT-S">S</nt> '%' <nt def="NT-S">S</nt>
<nt def="NT-Name">Name</nt> <nt def="NT-S">S</nt>
<nt def="NT-PEDef">PEDef</nt> <nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-EntityDef"><lhs>EntityDef</lhs><rhs><nt def="NT-EntityValue">EntityValue</nt>
<nt def="NT-PEDef">PEDef</nt> <nt def="NT-S">S</nt>? '&gt;'</rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-EntityDef"><lhs>EntityDef</lhs><rhs><nt def="NT-EntityValue">EntityValue</nt>
<!--</rhs>
<rhs>-->| (<nt def="NT-ExternalID">ExternalID</nt>
<nt def="NT-NDataDecl">NDataDecl</nt>?)</rhs><!-- <nt def='NT-ExternalDef'>ExternalDef</nt></rhs> --></prod><!-- FINAL EDIT: what happened to WFs here? --><prod id="NT-PEDef"><lhs>PEDef</lhs><rhs><nt def="NT-EntityValue">EntityValue</nt>
@ -2107,12 +2102,12 @@ internal, it is an <term>external
entity</term>, declared as follows:
<scrap lang="ebnf"><head>External Entity Declaration</head><!--
<prod id='NT-ExternalDef'><lhs>ExternalDef</lhs>
<rhs></prod> --><prod id="NT-ExternalID"><lhs>ExternalID</lhs><rhs>&apos;SYSTEM&apos; <nt def="NT-S">S</nt>
<nt def="NT-SystemLiteral">SystemLiteral</nt></rhs><rhs>| &apos;PUBLIC&apos; <nt def="NT-S">S</nt>
<rhs></prod> --><prod id="NT-ExternalID"><lhs>ExternalID</lhs><rhs>'SYSTEM' <nt def="NT-S">S</nt>
<nt def="NT-SystemLiteral">SystemLiteral</nt></rhs><rhs>| 'PUBLIC' <nt def="NT-S">S</nt>
<nt def="NT-PubidLiteral">PubidLiteral</nt>
<nt def="NT-S">S</nt>
<nt def="NT-SystemLiteral">SystemLiteral</nt>
</rhs></prod><prod id="NT-NDataDecl"><lhs>NDataDecl</lhs><rhs><nt def="NT-S">S</nt> &apos;NDATA&apos; <nt def="NT-S">S</nt>
</rhs></prod><prod id="NT-NDataDecl"><lhs>NDataDecl</lhs><rhs><nt def="NT-S">S</nt> 'NDATA' <nt def="NT-S">S</nt>
<nt def="NT-Name">Name</nt></rhs><vc def="not-declared"/></prod></scrap>
If the <nt def="NT-NDataDecl">NDataDecl</nt> is present, this is a
general <termref def="dt-unparsed">unparsed
@ -2127,7 +2122,7 @@ The <nt def="NT-Name">Name</nt> must match the declared name of a
</vcnote>
<p><termdef id="dt-sysid" term="System Identifier">The
<nt def="NT-SystemLiteral">SystemLiteral</nt>
is called the entity&apos;s <term>system identifier</term>. It is a URI,
is called the entity's <term>system identifier</term>. It is a URI,
which may be used to retrieve the entity.</termdef>
Note that the hash mark (<code>#</code>) and fragment identifier
frequently used with URIs are not, formally, part of the URI itself;
@ -2151,7 +2146,7 @@ byte value).</p>
<p><termdef id="dt-pubid" term="Public identifier">
In addition to a system identifier, an external identifier may
include a <term>public identifier</term>.</termdef>
An XML processor attempting to retrieve the entity&apos;s content may use the public
An XML processor attempting to retrieve the entity's content may use the public
identifier to try to generate an alternative URI. If the processor
is unable to do so, it must use the URI specified in the system
literal. Before a match is attempted, all strings
@ -2237,10 +2232,10 @@ Parsed entities which are stored in an encoding other than
UTF-8 or UTF-16 must begin with a <titleref href="TextDecl">text
declaration</titleref> containing an encoding declaration:
<scrap lang="ebnf"><head>Encoding Declaration</head><prod id="NT-EncodingDecl"><lhs>EncodingDecl</lhs><rhs><nt def="NT-S">S</nt>
&apos;encoding&apos; <nt def="NT-Eq">Eq</nt>
(&apos;&quot;&apos; <nt def="NT-EncName">EncName</nt> &apos;&quot;&apos; |
&quot;&apos;&quot; <nt def="NT-EncName">EncName</nt> &quot;&apos;&quot; )
</rhs></prod><prod id="NT-EncName"><lhs>EncName</lhs><rhs>[A-Za-z] ([A-Za-z0-9._] | &apos;-&apos;)*</rhs><com>Encoding name contains only Latin characters</com></prod></scrap>
'encoding' <nt def="NT-Eq">Eq</nt>
('&quot;' <nt def="NT-EncName">EncName</nt> '&quot;' |
&quot;'&quot; <nt def="NT-EncName">EncName</nt> &quot;'&quot; )
</rhs></prod><prod id="NT-EncName"><lhs>EncName</lhs><rhs>[A-Za-z] ([A-Za-z0-9._] | '-')*</rhs><com>Encoding name contains only Latin characters</com></prod></scrap>
In the <termref def="dt-docent">document entity</termref>, the encoding
declaration is part of the <termref def="dt-xmldecl">XML declaration</termref>.
The <nt def="NT-EncName">EncName</nt> is the name of the encoding used.
@ -2286,8 +2281,8 @@ an encoding declaration.</p>
<p>It is a <termref def="dt-fatal">fatal error</termref> when an XML processor
encounters an entity with an encoding that it is unable to process.</p>
<p>Examples of encoding declarations:
<eg>&lt;?xml encoding=&apos;UTF-8&apos;?&gt;
&lt;?xml encoding=&apos;EUC-JP&apos;?&gt;</eg></p>
<eg>&lt;?xml encoding='UTF-8'?&gt;
&lt;?xml encoding='EUC-JP'?&gt;</eg></p>
</div3>
</div2>
<div2 id="entproc">
@ -2309,9 +2304,9 @@ the value of an
attribute which has been declared as type <kw>ENTITY</kw>, or as one of
the space-separated tokens in the value of an attribute which has been
declared as type <kw>ENTITIES</kw>.</p></def></gitem><gitem><label>Reference in Entity Value</label><def><p>as a reference
within a parameter or internal entity&apos;s
within a parameter or internal entity's
<termref def="dt-litentval">literal entity value</termref> in
the entity&apos;s declaration; corresponds to the nonterminal
the entity's declaration; corresponds to the nonterminal
<nt def="NT-EntityValue">EntityValue</nt>.</p></def></gitem><gitem><label>Reference in DTD</label><def><p>as a reference within either the internal or external subsets of the
<termref def="dt-doctype">DTD</termref>, but outside
of an <nt def="NT-EntityValue">EntityValue</nt> or
@ -2470,7 +2465,7 @@ replacement text.
If the entity is external, and the processor is not
attempting to validate the XML document, the
processor <termref def="dt-may">may</termref>, but need not,
include the entity&apos;s replacement text.
include the entity's replacement text.
If a non-validating parser does not include the replacement text,
it must inform the application that it recognized, but did not
read, the entity.</p>
@ -2479,7 +2474,7 @@ provided by the SGML and XML entity mechanism, primarily designed
to support modularity in authoring, is not necessarily
appropriate for other applications, in particular document browsing.
Browsers, for example, when encountering an external parsed entity reference,
might choose to provide a visual indication of the entity&apos;s
might choose to provide a visual indication of the entity's
presence and retrieve it for display only on demand.
</p>
</div3>
@ -2508,8 +2503,8 @@ For example, this is well-formed:
<eg><![CDATA[<!ENTITY % YN '"Yes"' >
<!ENTITY WhatHeSaid "He said &YN;" >]]></eg>
while this is not:
<eg>&lt;!ENTITY EndAttr &quot;27&apos;&quot; &gt;
&lt;element attribute=&apos;a-&amp;EndAttr;&gt;</eg>
<eg>&lt;!ENTITY EndAttr &quot;27'&quot; &gt;
&lt;element attribute='a-&amp;EndAttr;&gt;</eg>
</p>
</div3>
<div3 id="notify">
@ -2548,7 +2543,7 @@ entities to contain an integral number of grammatical tokens in the DTD.
<head>Construction of Internal Entity Replacement Text</head>
<p>In discussing the treatment
of internal entities, it is
useful to distinguish two forms of the entity&apos;s value.
useful to distinguish two forms of the entity's value.
<termdef id="dt-litentval" term="Literal Entity Value">The <term>literal
entity value</term> is the quoted string actually
present in the entity declaration, corresponding to the
@ -2579,9 +2574,9 @@ For example, given the following declarations:
&#xA9; 1947 %pub;. &rights;" >]]></eg>
then the replacement text for the entity &quot;<code>book</code>&quot; is:
<eg>La Peste: Albert Camus,
&#169; 1947 &#201;ditions Gallimard. &amp;rights;</eg>
© 1947 Éditions Gallimard. &amp;rights;</eg>
The general-entity reference &quot;<code>&amp;rights;</code>&quot; would be expanded
should the reference &quot;<code>&amp;book;</code>&quot; appear in the document&apos;s
should the reference &quot;<code>&amp;book;</code>&quot; appear in the document's
content or an attribute value.</p>
<p>These simple rules may have complex interactions; for a detailed
discussion of a difficult example, see
@ -2642,11 +2637,11 @@ entity and attribute-list declarations and in attribute specifications,
and an external identifier for the notation which may allow an XML
processor or its client application to locate a helper application
capable of processing data in the given notation.
<scrap lang="ebnf"><head>Notation Declarations</head><prod id="NT-NotationDecl"><lhs>NotationDecl</lhs><rhs>&apos;&lt;!NOTATION&apos; <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>
<scrap lang="ebnf"><head>Notation Declarations</head><prod id="NT-NotationDecl"><lhs>NotationDecl</lhs><rhs>'&lt;!NOTATION' <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>
<nt def="NT-S">S</nt>
(<nt def="NT-ExternalID">ExternalID</nt> |
<nt def="NT-PublicID">PublicID</nt>)
<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod><prod id="NT-PublicID"><lhs>PublicID</lhs><rhs>&apos;PUBLIC&apos; <nt def="NT-S">S</nt>
<nt def="NT-S">S</nt>? '&gt;'</rhs></prod><prod id="NT-PublicID"><lhs>PublicID</lhs><rhs>'PUBLIC' <nt def="NT-S">S</nt>
<nt def="NT-PubidLiteral">PubidLiteral</nt>
</rhs></prod></scrap>
</termdef>
@ -2683,7 +2678,7 @@ without any identification at all.</p>
<p>Conforming <termref def="dt-xml-proc">XML processors</termref> fall into two
classes: validating and non-validating.</p>
<p>Validating and non-validating processors alike must report
violations of this specification&apos;s well-formedness constraints
violations of this specification's well-formedness constraints
in the content of the
<termref def="dt-docent">document entity</termref> and any
other <termref def="dt-parsedent">parsed entities</termref> that
@ -2784,7 +2779,7 @@ with a value in the range(s) indicated (inclusive).</p></def></gitem><gitem><lab
with a value <emph>outside</emph> the
range indicated.</p></def></gitem><gitem><label><code>[^abc]</code>, <code>[^#xN#xN#xN]</code></label><def><p>matches any <termref def="dt-character">character</termref>
with a value not among the characters given.</p></def></gitem><gitem><label><code>&quot;string&quot;</code></label><def><p>matches a literal string <termref def="dt-match">matching</termref>
that given inside the double quotes.</p></def></gitem><gitem><label><code>&apos;string&apos;</code></label><def><p>matches a literal string <termref def="dt-match">matching</termref>
that given inside the double quotes.</p></def></gitem><gitem><label><code>'string'</code></label><def><p>matches a literal string <termref def="dt-match">matching</termref>
that given inside the single quotes.</p></def></gitem></glist>
These symbols may be combined to match more complex patterns as follows,
where <code>A</code> and <code>B</code> represent simple expressions:
@ -2861,17 +2856,17 @@ Berners-Lee, T., R. Fielding, and L. Masinter.
Semantics</emph>.
1997.
(Work in progress; see updates to RFC1738.)</bibl>
<bibl id="ABK" key="Br&#252;ggemann-Klein">Br&#252;ggemann-Klein, Anne.
<bibl id="ABK" key="Brüggemann-Klein">Brüggemann-Klein, Anne.
<emph>Regular Expressions into Finite Automata</emph>.
Extended abstract in I. Simon, Hrsg., LATIN 1992,
S. 97-98. Springer-Verlag, Berlin 1992.
Full Version in Theoretical Computer Science 120: 197-213, 1993.
</bibl>
<bibl id="ABKDW" key="Br&#252;ggemann-Klein and Wood">Br&#252;ggemann-Klein, Anne,
<bibl id="ABKDW" key="Brüggemann-Klein and Wood">Brüggemann-Klein, Anne,
and Derick Wood.
<emph>Deterministic Regular Languages</emph>.
Universit&#228;t Freiburg, Institut f&#252;r Informatik,
Universität Freiburg, Institut für Informatik,
Bericht 38, Oktober 1991.
</bibl>
<bibl id="Clark" key="Clark">James Clark.
@ -3268,7 +3263,7 @@ rather than name characters, because the property file classifies
them as Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.</p></item><item><p>Characters #x20DD-#x20E0 are excluded (in accordance with
Unicode, section 5.14).</p></item><item><p>Character #x00B7 is classified as an extender, because the
property list so identifies it.</p></item><item><p>Character #x0387 is added as a name character, because #x00B7
is its canonical equivalent.</p></item><item><p>Characters &apos;:&apos; and &apos;_&apos; are allowed as name-start characters.</p></item><item><p>Characters &apos;-&apos; and &apos;.&apos; are allowed as name characters.</p></item></ulist>
is its canonical equivalent.</p></item><item><p>Characters ':' and '_' are allowed as name-start characters.</p></item><item><p>Characters '-' and '.' are allowed as name characters.</p></item></ulist>
</p>
</div1>
<inform-div1 id="sec-xml-and-sgml">
@ -3365,7 +3360,7 @@ In this case, the two references to
<code>b</code> can be collapsed
into a single reference, making the model read
<code>(b, (c | d))</code>. An initial <code>b</code> now clearly
matches only a single name in the content model. The parser doesn&apos;t
matches only a single name in the content model. The parser doesn't
need to look ahead to see what follows; either <code>c</code> or
<code>d</code> would be accepted.</p>
<p>More formally: a finite state automaton may be constructed from the
@ -3384,7 +3379,7 @@ and may be reported as an error.
</p>
<p>Algorithms exist which allow many but not all non-deterministic
content models to be reduced automatically to equivalent deterministic
models; see Br&#252;ggemann-Klein 1991 <bibref ref="ABK"/>.</p>
models; see Brüggemann-Klein 1991 <bibref ref="ABK"/>.</p>
</inform-div1>
<inform-div1 id="sec-guessing">
<head>Autodetection of Character Encodings</head>
@ -3408,10 +3403,10 @@ processor without, or with, any accompanying
<p>
Because each XML entity not in UTF-8 or UTF-16 format <emph>must</emph>
begin with an XML encoding declaration, in which the first characters
must be &apos;<code>&lt;?xml</code>&apos;, any conforming processor can detect,
must be '<code>&lt;?xml</code>', any conforming processor can detect,
after two to four octets of input, which of the following cases apply.
In reading this list, it may help to know that in UCS-4, &apos;&lt;&apos; is
&quot;<code>#x0000003C</code>&quot; and &apos;?&apos; is &quot;<code>#x0000003F</code>&quot;, and the Byte
In reading this list, it may help to know that in UCS-4, '&lt;' is
&quot;<code>#x0000003C</code>&quot; and '?' is &quot;<code>#x0000003F</code>&quot;, and the Byte
Order Mark required of UTF-16 data streams is &quot;<code>#xFEFF</code>&quot;.</p>
<p>
<ulist><item><p><code>00 00 00 3C</code>: UCS-4, big-endian machine (1234 order)</p></item><item><p><code>3C 00 00 00</code>: UCS-4, little-endian machine (4321 order)</p></item><item><p><code>00 00 3C 00</code>: UCS-4, unusual octet order (2143)</p></item><item><p><code>00 3C 00 00</code>: UCS-4, unusual octet order (3412)</p></item><item><p><code>FE FF</code>: UTF-16, big-endian</p></item><item><p><code>FF FE</code>: UTF-16, little-endian</p></item><item><p><code>00 3C 00 3F</code>: UTF-16, big-endian, no Byte Order Mark
@ -3456,7 +3451,7 @@ character of input.
</p>
<p>
Like any self-labeling system, the XML encoding declaration will not
work if any software changes the entity&apos;s character set or encoding
work if any software changes the entity's character set or encoding
without updating the encoding declaration. Implementors of
character-encoding routines should be careful to ensure the accuracy
of the internal and external information used to label the entity.
@ -3556,7 +3551,7 @@ Co-editor</role>
<name>Joel Nava, Adobe</name>
</member>
<member>
<name>Conleth O&apos;Connell, Vignette</name>
<name>Conleth O'Connell, Vignette</name>
</member>
<member>
<name>Peter Sharpe, SoftQuad</name>

View File

@ -3,39 +3,39 @@
<!ELEMENT diagram (diagramdata , layer*)>
<!ELEMENT diagramdata (attribute)*>
<!ELEMENT layer (object | group)*>
<!ELEMENT object (attribute* , connections?)>
<!ELEMENT connections (connection)*>
<!ELEMENT connection EMPTY>
<!ELEMENT group (object | group)*>
<!ELEMENT attribute (composite | int | enum | real | boolean | color | point | rectangle | string | font)*>
<!ELEMENT composite (attribute)*>
<!ELEMENT int EMPTY>
<!ELEMENT enum EMPTY>
<!ELEMENT real EMPTY>
<!ELEMENT boolean EMPTY>
<!ELEMENT color EMPTY>
<!ELEMENT point EMPTY>
<!ELEMENT rectangle EMPTY>
<!ELEMENT string EMPTY>
<!ELEMENT font EMPTY>
<!ATTLIST layer name CDATA #REQUIRED>
<!ATTLIST layer visible (true | false) #REQUIRED>
<!ELEMENT object (attribute* , connections?)>
<!ATTLIST object type CDATA #REQUIRED>
<!ATTLIST object version NMTOKEN #REQUIRED>
<!ATTLIST object id ID #REQUIRED>
<!ELEMENT connections (connection)*>
<!ELEMENT connection EMPTY>
<!ATTLIST connection handle NMTOKEN #REQUIRED>
<!ATTLIST connection to IDREF #REQUIRED>
<!ATTLIST connection connection NMTOKEN #REQUIRED>
<!ELEMENT group (object | group)*>
<!ELEMENT attribute (composite | int | enum | real | boolean | color | point | rectangle | string | font)*>
<!ATTLIST attribute name CDATA #REQUIRED>
<!ELEMENT composite (attribute)*>
<!ATTLIST composite type CDATA #IMPLIED>
<!ELEMENT int EMPTY>
<!ATTLIST int val NMTOKEN #REQUIRED>
<!ELEMENT enum EMPTY>
<!ATTLIST enum val NMTOKEN #REQUIRED>
<!ELEMENT real EMPTY>
<!ATTLIST real val CDATA #REQUIRED>
<!ELEMENT boolean EMPTY>
<!ATTLIST boolean val (true | false) #REQUIRED>
<!ELEMENT color EMPTY>
<!ATTLIST color val CDATA #REQUIRED>
<!ELEMENT point EMPTY>
<!ATTLIST point val CDATA #REQUIRED>
<!ELEMENT rectangle EMPTY>
<!ATTLIST rectangle val CDATA #REQUIRED>
<!ELEMENT string EMPTY>
<!ATTLIST string val CDATA #IMPLIED>
<!ELEMENT font EMPTY>
<!ATTLIST font name CDATA #REQUIRED>
]>
<dia:diagram xmlns:dia="http://www.lysator.liu.se/~alla/dia/">

View File

@ -68,7 +68,7 @@ type="text/css"?>
</status>
<abstract>
<!-- edited the abstract for further clarity - bent -->
<p>This specification defines constructs that may be inserted into XML DTDs, schemas and document instances to describe links between objects. It uses XML syntax to create structures that can describe the simple unidirectional hyperlinks of today&apos;s HTML as well as more sophisticated links.</p>
<p>This specification defines constructs that may be inserted into XML DTDs, schemas and document instances to describe links between objects. It uses XML syntax to create structures that can describe the simple unidirectional hyperlinks of today's HTML as well as more sophisticated links.</p>
</abstract>
<pubstmt>
<p>Burlington, Seekonk, et al.: World-Wide Web Consortium, XML Working Group, 1998.</p>
@ -99,7 +99,7 @@ type="text/css"?>
<sitem>1999-05-12: Prose/organization work. Re-organized some of the sections, removed XML constructs from the document, added descriptive prose, edited document text for clarity. Rewrote the link recognition section. bent</sitem>
<sitem>1999-05-17: Further prose work. Added non-normative examples. Clarified arcs. bent</sitem>
<sitem>1999-05-23: Edited for grammar and clarity. bent</sitem>
<sitem>1999-05-27: Final once-over before sending to group. Fixed sjd&apos;s email address. bent</sitem>
<sitem>1999-05-27: Final once-over before sending to group. Fixed sjd's email address. bent</sitem>
</slist>
</revisiondesc>
</header>
@ -109,7 +109,7 @@ type="text/css"?>
<head>Introduction</head>
<p>This specification defines constructs that may be inserted into XML DTDs, schemas, and document instances to describe links between objects. A <termref def="dt-link">link</termref>, as the term is used here, is an explicit relationship between two or more data objects or portions of data objects. This specification is concerned with the syntax used to assert link existence and describe link characteristics. Implicit (unasserted) relationships, for example that of one word to the next or that of a word in a text to its entry in an on-line dictionary are obviously important, but outside its scope.</p>
<p>Links are asserted by <xtermref href="WD-xml-lang.html#dt-element">elements </xtermref> contained in <xtermref href="WD-xml-lang.html#dt-xml-doc">XML document instances</xtermref>. The simplest case is very like an HTML <code>A</code> link, and has these characteristics:
<ulist><item><p>The link is expressed at one of its ends (similar to the <code>A</code> element in some document)</p></item><item><p>Users can only initiate travel from that end to the other</p></item><item><p>The link&apos;s effect on windows, frames, go-back lists, stylesheets in use, and so on is mainly determined by browsers, not by the link itself. For example, traversal of <code>A</code> links normally replaces the current view, perhaps with a user option to open a new window.</p></item><item><p>The link goes to only one destination (although a server may have great freedom in finding or dynamically creating that destination).</p></item></ulist>
<ulist><item><p>The link is expressed at one of its ends (similar to the <code>A</code> element in some document)</p></item><item><p>Users can only initiate travel from that end to the other</p></item><item><p>The link's effect on windows, frames, go-back lists, stylesheets in use, and so on is mainly determined by browsers, not by the link itself. For example, traversal of <code>A</code> links normally replaces the current view, perhaps with a user option to open a new window.</p></item><item><p>The link goes to only one destination (although a server may have great freedom in finding or dynamically creating that destination).</p></item></ulist>
</p>
<p>While this set of characteristics is already very powerful and obviously has proven itself highly useful and effective, each of these assumptions also limits the range of hypertext functionality. The linking model defined here provides ways to create links that go beyond each of these specific characteristics, thus providing features previously available mostly in dedicated hypermedia systems.
</p>
@ -137,7 +137,7 @@ document. bent-->
<glist><gitem><label><termdef id="dt-arc" term="Arc">arc</termdef></label><def><p>A symbolic representation of traversal behavior in links, especially the direction, context and timing of traversal.</p></def></gitem><gitem><label><termdef id="dt-eltree" term="Element Tree">element tree</termdef></label><def><p>A representation of the relevant structure specified by the tags and attributes in an XML document, based on &quot;groves&quot; as defined in the ISO DSSSL standard. </p></def></gitem><gitem><label><termdef id="dt-inline" term="In-Line Link">inline link</termdef></label><def><p>Abstractly, a <termref def="dt-link">link</termref> which serves as one of its own <termref def="dt-resource">resources</termref>. Concretely, a link where the content of the <termref def="dt-linkel">linking element</termref> serves as a <termref def="dt-particip-resource">participating resource</termref>.
HTML <code>A</code>, HyTime <code>clink</code>, and TEI <code>XREF</code>
are all inline links.</p></def></gitem><gitem><label><termdef id="dt-link" term="Link">link</termdef></label><def><p>An explicit relationship between two or more data objects or portions of data objects.</p></def></gitem><gitem><label><termdef id="dt-linkel" term="Linking Element">linking element </termdef></label><def><p>An <xtermref href="WD-xml-lang.html#dt-element">element</xtermref> that asserts the existence and describes the characteristics of a <termref def="dt-link"> link</termref>.</p></def></gitem><gitem><label><termdef id="dt-local-resource" term="Local Resource">local resource</termdef></label><def><p>The content of an <termref def="dt-inline">inline</termref>linking element. Note that the content of the linking element could be explicitly pointed to by means of a regular <termref def="dt-locator">locator</termref> in the same linking element, in which case the resource is considered <termref def="dt-remote-resource"> remote</termref>, not local.</p></def></gitem><gitem><label><termdef id="dt-locator" term="Locator">locator</termdef> </label><def><p>Data, provided as part of a link, which identifies a
<termref def="dt-resource">resource</termref>.</p></def></gitem><gitem><label><termdef id="dt-multidir" term="Multi-Directional Link">multidirectional link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose <termref def="dt-traversal"> traversal</termref> can be initiated from more than one of its <termref def="dt-particip-resource"> participating resources</termref>. Note that being able to &quot;go back&quot; after following a one-directional link does not make the link multidirectional.</p></def></gitem><gitem><label><termdef id="dt-outofline" term="Out-of-line Link">out-of-line link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose content does not serve as one of the link&apos;s <termref def="dt-particip-resource">participating resources </termref>. Such links presuppose a notion like <termref def="dt-xlg">extended link groups</termref>, which instruct application software where to look for links. Out-of-line links are generally required for supporting multidirectional <termref def="dt-traversal">traversal</termref> and for allowing read-only resources to have outgoing links.</p></def></gitem><gitem><label><termdef id="dt-parsedq" term="Parsed">parsed</termdef></label><def><p>In the context of link behavior, a parsed link is any link whose content is transcluded into the document where the link originated. The use of the term &quot;parsed&quot; directly refers to the concept in XML of a
<termref def="dt-resource">resource</termref>.</p></def></gitem><gitem><label><termdef id="dt-multidir" term="Multi-Directional Link">multidirectional link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose <termref def="dt-traversal"> traversal</termref> can be initiated from more than one of its <termref def="dt-particip-resource"> participating resources</termref>. Note that being able to &quot;go back&quot; after following a one-directional link does not make the link multidirectional.</p></def></gitem><gitem><label><termdef id="dt-outofline" term="Out-of-line Link">out-of-line link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose content does not serve as one of the link's <termref def="dt-particip-resource">participating resources </termref>. Such links presuppose a notion like <termref def="dt-xlg">extended link groups</termref>, which instruct application software where to look for links. Out-of-line links are generally required for supporting multidirectional <termref def="dt-traversal">traversal</termref> and for allowing read-only resources to have outgoing links.</p></def></gitem><gitem><label><termdef id="dt-parsedq" term="Parsed">parsed</termdef></label><def><p>In the context of link behavior, a parsed link is any link whose content is transcluded into the document where the link originated. The use of the term &quot;parsed&quot; directly refers to the concept in XML of a
parsed entity.</p></def></gitem><gitem><label><termdef id="dt-particip-resource" term="Participating Resource"> participating resource</termdef></label><def><p>A <termref def="dt-resource">resource</termref> that belongs to a link. All resources are potential contributors to a link; participating resources are the actual contributors to a particular link.</p></def></gitem><gitem><label><termdef id="dt-remote-resource" term="Remote Resource">remote resource</termdef></label><def><p>Any participating resource of a link that is pointed to with a locator. </p></def></gitem><gitem><label><termdef id="dt-resource" term="Resource">resource</termdef></label><def><p>In the abstract sense, an addressable unit of information or service that is participating in a <termref def="dt-link">link</termref>. Examples include files, images, documents, programs, and query results. Concretely, anything reachable by the use of a <termref def="dt-locator">locator</termref> in some <termref def="dt-linkel">linking element</termref>. Note that this term and its definition are taken from the basic specifications governing the World Wide Web. <!--Joel notes: need link here. bent asks: A link?-->
</p></def></gitem><gitem><label><termdef id="dt-subresource" term="sub-Resource">sub-resource</termdef></label><def><p>A portion of a resource, pointed to as the precise destination of a link. As one example, a link might specify that an entire document be retrieved and displayed, but that some specific part(s) of it is the specific linked data, to be treated in an application-appropriate manner such as indication by highlighting, scrolling, etc.</p></def></gitem><gitem><label><termdef id="dt-traversal" term="Traversal">traversal</termdef></label><def><p>The action of using a <termref def="dt-link">link</termref>; that is, of accessing a <termref def="dt-resource">resource</termref>. Traversal may be initiated by a user action (for example, clicking on the displayed content of a <termref def="dt-linkel">linking element</termref>) or occur under program control.</p></def></gitem></glist>
</p>
@ -156,8 +156,8 @@ document. bent-->
<p>A locator generally contains a URI, as described in IETF RFCs <bibref ref="rfc1738"/> and <bibref ref="rfc1808"/>. As these RFCs state, the URI may include a trailing <emph>query</emph> (marked by a leading &quot;<code>?</code>&quot;), and be followed by a &quot;<code>#</code>&quot; and a <emph>fragment identifier</emph>, with the query interpreted by the host providing the indicated resource, and the interpretation of the fragment identifier dependent on the data type of the indicated resource.</p>
<!--Is there some restriction on URNs having queries and/or fragment identifiers? Since these RFCs don't mention URIs explicitly, should the wording here lead from URLs to URIs more explicitly? -elm-->
<p>In order to locate XML documents and portions of documents, a locator value may contain either a <xtermref href="http://www.w3.org/Addressing/rfc1738.txt"> URI</xtermref> or a fragment identifier, or both. Any fragment identifier for pointing into XML must be an <xtermref href="http://www.w3.org/TR/WD-xptr#dt-xpointer"> XPointer</xtermref>.</p>
<p>Special syntax may be used to request the use of particular processing models in accessing the locator&apos;s resource. This is designed to reflect the realities of network operation, where it may or may not be desirable to exercise fine control over the distribution of work between local and remote processors.
<scrap id="locator" lang="ebnf"><head>Locator</head><prod id="nt-locator"><lhs>Locator</lhs><rhs><nt def="nt-uri">URI</nt></rhs><rhs>| <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs><rhs>| <nt def="nt-uri">URI</nt> <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs></prod><prod id="nt-connector"><lhs>Connector</lhs><rhs>&apos;#&apos; | &apos;|&apos;</rhs></prod><prod id="nt-uri"><lhs>URI</lhs><rhs><xnt href="WD-xml-lang.html#NT-URLchar">URIchar*</xnt></rhs></prod></scrap>
<p>Special syntax may be used to request the use of particular processing models in accessing the locator's resource. This is designed to reflect the realities of network operation, where it may or may not be desirable to exercise fine control over the distribution of work between local and remote processors.
<scrap id="locator" lang="ebnf"><head>Locator</head><prod id="nt-locator"><lhs>Locator</lhs><rhs><nt def="nt-uri">URI</nt></rhs><rhs>| <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs><rhs>| <nt def="nt-uri">URI</nt> <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs></prod><prod id="nt-connector"><lhs>Connector</lhs><rhs>'#' | '|'</rhs></prod><prod id="nt-uri"><lhs>URI</lhs><rhs><xnt href="WD-xml-lang.html#NT-URLchar">URIchar*</xnt></rhs></prod></scrap>
</p>
<p><termdef id="dt-designated" term="Designated Resource">In this discussion, the term <term>designated resource</term> refers to the resource which an entire locator serves to locate.</termdef> The following rules apply:
<ulist><item><p><termdef id="dt-containing-resource" term="Containing Resource"> The URI, if provided, locates a resource called the <term>containing resource</term>.</termdef></p></item><item><p>If the URI is not provided, the containing resource is considered to be the document in which the linking element is contained.
@ -169,7 +169,7 @@ document. bent-->
</p>
<p>Note that the definition of a URI includes an optional query component. </p>
<p>In the case where the URI contains a query (to be interpreted by the server), information providers and authors of server software are urged to use queries as follows:
<scrap id="querysyntax" lang="ebnf"><head>Query</head><prod id="nt-query"><lhs>Query</lhs><rhs>&apos;XML-XPTR=&apos; (<xnt href="http://www.w3.org/TR/WD-xptr"> XPointer</xnt> | <xnt href="http://www.w3.org/TR/REC-xml#NT-Name">Name</xnt>)</rhs></prod></scrap>
<scrap id="querysyntax" lang="ebnf"><head>Query</head><prod id="nt-query"><lhs>Query</lhs><rhs>'XML-XPTR=' (<xnt href="http://www.w3.org/TR/WD-xptr"> XPointer</xnt> | <xnt href="http://www.w3.org/TR/REC-xml#NT-Name">Name</xnt>)</rhs></prod></scrap>
</p>
<!-- fixed link to XML recommendation - bent -->
</div1>
@ -177,7 +177,7 @@ document. bent-->
<?Pub Dtl?>
<head>Link Recognition</head>
<p>The existence of a <termref def="dt-link">link</termref> is asserted by a <termref def="dt-linkel">linking element</termref>. Linking elements must be recognized reliably by application software in order to provide appropriate display and behavior. There are several ways link recognition could be accomplished: for example, reserving element type names, reserving attributes names, leaving the matter of recognition entirely up to stylesheets and application software, or using the XLink <xtermref href="http://www.w3.org/TR/REC-xml-names/">namespace</xtermref> to specify element names and attribute names that would be recognized by namespace and XLink-aware processors. Using element and attribute names within the XLink namespace provides a balance between giving users control of their own markup language design and keeping the identification of linking elements simple and unambiguous.</p>
<p>The two approaches to identifying linking elements are relatively simple to implement. For example, here&apos;s how the HTML <code>A</code> element would be declared using attributes within the XLink namespace, and then how an element within the XLink namespace might do the same:
<p>The two approaches to identifying linking elements are relatively simple to implement. For example, here's how the HTML <code>A</code> element would be declared using attributes within the XLink namespace, and then how an element within the XLink namespace might do the same:
<eg>&lt;A xlink:type=&quot;simple&quot; xlink:href=&quot;http://www.w3.org/TR/wd-xlink/&quot;
xlink:title=&quot;The Xlink Working Draft&quot;&gt;The XLink Working Draft.&lt;/A&gt;</eg>
<eg>&lt;xlink:simple href=&quot;http://www.w3.org/TR/wd-xlink/&quot;
@ -208,8 +208,8 @@ title=&quot;The XLink Working Draft&quot;&gt;The XLink Working Draft&lt;/xlink:s
</div2>
<div2 id="link-semantics">
<head>Semantic Attributes</head>
<p>There are two attributes associated with semantics, <code>role</code> and <code>title</code>. The <code>role</code> attribute is a generic string used to describe the function of the link&apos;s content. For example, a poem might have a link with a <code>role=&quot;stanza&quot;</code>. The <code>role</code> is also used as an identifier for the <code>from</code> and <code>to</code> attributes of arcs.</p>
<p>The <code>title</code> attribute is designed to provide human-readable text describing the link. It is very useful for those who have text-based applications, whether that be due to a constricted device that cannot display the link&apos;s content, or if it&apos;s being read by an application to a visually-impaired user, or if it&apos;s being used to create a table of links. The <code>title</code> attribute contains a simple, descriptive string.</p>
<p>There are two attributes associated with semantics, <code>role</code> and <code>title</code>. The <code>role</code> attribute is a generic string used to describe the function of the link's content. For example, a poem might have a link with a <code>role=&quot;stanza&quot;</code>. The <code>role</code> is also used as an identifier for the <code>from</code> and <code>to</code> attributes of arcs.</p>
<p>The <code>title</code> attribute is designed to provide human-readable text describing the link. It is very useful for those who have text-based applications, whether that be due to a constricted device that cannot display the link's content, or if it's being read by an application to a visually-impaired user, or if it's being used to create a table of links. The <code>title</code> attribute contains a simple, descriptive string.</p>
</div2>
</div1>
<div1 id="linking-elements">
@ -257,7 +257,7 @@ The XLink Working Draft.&lt;/foo&gt;</eg>
<p>Note that it is meaningful to have an out-of-line simple link, although
such links are uncommon. They are called &quot;one-ended&quot; and are typically used
to associate discrete semantic properties with locations. The properties might
be expressed by attributes on the link, the link&apos;s element type name, or in
be expressed by attributes on the link, the link's element type name, or in
some other way, and are not considered full-fledged resources of the link.
Most out-of-line links are extended links, as these have a far wider range
of uses.</p>
@ -270,7 +270,7 @@ The XLink Working Draft.&lt;/foo&gt;</eg>
<p>These additional capabilities of extended links are required for:
<ulist><item><p>Enabling outgoing links in documents that cannot be modified to add an inline link</p></item><item><p>Creating links to and from resources in formats with no native support for embedded links (such as most multimedia formats)</p></item><item><p>Applying and filtering sets of relevant links on demand</p></item><item><p>Enabling other advanced hypermedia capabilities</p></item></ulist>
</p>
<p>Application software might be expected to provide traversal among all of a link&apos;s participating resources (subject to semantic constraints outside the scope of this specification) and to signal the fact that a given resource or sub-resource participates in one or more links when it is displayed (even though there is no markup at exactly that point to signal it).</p>
<p>Application software might be expected to provide traversal among all of a link's participating resources (subject to semantic constraints outside the scope of this specification) and to signal the fact that a given resource or sub-resource participates in one or more links when it is displayed (even though there is no markup at exactly that point to signal it).</p>
<p>A linking element for an extended link contains a series of <xtermref href="http://www.w3.org/TR/REC-xml/#dt-parentchild">child elements</xtermref> that serve as locators and arcs. Because an extended link can have more than one remote resource, it separates out linking itself from the mechanisms used to locate each resource (whereas a simple link combines the two).</p>
<p>The <code>xlink:type</code> attribute value for an extended link must be <code> extended</code>, if the link is being instantiated on an arbitrary element. Note that extended links introduce variants of the <code>show</code> and <code>actuate</code> behavior attributes. These attributes, the <code>showdefault</code> and <code>actuatedefault</code> define the same behavior as their counterparts. However, in this case, they are considered to define the default behavior for all the linking elements that they contain.</p>
<p>However, when a linking element within an extended link has a <code>show</code> or <code>actuate</code> attribute of its own, that attribute overrides the defaults set on the extended linking element.</p>
@ -293,13 +293,13 @@ The XLink Working Draft.&lt;/foo&gt;</eg>
xlink:showdefault (new|parsed|replace) #IMPLIED
xlink:actuatedefault (user|auto) #IMPLIED &gt;</eg>
The following two examples demonstrate how each of the above might appear within a document instance. Note that the content of these examples would be other elements. For brevity&apos;s sake, they&apos;ve been left blank. The first example shows how the link might appear, using an explicit XLink extended link:
The following two examples demonstrate how each of the above might appear within a document instance. Note that the content of these examples would be other elements. For brevity's sake, they've been left blank. The first example shows how the link might appear, using an explicit XLink extended link:
<eg>&lt;xlink:extended role=&quot;address book&quot; title=&quot;Ben&apos;s Address Book&quot; showdefault=&quot;replace&quot; actuatedefault=&quot;user&quot;&gt; ... &lt;/xlink:extended&gt;</eg>
<eg>&lt;xlink:extended role=&quot;address book&quot; title=&quot;Ben's Address Book&quot; showdefault=&quot;replace&quot; actuatedefault=&quot;user&quot;&gt; ... &lt;/xlink:extended&gt;</eg>
And the second shows how the link might appear, using an arbitrary element:
<eg>&lt;foo xlink:type=&quot;extended&quot; xlink:role=&quot;address book&quot; xlink:title=&quot;Ben&apos;s Address Book&quot; xlink:showdefault=&quot;replace&quot; xlink:actuatedefault=&quot;user&quot;&gt; ... &lt;/foo&gt;</eg>
<eg>&lt;foo xlink:type=&quot;extended&quot; xlink:role=&quot;address book&quot; xlink:title=&quot;Ben's Address Book&quot; xlink:showdefault=&quot;replace&quot; xlink:actuatedefault=&quot;user&quot;&gt; ... &lt;/foo&gt;</eg>
</p>
</div2>
<div2 id="xlink-arcs">

View File

@ -1,6 +1,6 @@
./test/valid/xlink.xml:450: validity error: ID dt-arc already defined
<p><termdef id="dt-arc" term="Arc">An <term>arc</term> is contained within an
^
./test/valid/xlink.xml:530: validity error: IDREF attribute def reference an unknown ID 'dt-xlg'
./test/valid/xlink.xml:530: validity error: IDREF attribute def reference an unknown ID "dt-xlg"
^

View File

@ -1,8 +1,8 @@
<?xml version="1.0"?>
<!DOCTYPE test [
<!ELEMENT test (#PCDATA)>
<!ENTITY % xx "&#37;zz;">
<!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >'>
<!ENTITY tricky "error-prone">
<!ELEMENT test (#PCDATA)>
]>
<test>This sample shows a &tricky; method.</test>

View File

@ -1,5 +1,5 @@
<!DOCTYPE doc [
<!ENTITY % YN '"Yes"' >
<!ENTITY WhatHeSaid "He said %YN;" >
<!ENTITY YN '"Yes"' >
<!ENTITY WhatHeSaid "He said &YN;" >
]>
<doc>&WhatHeSaid;</doc>

View File

@ -73,6 +73,7 @@ xmlSAXHandler emptySAXHandlerStruct = {
NULL, /* xmlParserError */
NULL, /* xmlParserError */
NULL, /* getParameterEntity */
NULL, /* cdataBlock; */
};
xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
@ -454,6 +455,21 @@ processingInstructionDebug(void *ctx, const xmlChar *target,
(char *) target, (char *) data);
}
/**
 * cdataBlockDebug:
 * @ctx:  the user data (XML parser context), not used by this callback
 * @value:  start of the block content
 * @len:  the block length
 *
 * Debug SAX callback installed in the cdataBlock slot of the handler:
 * traces the event on stderr, showing at most the first 20 characters
 * of the content followed by the reported length.
 */
void
cdataBlockDebug(void *ctx, const xmlChar *value, int len)
{
    const char *text = (const char *) value;

    fprintf(stderr, "SAX.pcdata(%.20s, %d)\n", text, len);
}
/**
* commentDebug:
* @ctxt: An XML parser context
@ -553,6 +569,7 @@ xmlSAXHandler debugSAXHandlerStruct = {
errorDebug,
fatalErrorDebug,
getParameterEntityDebug,
cdataBlockDebug
};
xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;

322
tester.c
View File

@ -14,6 +14,8 @@
#include <stdio.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
@ -39,6 +41,7 @@
#include "xmlmemory.h"
#include "parser.h"
#include "parserInternals.h"
#include "HTMLparser.h"
#include "HTMLtree.h"
#include "tree.h"
@ -51,18 +54,252 @@ static int copy = 0;
static int recovery = 0;
static int noent = 0;
static int noout = 0;
static int nowrap = 0;
static int valid = 0;
static int postvalid = 0;
static int repeat = 0;
static int insert = 0;
static int compress = 0;
static int html = 0;
static int htmlout = 0;
static int shell = 0;
static int push = 0;
static int blanks = 0;
static int noblanks = 0;
extern int xmlDoValidityCheckingDefaultValue;
extern int xmlGetWarningsDefaultValue;
/************************************************************************
* *
* HTML output *
* *
************************************************************************/
char buffer[50000];
/**
 * xmlHTMLEncodeSend:
 *
 * Passes the text pending in the global buffer through
 * xmlEncodeEntitiesReentrant(), writes the result to stderr when one is
 * returned, and then empties the buffer.
 */
void
xmlHTMLEncodeSend(void) {
    char *escaped = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);

    if (escaped != NULL) {
        fputs(escaped, stderr);
        xmlFree(escaped);
    }
    /* The buffer is always reset, even when the encoding step failed. */
    buffer[0] = 0;
}
/**
 * xmlHTMLPrintFileInfo:
 * @input:  an xmlParserInputPtr input, may be NULL
 *
 * Opens an HTML paragraph on stderr, then appends the file name and line
 * number of @input (or "Entity: line N: " when the input has no file name)
 * to the global buffer and flushes it through xmlHTMLEncodeSend().
 * Nothing is appended when @input is NULL.
 */
void
xmlHTMLPrintFileInfo(xmlParserInputPtr input) {
    size_t len;

    fprintf(stderr, "<p>");
    if (input != NULL) {
        /*
         * The file name has arbitrary length: bound the write by the
         * space left in the buffer instead of trusting it to fit.
         */
        len = strlen(buffer);
        if (input->filename) {
            snprintf(&buffer[len], sizeof(buffer) - len, "%s:%d: ",
                     input->filename, input->line);
        } else {
            snprintf(&buffer[len], sizeof(buffer) - len,
                     "Entity: line %d: ", input->line);
        }
    }
    xmlHTMLEncodeSend();
}
/**
 * xmlHTMLPrintFileContext:
 * @input:  an xmlParserInputPtr input, may be NULL
 *
 * Displays the current context within the input content for error
 * tracking: the line surrounding the current position (at most 79
 * characters of it) followed by a line of spaces ending with a '^'
 * marker under the current position. The text is accumulated in the
 * global buffer, sent through xmlHTMLEncodeSend() and wrapped in a
 * <pre> element on stderr. Does nothing when @input is NULL.
 */
void
xmlHTMLPrintFileContext(xmlParserInputPtr input) {
    const xmlChar *cur, *start, *line;
    size_t len;
    int n;

    if (input == NULL) return;
    fprintf(stderr, "<pre>\n");
    start = input->base;

    /* Step back over trailing end-of-line characters ... */
    cur = input->cur;
    while ((cur > start) && ((*cur == '\n') || (*cur == '\r'))) {
        cur--;
    }
    /* ... then back to the beginning of that line, 80 characters at most. */
    n = 0;
    while ((n++ < 80) && (cur > start) && (*cur != '\n') && (*cur != '\r'))
        cur--;
    if ((*cur == '\n') || (*cur == '\r')) cur++;
    line = cur;

    /*
     * Append to the buffer while tracking the write position, so that the
     * length is computed once and every write stays inside the buffer.
     */
    len = strlen(buffer);
    n = 0;
    while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) {
        if (len + 1 < sizeof(buffer))
            buffer[len++] = (char) *cur;
        cur++;
        n++;
    }
    if (len + 1 < sizeof(buffer))
        buffer[len++] = '\n';

    /*
     * Locate the error position again; the scan must not run before the
     * start of the input when everything up to it is end-of-line characters.
     */
    cur = input->cur;
    while ((cur > start) && ((*cur == '\n') || (*cur == '\r')))
        cur--;
    n = 0;
    while ((line < cur) && (n++ < 80)) {
        if (len + 1 < sizeof(buffer))
            buffer[len++] = ' ';
        line++;
    }
    if (len + 2 < sizeof(buffer)) {
        buffer[len++] = '^';
        buffer[len++] = '\n';
    }
    buffer[len] = 0;
    xmlHTMLEncodeSend();
    fprintf(stderr, "</pre>");
}
/**
 * xmlHTMLError:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Displays and formats an error message as HTML on stderr: gives the
 * file, line, the formatted message and the input context around the
 * current position. When the current input has no file name and other
 * inputs are stacked, the enclosing input is reported instead.
 */
void
xmlHTMLError(void *ctx, const char *msg, ...)
{
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlParserInputPtr input;
    va_list args;
    size_t len;

    buffer[0] = 0;
    input = ctxt->input;
    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1)) {
        input = ctxt->inputTab[ctxt->inputNr - 2];
    }

    xmlHTMLPrintFileInfo(input);

    fprintf(stderr, "<b>error</b>: ");
    /* The expanded message has no known size: bound it by the buffer. */
    len = strlen(buffer);
    va_start(args, msg);
    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
    va_end(args);
    xmlHTMLEncodeSend();
    fprintf(stderr, "</p>\n");

    xmlHTMLPrintFileContext(input);
    xmlHTMLEncodeSend();
}
/**
 * xmlHTMLWarning:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Displays and formats a warning message as HTML on stderr: gives the
 * file, line, the formatted message and the input context around the
 * current position. When the current input has no file name and other
 * inputs are stacked, the enclosing input is reported instead.
 */
void
xmlHTMLWarning(void *ctx, const char *msg, ...)
{
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlParserInputPtr input;
    va_list args;
    size_t len;

    buffer[0] = 0;
    input = ctxt->input;
    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1)) {
        input = ctxt->inputTab[ctxt->inputNr - 2];
    }

    xmlHTMLPrintFileInfo(input);

    fprintf(stderr, "<b>warning</b>: ");
    /* The expanded message has no known size: bound it by the buffer. */
    len = strlen(buffer);
    va_start(args, msg);
    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
    va_end(args);
    xmlHTMLEncodeSend();
    fprintf(stderr, "</p>\n");

    xmlHTMLPrintFileContext(input);
    xmlHTMLEncodeSend();
}
/**
 * xmlHTMLValidityError:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Displays and formats a validity error message as HTML on stderr: gives
 * the file, line, the formatted message and the input context around the
 * current position. When the current input has no file name and other
 * inputs are stacked, the enclosing input is reported instead.
 */
void
xmlHTMLValidityError(void *ctx, const char *msg, ...)
{
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlParserInputPtr input;
    va_list args;
    size_t len;

    buffer[0] = 0;
    input = ctxt->input;
    /* The context may have no current input; the helpers below accept NULL. */
    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1))
        input = ctxt->inputTab[ctxt->inputNr - 2];

    xmlHTMLPrintFileInfo(input);

    fprintf(stderr, "<b>validity error</b>: ");
    /* The expanded message has no known size: bound it by the buffer. */
    len = strlen(buffer);
    va_start(args, msg);
    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
    va_end(args);
    xmlHTMLEncodeSend();
    fprintf(stderr, "</p>\n");

    xmlHTMLPrintFileContext(input);
    xmlHTMLEncodeSend();
}
/**
 * xmlHTMLValidityWarning:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Displays and formats a validity warning message as HTML on stderr:
 * gives the file, line, the formatted message and the input context
 * around the current position. When the current input has no file name
 * and other inputs are stacked, the enclosing input is reported instead.
 */
void
xmlHTMLValidityWarning(void *ctx, const char *msg, ...)
{
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlParserInputPtr input;
    va_list args;
    size_t len;

    buffer[0] = 0;
    input = ctxt->input;
    /* The context may have no current input; the helpers below accept NULL. */
    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1))
        input = ctxt->inputTab[ctxt->inputNr - 2];

    xmlHTMLPrintFileInfo(input);

    fprintf(stderr, "<b>validity warning</b>: ");
    /* The expanded message has no known size: bound it by the buffer. */
    len = strlen(buffer);
    va_start(args, msg);
    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
    va_end(args);
    xmlHTMLEncodeSend();
    fprintf(stderr, "</p>\n");

    xmlHTMLPrintFileContext(input);
    xmlHTMLEncodeSend();
}
/************************************************************************
* *
* Shell Interface *
* *
************************************************************************/
/**
* xmlShellReadline:
* @prompt: the prompt value
@ -97,6 +334,11 @@ xmlShellReadline(char *prompt) {
#endif
}
/************************************************************************
* *
* Test processing *
* *
************************************************************************/
void parseAndPrintFile(char *filename) {
xmlDocPtr doc = NULL, tmp;
@ -129,9 +371,40 @@ void parseAndPrintFile(char *filename) {
xmlFreeParserCtxt(ctxt);
}
}
} else if (recovery)
} else if (recovery) {
doc = xmlRecoverFile(filename);
} else if (htmlout) {
int ret;
xmlParserCtxtPtr ctxt;
xmlSAXHandler silent, *old;
ctxt = xmlCreateFileParserCtxt(filename);
memcpy(&silent, ctxt->sax, sizeof(silent));
old = ctxt->sax;
silent.error = xmlHTMLError;
if (xmlGetWarningsDefaultValue)
silent.warning = xmlHTMLWarning;
else
silent.warning = NULL;
silent.fatalError = xmlHTMLError;
ctxt->sax = &silent;
ctxt->vctxt.error = xmlHTMLValidityError;
if (xmlGetWarningsDefaultValue)
ctxt->vctxt.warning = xmlHTMLValidityWarning;
else
ctxt->vctxt.warning = NULL;
xmlParseDocument(ctxt);
ret = ctxt->wellFormed;
doc = ctxt->myDoc;
ctxt->sax = old;
xmlFreeParserCtxt(ctxt);
if (!ret) {
xmlFreeDoc(doc);
doc = NULL;
}
} else
doc = xmlParseFile(filename);
}
@ -155,8 +428,8 @@ void parseAndPrintFile(char *filename) {
int nb, i;
xmlNodePtr node;
if (doc->root != NULL) {
node = doc->root;
if (doc->children != NULL) {
node = doc->children;
while ((node != NULL) && (node->last == NULL)) node = node->next;
if (node != NULL) {
nb = xmlValidGetValidElements(node->last, NULL, list, 256);
@ -224,6 +497,12 @@ int main(int argc, char **argv) {
else if ((!strcmp(argv[i], "-noout")) ||
(!strcmp(argv[i], "--noout")))
noout++;
else if ((!strcmp(argv[i], "-htmlout")) ||
(!strcmp(argv[i], "--htmlout")))
htmlout++;
else if ((!strcmp(argv[i], "-nowrap")) ||
(!strcmp(argv[i], "--nowrap")))
nowrap++;
else if ((!strcmp(argv[i], "-valid")) ||
(!strcmp(argv[i], "--valid")))
valid++;
@ -244,15 +523,19 @@ int main(int argc, char **argv) {
compress++;
xmlSetCompressMode(9);
}
else if ((!strcmp(argv[i], "-blanks")) ||
(!strcmp(argv[i], "--blanks"))) {
blanks++;
xmlKeepBlanksDefault(1);
}
else if ((!strcmp(argv[i], "-html")) ||
(!strcmp(argv[i], "--html"))) {
html++;
}
else if ((!strcmp(argv[i], "-nowarning")) ||
(!strcmp(argv[i], "--nowarning"))) {
xmlGetWarningsDefaultValue = 0;
}
else if ((!strcmp(argv[i], "-noblanks")) ||
(!strcmp(argv[i], "--noblanks"))) {
noblanks++;
xmlKeepBlanksDefault(0);
}
else if ((!strcmp(argv[i], "-shell")) ||
(!strcmp(argv[i], "--shell"))) {
shell++;
@ -261,6 +544,17 @@ int main(int argc, char **argv) {
}
if (noent != 0) xmlSubstituteEntitiesDefault(1);
if (valid != 0) xmlDoValidityCheckingDefaultValue = 1;
if ((htmlout) && (!nowrap)) {
fprintf(stderr,
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n");
fprintf(stderr, "\t\"http://www.w3.org/TR/REC-html40/loose.dtd\">\n");
fprintf(stderr,
"<html><head><title>%s output</title></head>\n",
argv[0]);
fprintf(stderr,
"<body bgcolor=\"#ffffff\"><h1 align=\"center\">%s output</h1>\n",
argv[0]);
}
for (i = 1; i < argc ; i++) {
if (argv[i][0] != '-') {
if (repeat) {
@ -271,8 +565,11 @@ int main(int argc, char **argv) {
files ++;
}
}
if ((htmlout) && (!nowrap)) {
fprintf(stderr, "</body></html>\n");
}
if (files == 0) {
printf("Usage : %s [--debug] [--shell] [--debugent] [--copy] [--recover] [--noent] [--noout] [--valid] [--repeat] XMLfiles ...\n",
printf("Usage : %s [--debug] [--debugent] [--copy] [--recover] [--noent] [--noout] [--valid] [--repeat] XMLfiles ...\n",
argv[0]);
printf("\tParse the XML files and output the result of the parsing\n");
printf("\t--debug : dump a debug tree of the in-memory document\n");
@ -281,6 +578,8 @@ int main(int argc, char **argv) {
printf("\t--recover : output what was parsable on broken XML documents\n");
printf("\t--noent : substitute entity references by their value\n");
printf("\t--noout : don't output the result tree\n");
printf("\t--htmlout : output results as HTML\n");
printf("\t--nowrap : do not put HTML doc wrapper\n");
printf("\t--valid : validate the document in addition to std well-formed check\n");
printf("\t--postvalid : do a posteriori validation, i.e after parsing\n");
printf("\t--repeat : repeat 100 times, for timing or profiling\n");
@ -288,8 +587,9 @@ int main(int argc, char **argv) {
printf("\t--compress : turn on gzip compression of output\n");
printf("\t--html : use the HTML parser\n");
printf("\t--shell : run a navigating shell\n");
printf("\t--blanks : keep blank text node\n");
printf("\t--push : use the push mode of the parser\n");
printf("\t--nowarning : do not emit warnings from parser/validator\n");
printf("\t--noblanks : drop (ignorable?) blanks spaces\n");
}
xmlCleanupParser();
xmlMemoryDump();

811
tree.c

File diff suppressed because it is too large Load Diff

121
tree.h
View File

@ -36,24 +36,22 @@ typedef enum {
XML_DOCUMENT_TYPE_NODE= 10,
XML_DOCUMENT_FRAG_NODE= 11,
XML_NOTATION_NODE= 12,
XML_HTML_DOCUMENT_NODE= 13
XML_HTML_DOCUMENT_NODE= 13,
XML_DTD_NODE= 14,
XML_ELEMENT_DECL= 15,
XML_ATTRIBUTE_DECL= 16,
XML_ENTITY_DECL= 17
} xmlElementType;
/*
* Size of an internal character representation.
*
* Currently we use 8bit chars internal representation for memory efficiency,
* but the parser is not tied to that, just define UNICODE to switch to
* a 16 bits internal representation. Note that with 8 bits wide
* xmlChars one can still use UTF-8 to handle correctly non ISO-Latin
* input.
* We use 8bit chars internal representation for memory efficiency,
* Note that with 8 bits wide xmlChars one can still use UTF-8 to handle
* correctly non ISO-Latin input.
*/
#ifdef UNICODE
typedef unsigned short xmlChar;
#else
typedef unsigned char xmlChar;
#endif
#ifndef WIN32
#ifndef CHAR
@ -109,14 +107,25 @@ struct _xmlEnumeration {
typedef struct _xmlAttribute xmlAttribute;
typedef xmlAttribute *xmlAttributePtr;
struct _xmlAttribute {
const xmlChar *elem; /* Element holding the attribute */
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_ATTRIBUTE_DECL, must be second ! */
const xmlChar *name; /* Attribute name */
struct _xmlAttribute *next; /* list of attributes of an element */
xmlAttributeType type; /* The type */
struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
struct _xmlAttribute *nexth; /* next in hash table */
xmlAttributeType atype; /* The attribute type */
xmlAttributeDefault def; /* the default */
const xmlChar *defaultValue; /* or the default value */
xmlEnumerationPtr tree; /* or the enumeration tree if any */
const xmlChar *prefix; /* the namespace prefix if any */
const xmlChar *elem; /* Element holding the attribute */
};
/*
@ -156,8 +165,19 @@ typedef enum {
typedef struct _xmlElement xmlElement;
typedef xmlElement *xmlElementPtr;
struct _xmlElement {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_ELEMENT_DECL, must be second ! */
const xmlChar *name; /* Element name */
xmlElementTypeVal type; /* The type */
struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlElementTypeVal etype; /* The type */
xmlElementContentPtr content; /* the allowed element content */
xmlAttributePtr attributes; /* List of the declared attributes */
};
@ -188,14 +208,25 @@ struct _xmlNs {
typedef struct _xmlDtd xmlDtd;
typedef xmlDtd *xmlDtdPtr;
struct _xmlDtd {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
#endif
xmlElementType type; /* XML_DTD_NODE, must be second ! */
const xmlChar *name; /* Name of the DTD */
const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */
struct _xmlNode *children; /* the value of the property link */
struct _xmlNode *last; /* last child link */
struct _xmlDoc *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
/* End of common part */
void *notations; /* Hash table for notations if any */
void *elements; /* Hash table for elements if any */
void *attributes; /* Hash table for attributes if any */
void *entities; /* Hash table for entities if any */
/* struct xmlDtd *next; * next link for this document */
const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */
};
/*
@ -206,14 +237,17 @@ typedef xmlAttr *xmlAttrPtr;
struct _xmlAttr {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif
xmlElementType type; /* XML_ATTRIBUTE_NODE, must be third ! */
struct _xmlNode *node; /* attr->node link */
struct _xmlAttr *next; /* attribute list link */
xmlElementType type; /* XML_ATTRIBUTE_NODE, must be second ! */
const xmlChar *name; /* the name of the property */
struct _xmlNode *val; /* the value of the property */
struct _xmlNode *children; /* the value of the property */
struct _xmlNode *last; /* NULL */
struct _xmlNode *parent; /* child->parent link */
struct _xmlAttr *next; /* next sibling link */
struct _xmlAttr *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */
xmlNs *ns; /* pointer to the associated namespace */
xmlAttributeType atype; /* the attribute type if validating */
};
/*
@ -266,24 +300,25 @@ typedef xmlNode *xmlNodePtr;
struct _xmlNode {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif
xmlElementType type; /* type number in the DTD, must be third ! */
struct _xmlDoc *doc; /* the containing document */
xmlElementType type; /* type number, must be second ! */
const xmlChar *name; /* the name of the node, or the entity */
struct _xmlNode *children; /* parent->childs link */
struct _xmlNode *last; /* last child link */
struct _xmlNode *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlNode *childs; /* parent->childs link */
struct _xmlNode *last; /* last child link */
struct _xmlAttr *properties;/* properties list */
const xmlChar *name; /* the name of the node, or the entity */
struct _xmlDoc *doc; /* the containing document */
xmlNs *ns; /* pointer to the associated namespace */
xmlNs *nsDef; /* namespace definitions on this node */
#ifndef XML_USE_BUFFER_CONTENT
xmlChar *content; /* the content */
#else
xmlBufferPtr content; /* the content in a buffer */
#endif
/* End of common part */
struct _xmlAttr *properties;/* properties list */
xmlNs *nsDef; /* namespace definitions on this node */
};
/*
@ -294,20 +329,27 @@ typedef xmlDoc *xmlDocPtr;
struct _xmlDoc {
#ifndef XML_WITHOUT_CORBA
void *_private; /* for Corba, must be first ! */
void *vepv; /* for Corba, must be next ! */
#endif
xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! */
char *name; /* name/filename/URI of the document */
const xmlChar *version; /* the XML version string */
const xmlChar *encoding; /* encoding, if any */
struct _xmlNode *children; /* the document tree */
struct _xmlNode *last; /* last child link */
struct _xmlNode *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* autoreference to itself */
/* End of common part */
int compression;/* level of zlib compression */
int standalone; /* standalone document (no external refs) */
struct _xmlDtd *intSubset; /* the document internal subset */
struct _xmlDtd *extSubset; /* the document external subset */
struct _xmlNs *oldNs; /* Global namespace, the old way */
struct _xmlNode *root; /* the document tree */
const xmlChar *version; /* the XML version string */
const xmlChar *encoding; /* encoding, if any */
void *ids; /* Hash table for ID attributes if any */
void *refs; /* Hash table for IDREFs attributes if any */
const xmlChar *URL; /* The URI for that document */
};
/*
@ -422,6 +464,8 @@ xmlNodePtr xmlNewComment (const xmlChar *content);
xmlNodePtr xmlNewCDataBlock (xmlDocPtr doc,
const xmlChar *content,
int len);
xmlNodePtr xmlNewCharRef (xmlDocPtr doc,
const xmlChar *name);
xmlNodePtr xmlNewReference (xmlDocPtr doc,
const xmlChar *name);
xmlNodePtr xmlCopyNode (xmlNodePtr node,
@ -513,13 +557,14 @@ xmlChar * xmlNodeGetContent (xmlNodePtr cur);
xmlChar * xmlNodeGetLang (xmlNodePtr cur);
void xmlNodeSetLang (xmlNodePtr cur,
const xmlChar *lang);
int xmlNodeGetSpacePreserve (xmlNodePtr cur);
xmlChar * xmlNodeGetBase (xmlDocPtr doc,
xmlNodePtr cur);
/*
* Removing content.
*/
int xmlRemoveProp (xmlAttrPtr attr); /* TODO */
int xmlRemoveProp (xmlAttrPtr attr);
int xmlRemoveNode (xmlNodePtr node); /* TODO */
/*
@ -532,6 +577,12 @@ void xmlBufferWriteChar (xmlBufferPtr buf,
void xmlBufferWriteQuotedString(xmlBufferPtr buf,
const xmlChar *string);
/*
* Namespace handling
*/
int xmlReconciliateNs (xmlDocPtr doc,
xmlNodePtr tree);
/*
* Saving
*/

888
valid.c

File diff suppressed because it is too large Load Diff

24
valid.h
View File

@ -29,6 +29,14 @@ struct _xmlValidCtxt {
void *userData; /* user specific data block */
xmlValidityErrorFunc error; /* the callback in case of errors */
xmlValidityWarningFunc warning; /* the callback in case of warning */
/* Node analysis stack used when validating within entities */
xmlNodePtr node; /* Current parsed Node */
int nodeNr; /* Depth of the parsing stack */
int nodeMax; /* Max depth of the parsing stack */
xmlNodePtr *nodeTab; /* array of nodes */
int finishDtd; /* finished validating the Dtd ? */
};
/*
@ -114,6 +122,8 @@ xmlNotationPtr xmlAddNotationDecl (xmlValidCtxtPtr ctxt,
const xmlChar *SystemID);
xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table);
void xmlFreeNotationTable(xmlNotationTablePtr table);
void xmlDumpNotationDecl (xmlBufferPtr buf,
xmlNotationPtr nota);
void xmlDumpNotationTable(xmlBufferPtr buf,
xmlNotationTablePtr table);
@ -122,6 +132,9 @@ xmlElementContentPtr xmlNewElementContent (xmlChar *name,
xmlElementContentType type);
xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content);
void xmlFreeElementContent(xmlElementContentPtr cur);
void xmlSprintfElementContent(char *buf,
xmlElementContentPtr content,
int glob);
/* Element */
xmlElementPtr xmlAddElementDecl (xmlValidCtxtPtr ctxt,
@ -133,6 +146,8 @@ xmlElementTablePtr xmlCopyElementTable (xmlElementTablePtr table);
void xmlFreeElementTable (xmlElementTablePtr table);
void xmlDumpElementTable (xmlBufferPtr buf,
xmlElementTablePtr table);
void xmlDumpElementDecl (xmlBufferPtr buf,
xmlElementPtr elem);
/* Enumeration */
xmlEnumerationPtr xmlCreateEnumeration (xmlChar *name);
@ -144,6 +159,7 @@ xmlAttributePtr xmlAddAttributeDecl (xmlValidCtxtPtr ctxt,
xmlDtdPtr dtd,
const xmlChar *elem,
const xmlChar *name,
const xmlChar *prefix,
xmlAttributeType type,
xmlAttributeDefault def,
const xmlChar *defaultValue,
@ -152,6 +168,8 @@ xmlAttributeTablePtr xmlCopyAttributeTable (xmlAttributeTablePtr table);
void xmlFreeAttributeTable (xmlAttributeTablePtr table);
void xmlDumpAttributeTable (xmlBufferPtr buf,
xmlAttributeTablePtr table);
void xmlDumpAttributeDecl (xmlBufferPtr buf,
xmlAttributePtr attr);
/* IDs */
xmlIDPtr xmlAddID (xmlValidCtxtPtr ctxt,
@ -188,6 +206,10 @@ int xmlValidateRoot (xmlValidCtxtPtr ctxt,
int xmlValidateElementDecl (xmlValidCtxtPtr ctxt,
xmlDocPtr doc,
xmlElementPtr elem);
xmlChar * xmlValidNormalizeAttributeValue(xmlDocPtr doc,
xmlNodePtr elem,
const xmlChar *name,
const xmlChar *value);
int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt,
xmlDocPtr doc,
xmlAttributePtr attr);
@ -199,6 +221,8 @@ int xmlValidateNotationDecl (xmlValidCtxtPtr ctxt,
int xmlValidateDtd (xmlValidCtxtPtr ctxt,
xmlDocPtr doc,
xmlDtdPtr dtd);
int xmlValidateDtdFinal (xmlValidCtxtPtr ctxt,
xmlDocPtr doc);
int xmlValidateDocument (xmlValidCtxtPtr ctxt,
xmlDocPtr doc);
int xmlValidateElement (xmlValidCtxtPtr ctxt,

View File

@ -115,8 +115,22 @@ typedef enum {
XML_ERR_ENCODING_NAME, /* 80 */
XML_ERR_HYPHEN_IN_COMMENT /* 81 */
XML_ERR_HYPHEN_IN_COMMENT, /* 81 */
XML_ERR_INVALID_ENCODING, /* 82 */
XML_ERR_EXT_ENTITY_STANDALONE, /* 83 */
XML_ERR_CONDSEC_INVALID, /* 84 */
XML_ERR_VALUE_REQUIRED, /* 85 */
XML_ERR_NOT_WELL_BALANCED, /* 86 */
XML_ERR_EXTRA_CONTENT, /* 87 */
XML_ERR_ENTITY_CHAR_ERROR, /* 88 */
XML_ERR_ENTITY_PE_INTERNAL, /* 89 */
XML_ERR_ENTITY_LOOP, /* 90 */
XML_ERR_ENTITY_BOUNDARY /* 91 */
}xmlParserErrors;
void xmlParserError (void *ctx,

43
xmlIO.c
View File

@ -118,6 +118,7 @@ xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
* If filename is "-" then we use stdin as the input.
* Automatic support for ZLIB/Compress compressed document is provided
* by default if found at compile-time.
* Do an encoding check if enc == XML_CHAR_ENCODING_NONE
*
* Returns the new parser input or NULL
*/
@ -201,13 +202,10 @@ xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
}
#endif
}
/*
* TODO : get the 4 first bytes and decode the charset
* if enc == XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines here. !!!
* enc = xmlDetectCharEncoding(buffer);
*/
/*
* Allocate the Input buffer front-end.
*/
ret = xmlAllocParserInputBuffer(enc);
if (ret != NULL) {
#ifdef HAVE_ZLIB_H
@ -218,7 +216,6 @@ xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
ret->httpIO = httpIO;
ret->ftpIO = ftpIO;
}
xmlParserInputBufferRead(ret, 4);
return(ret);
}
@ -289,19 +286,30 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
if (len < 0) return(0);
if (in->encoder != NULL) {
xmlChar *buffer;
int processed = len;
buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
if (buffer == NULL) {
fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
xmlFree(buffer);
return(-1);
}
nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar),
(xmlChar *) buf, len);
(xmlChar *) buf, &processed);
/*
* TODO : we really need to have something atomic or the
* encoder must report the number of bytes read
*/
if (nbchars < 0) {
fprintf(stderr, "xmlParserInputBufferPush: encoder error\n");
xmlFree(buffer);
return(-1);
}
if (processed != len) {
fprintf(stderr,
"TODO xmlParserInputBufferPush: processed != len\n");
xmlFree(buffer);
return(-1);
}
buffer[nbchars] = 0;
xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
xmlFree(buffer);
@ -382,6 +390,7 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
}
if (in->encoder != NULL) {
xmlChar *buf;
int wrote = res;
buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
if (buf == NULL) {
@ -390,10 +399,24 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
return(-1);
}
nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
BAD_CAST buffer, res);
BAD_CAST buffer, &wrote);
buf[nbchars] = 0;
xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
xmlFree(buf);
/*
* Check that the encoder was able to process the full input
*/
if (wrote != res) {
fprintf(stderr,
"TODO : xmlParserInputBufferGrow wrote %d != res %d\n",
wrote, res);
/*
* TODO !!!
* Need to keep the unprocessed input in a buffer in->unprocessed
*/
}
} else {
nbchars = res;
buffer[nbchars] = 0;

View File

@ -8,7 +8,7 @@
#ifndef _DEBUG_MEMORY_ALLOC_
#define _DEBUG_MEMORY_ALLOC_
#define NO_DEBUG_MEMORY
/* #define NO_DEBUG_MEMORY */
#ifdef NO_DEBUG_MEMORY
#ifdef HAVE_MALLOC_H

59
xpath.c
View File

@ -213,9 +213,9 @@ PUSH_AND_POP(xmlXPathObjectPtr, value)
* Dirty macros, i.e. one needs to make assumptions about the context to use them
*
* CUR_PTR returns the current pointer to the xmlChar to be parsed.
* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
* in ISO-Latin or UTF-8, and the current 16 bit value if compiled
* in UNICODE mode. This should be used internally by the parser
* CUR returns the current xmlChar value, i.e. a 8 bit value
* in ISO-Latin or UTF-8.
* This should be used internally by the parser
* only to compare to ASCII values otherwise it would break when
* running with UTF-8 encoding.
* NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
@ -237,11 +237,8 @@ PUSH_AND_POP(xmlXPathObjectPtr, value)
#define SKIP_BLANKS \
while (IS_BLANK(*(ctxt->cur))) NEXT
#ifndef USE_UTF_8
#define CURRENT (*ctxt->cur)
#define NEXT ((*ctxt->cur) ? ctxt->cur++: ctxt->cur)
#else
#endif
/************************************************************************
* *
@ -877,7 +874,7 @@ xmlXPathFreeContext(xmlXPathContextPtr ctxt) {
fprintf(xmlXPathDebug, "%s:%d Internal error: no document\n", \
__FILE__, __LINE__); \
} \
if (ctxt->doc->root == NULL) { \
if (ctxt->doc->children == NULL) { \
fprintf(xmlXPathDebug, \
"%s:%d Internal error: document without root\n", \
__FILE__, __LINE__); \
@ -1496,14 +1493,18 @@ xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
case XML_PI_NODE:
case XML_COMMENT_NODE:
case XML_NOTATION_NODE:
return(ctxt->context->node->childs);
case XML_ATTRIBUTE_NODE:
return(NULL);
case XML_DTD_NODE:
return(ctxt->context->node->children);
case XML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
case XML_DOCUMENT_FRAG_NODE:
case XML_HTML_DOCUMENT_NODE:
return(((xmlDocPtr) ctxt->context->node)->root);
return(((xmlDocPtr) ctxt->context->node)->children);
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
case XML_ATTRIBUTE_NODE:
return(NULL);
}
return(NULL);
}
@ -1533,11 +1534,11 @@ xmlXPathNextDescendant(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
return(NULL);
if (ctxt->context->node == (xmlNodePtr) ctxt->context->doc)
return(ctxt->context->doc->root);
return(ctxt->context->node->childs);
return(ctxt->context->doc->children);
return(ctxt->context->node->children);
}
if (cur->childs != NULL) return(cur->childs);
if (cur->children != NULL) return(cur->children);
if (cur->next != NULL) return(cur->next);
do {
@ -1606,13 +1607,17 @@ xmlXPathNextParent(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
case XML_PI_NODE:
case XML_COMMENT_NODE:
case XML_NOTATION_NODE:
case XML_DTD_NODE:
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
if (ctxt->context->node->parent == NULL)
return((xmlNodePtr) ctxt->context->doc);
return(ctxt->context->node->parent);
case XML_ATTRIBUTE_NODE: {
xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node;
return(att->node);
return(att->parent);
}
case XML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
@ -1655,6 +1660,10 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
case XML_ENTITY_NODE:
case XML_PI_NODE:
case XML_COMMENT_NODE:
case XML_DTD_NODE:
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
case XML_NOTATION_NODE:
if (ctxt->context->node->parent == NULL)
return((xmlNodePtr) ctxt->context->doc);
@ -1662,7 +1671,7 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
case XML_ATTRIBUTE_NODE: {
xmlAttrPtr cur = (xmlAttrPtr) ctxt->context->node;
return(cur->node);
return(cur->parent);
}
case XML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
@ -1672,7 +1681,7 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
}
return(NULL);
}
if (cur == ctxt->context->doc->root)
if (cur == ctxt->context->doc->children)
return((xmlNodePtr) ctxt->context->doc);
if (cur == (xmlNodePtr) ctxt->context->doc)
return(NULL);
@ -1685,11 +1694,15 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
case XML_PI_NODE:
case XML_COMMENT_NODE:
case XML_NOTATION_NODE:
case XML_DTD_NODE:
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
return(cur->parent);
case XML_ATTRIBUTE_NODE: {
xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node;
return(att->node);
return(att->parent);
}
case XML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
@ -1780,13 +1793,13 @@ xmlXPathNextFollowing(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
return(NULL);
if (cur == NULL)
return(ctxt->context->node->next);; /* !!!!!!!!! */
if (cur->childs != NULL) return(cur->childs);
if (cur->children != NULL) return(cur->children);
if (cur->next != NULL) return(cur->next);
do {
cur = cur->parent;
if (cur == NULL) return(NULL);
if (cur == ctxt->context->doc->root) return(NULL);
if (cur == ctxt->context->doc->children) return(NULL);
if (cur->next != NULL) {
cur = cur->next;
return(cur);
@ -1820,7 +1833,7 @@ xmlXPathNextPreceding(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
do {
cur = cur->parent;
if (cur == NULL) return(NULL);
if (cur == ctxt->context->doc->root) return(NULL);
if (cur == ctxt->context->doc->children) return(NULL);
if (cur->prev != NULL) {
cur = cur->prev;
return(cur);
@ -2278,7 +2291,7 @@ xmlXPathIdFunction(xmlXPathParserContextPtr ctxt, int nargs) {
ID = xmlStrndup(tokens, cur - tokens);
attr = xmlGetID(ctxt->context->doc, ID);
if (attr != NULL) {
elem = attr->node;
elem = attr->parent;
xmlXPathNodeSetAdd(ret->nodesetval, elem);
}
if (ID != NULL)
@ -3677,6 +3690,8 @@ xmlXPathEvalPathExpr(xmlXPathParserContextPtr ctxt) {
if (name != NULL)
xmlFree(name);
}
if (ctxt->context->nodelist != NULL)
valuePush(ctxt, xmlXPathNewNodeSetList(ctxt->context->nodelist));
}
/**