1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-21 14:53:44 +03:00

doc: Misc fixes to parser docs

This commit is contained in:
Nick Wellnhofer
2025-05-04 02:15:25 +02:00
parent b7274fb02f
commit 1bf44f09ba
4 changed files with 358 additions and 266 deletions

View File

@@ -1,9 +1,9 @@
/** /**
* @file * @file
* *
* @brief the core parser module * @brief Validating XML 1.0 parser
* *
* Interfaces, constants and types related to the XML parser * Interfaces, constants and types related to the XML parser.
* *
* @copyright See Copyright for the status of this software. * @copyright See Copyright for the status of this software.
* *
@@ -92,12 +92,7 @@ typedef enum {
typedef void (* xmlParserInputDeallocate)(xmlChar *str); typedef void (* xmlParserInputDeallocate)(xmlChar *str);
/** /**
* An xmlParserInput is an input flow for the XML processor. * Parser input
* Each entity parsed is associated an xmlParserInput (except the
* few predefined ones). This is the case both for internal entities
* - in which case the flow is already completely in memory - or
* external entities - in which case we use the buf structure for
* progressive reading and I18N conversions to the internal UTF-8 format.
*/ */
struct _xmlParserInput { struct _xmlParserInput {
/* Input buffer */ /* Input buffer */
@@ -187,14 +182,12 @@ typedef enum {
XML_PARSER_XML_DECL /* before XML decl (but after BOM) */ XML_PARSER_XML_DECL /* before XML decl (but after BOM) */
} xmlParserInputState; } xmlParserInputState;
/** @cond IGNORE */
/* /*
* Internal bits in the 'loadsubset' context member * Internal bits in the 'loadsubset' context member
*/ */
#define XML_DETECT_IDS 2 #define XML_DETECT_IDS 2
#define XML_COMPLETE_ATTRS 4 #define XML_COMPLETE_ATTRS 4
#define XML_SKIP_IDS 8 #define XML_SKIP_IDS 8
/** @endcond */
/* /*
* Internal type. Only XML_PARSE_READER is used. * Internal type. Only XML_PARSE_READER is used.
@@ -234,8 +227,8 @@ typedef xmlParserErrors
xmlResourceType type, xmlParserInputFlags flags, xmlResourceType type, xmlParserInputFlags flags,
xmlParserInputPtr *out); xmlParserInputPtr *out);
/* /**
* The parser context. * Parser context
*/ */
struct _xmlParserCtxt { struct _xmlParserCtxt {
/* The SAX handler */ /* The SAX handler */
@@ -846,28 +839,50 @@ typedef void (*endElementNsSAX2Func) (void *ctx,
const xmlChar *prefix, const xmlChar *prefix,
const xmlChar *URI); const xmlChar *URI);
/**
struct _xmlSAXHandler { * Callbacks for SAX parser
/* *
* For DTD-related handlers, it's recommended to either use the * For DTD-related handlers, it's recommended to either use the
* original libxml2 handler or set them to NULL if DTDs can be * original libxml2 handler or set them to NULL if DTDs can be
* ignored. * ignored.
*/ */
internalSubsetSAXFunc internalSubset; /* DTD */ struct _xmlSAXHandler {
isStandaloneSAXFunc isStandalone; /* unused */ /** @brief DTD */
hasInternalSubsetSAXFunc hasInternalSubset; /* DTD */ internalSubsetSAXFunc internalSubset;
hasExternalSubsetSAXFunc hasExternalSubset; /* DTD */ /** @brief unused */
resolveEntitySAXFunc resolveEntity; /* DTD */ isStandaloneSAXFunc isStandalone;
getEntitySAXFunc getEntity; /* DTD */ /** @brief DTD */
entityDeclSAXFunc entityDecl; /* DTD */ hasInternalSubsetSAXFunc hasInternalSubset;
notationDeclSAXFunc notationDecl; /* DTD */ /** @brief DTD */
attributeDeclSAXFunc attributeDecl; /* DTD */ hasExternalSubsetSAXFunc hasExternalSubset;
elementDeclSAXFunc elementDecl; /* DTD */ /** @brief DTD */
unparsedEntityDeclSAXFunc unparsedEntityDecl; /* DTD */ resolveEntitySAXFunc resolveEntity;
setDocumentLocatorSAXFunc setDocumentLocator; /* deprecated */ /** @brief DTD */
getEntitySAXFunc getEntity;
/** @brief DTD */
entityDeclSAXFunc entityDecl;
/** @brief DTD */
notationDeclSAXFunc notationDecl;
/** @brief DTD */
attributeDeclSAXFunc attributeDecl;
/** @brief DTD */
elementDeclSAXFunc elementDecl;
/** @brief DTD */
unparsedEntityDeclSAXFunc unparsedEntityDecl;
/** @brief useless */
setDocumentLocatorSAXFunc setDocumentLocator;
/**
* @brief Called at the start of a document
*
* Use xmlCtxtGetVersion(), xmlCtxtGetDeclaredEncoding() and
* xmlCtxtGetStandalone() to get data from the XML declaration.
*/
startDocumentSAXFunc startDocument; startDocumentSAXFunc startDocument;
/** @brief End of document */
endDocumentSAXFunc endDocument; endDocumentSAXFunc endDocument;
/* /**
* @brief Legacy start tag handler
*
* `startElement` and `endElement` are only used by the legacy SAX1 * `startElement` and `endElement` are only used by the legacy SAX1
* interface and should not be used in new software. If you really * interface and should not be used in new software. If you really
* have to enable SAX1, the preferred way is set the `initialized` * have to enable SAX1, the preferred way is set the `initialized`
@@ -881,37 +896,53 @@ struct _xmlSAXHandler {
* together with custom SAX callbacks. * together with custom SAX callbacks.
*/ */
startElementSAXFunc startElement; startElementSAXFunc startElement;
/** @brief See _xmlSAXHandler.startElement */
endElementSAXFunc endElement; endElementSAXFunc endElement;
/** @brief Entity reference */
referenceSAXFunc reference; referenceSAXFunc reference;
/** @brief Text */
charactersSAXFunc characters; charactersSAXFunc characters;
/* /**
* @brief Ignorable whitespace
*
* `ignorableWhitespace` should always be set to the same value * `ignorableWhitespace` should always be set to the same value
* as `characters`. Otherwise, the parser will try to detect * as `characters`. Otherwise, the parser will try to detect
* whitespace which is unreliable. * whitespace which is unreliable.
*/ */
ignorableWhitespaceSAXFunc ignorableWhitespace; ignorableWhitespaceSAXFunc ignorableWhitespace;
/** @brief Processing instruction */
processingInstructionSAXFunc processingInstruction; processingInstructionSAXFunc processingInstruction;
/** @brief Comment */
commentSAXFunc comment; commentSAXFunc comment;
/** @brief Warning message */
warningSAXFunc warning; warningSAXFunc warning;
/** @brief Error message */
errorSAXFunc error; errorSAXFunc error;
fatalErrorSAXFunc fatalError; /* unused, `error` gets all the errors */ /** @brief Unused, all errors go to `error`. */
getParameterEntitySAXFunc getParameterEntity; /* DTD */ fatalErrorSAXFunc fatalError;
/** @brief DTD */
getParameterEntitySAXFunc getParameterEntity;
cdataBlockSAXFunc cdataBlock; cdataBlockSAXFunc cdataBlock;
externalSubsetSAXFunc externalSubset; /* DTD */ /** @brief DTD */
/* externalSubsetSAXFunc externalSubset;
* `initialized` should always be set to XML_SAX2_MAGIC to enable the /**
* modern SAX2 interface. * @brief Legacy magic value
*
* `initialized` should always be set to XML_SAX2_MAGIC to
* enable the modern SAX2 interface.
*/ */
unsigned int initialized; unsigned int initialized;
/* /** @brief Application data */
* The following members are only used by the SAX2 interface.
*/
void *_private; void *_private;
/** @brief Start tag */
startElementNsSAX2Func startElementNs; startElementNsSAX2Func startElementNs;
/** @brief End tag */
endElementNsSAX2Func endElementNs; endElementNsSAX2Func endElementNs;
/* /**
* @brief Structured error handler.
*
* Takes precedence over `error` or `warning`, but modern code * Takes precedence over `error` or `warning`, but modern code
* should use xmlCtxtSetErrorHandler. * should use xmlCtxtSetErrorHandler().
*/ */
xmlStructuredErrorFunc serror; xmlStructuredErrorFunc serror;
}; };
@@ -972,7 +1003,6 @@ typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
XMLPUBVAR const char *const xmlParserVersion; XMLPUBVAR const char *const xmlParserVersion;
/** @cond IGNORE */
XML_DEPRECATED XML_DEPRECATED
XMLPUBVAR const xmlSAXLocator xmlDefaultSAXLocator; XMLPUBVAR const xmlSAXLocator xmlDefaultSAXLocator;
#ifdef LIBXML_SAX1_ENABLED #ifdef LIBXML_SAX1_ENABLED
@@ -1005,25 +1035,85 @@ XMLPUBFUN int *__xmlSaveNoEmptyTags(void);
#endif #endif
#ifndef XML_GLOBALS_NO_REDEFINITION #ifndef XML_GLOBALS_NO_REDEFINITION
/**
* Thread-local setting to enable validation. Defaults to 0.
*
* @deprecated Use the parser option XML_PARSE_DTDVALID.
*/
#define xmlDoValidityCheckingDefaultValue \ #define xmlDoValidityCheckingDefaultValue \
(*__xmlDoValidityCheckingDefaultValue()) (*__xmlDoValidityCheckingDefaultValue())
/**
* Thread-local setting to disable warnings. Defaults to 1.
*
* @deprecated Use the parser option XML_PARSE_NOWARNING.
*/
#define xmlGetWarningsDefaultValue \ #define xmlGetWarningsDefaultValue \
(*__xmlGetWarningsDefaultValue()) (*__xmlGetWarningsDefaultValue())
/**
* Thread-local setting to ignore some whitespace. Defaults
* to 1.
*
* @deprecated Use the parser option XML_PARSE_NOBLANKS.
*/
#define xmlKeepBlanksDefaultValue (*__xmlKeepBlanksDefaultValue()) #define xmlKeepBlanksDefaultValue (*__xmlKeepBlanksDefaultValue())
/**
* Thread-local setting to store line numbers. Defaults
* to 0, but is always enabled after setting parser options.
*
* @deprecated Shouldn't be needed when using parser options.
*/
#define xmlLineNumbersDefaultValue \ #define xmlLineNumbersDefaultValue \
(*__xmlLineNumbersDefaultValue()) (*__xmlLineNumbersDefaultValue())
/**
* Thread-local setting to enable loading of external DTDs.
* Defaults to 0.
*
* @deprecated Use the parser option XML_PARSE_DTDLOAD.
*/
#define xmlLoadExtDtdDefaultValue (*__xmlLoadExtDtdDefaultValue()) #define xmlLoadExtDtdDefaultValue (*__xmlLoadExtDtdDefaultValue())
/**
* Thread-local setting to enable pedantic warnings.
* Defaults to 0.
*
* @deprecated Use the parser option XML_PARSE_PEDANTIC.
*/
#define xmlPedanticParserDefaultValue \ #define xmlPedanticParserDefaultValue \
(*__xmlPedanticParserDefaultValue()) (*__xmlPedanticParserDefaultValue())
/**
* Thread-local setting to enable entity substitution.
* Defaults to 0.
*
* @deprecated Use the parser option XML_PARSE_NOENT.
*/
#define xmlSubstituteEntitiesDefaultValue \ #define xmlSubstituteEntitiesDefaultValue \
(*__xmlSubstituteEntitiesDefaultValue()) (*__xmlSubstituteEntitiesDefaultValue())
#ifdef LIBXML_OUTPUT_ENABLED #ifdef LIBXML_OUTPUT_ENABLED
/**
* Thread-local setting to disable indenting when
* formatting output. Defaults to 1.
*
* @deprecated Use the xmlsave.h API with option
* XML_SAVE_NO_INDENT.
*/
#define xmlIndentTreeOutput (*__xmlIndentTreeOutput()) #define xmlIndentTreeOutput (*__xmlIndentTreeOutput())
/**
* Thread-local setting to change the indent string.
* Defaults to two spaces.
*
* @deprecated Use the xmlsave.h API and
* xmlSaveSetIndentString().
*/
#define xmlTreeIndentString (*__xmlTreeIndentString()) #define xmlTreeIndentString (*__xmlTreeIndentString())
/**
* Thread-local setting to disable empty tags when
* serializing. Defaults to 0.
*
* @deprecated Use the xmlsave.h API with option
* XML_SAVE_NO_EMPTY.
*/
#define xmlSaveNoEmptyTags (*__xmlSaveNoEmptyTags()) #define xmlSaveNoEmptyTags (*__xmlSaveNoEmptyTags())
#endif #endif
#endif #endif
/** @endcond */
/* /*
* Init/Cleanup * Init/Cleanup
@@ -1666,7 +1756,7 @@ XMLPUBFUN xmlDocPtr
const char *encoding, const char *encoding,
int options); int options);
/** /*
* New input API * New input API
*/ */

View File

@@ -1,11 +1,10 @@
/** /**
* @file * @file
* *
* @brief internals routines and limits exported by the parser. * @brief Internals routines and limits exported by the parser.
* *
* this module exports a number of internal parsing routines * Except for some I/O-related functions, most of these macros and
* they are not really all intended for applications but * functions are deprecated.
* can prove useful doing low level processing.
* *
* @copyright See Copyright for the status of this software. * @copyright See Copyright for the status of this software.
* *
@@ -87,7 +86,8 @@ extern "C" {
* *
* Macro to check the following production in the XML spec: * Macro to check the following production in the XML spec:
* *
* [2] Char ::= \#x9 | \#xA | \#xD | [\#x20...] * [2] Char ::= #x9 | #xA | #xD | [#x20...]
*
* any byte character in the accepted range * any byte character in the accepted range
*/ */
#define IS_BYTE_CHAR(c) xmlIsChar_ch(c) #define IS_BYTE_CHAR(c) xmlIsChar_ch(c)
@@ -97,8 +97,9 @@ extern "C" {
* *
* Macro to check the following production in the XML spec: * Macro to check the following production in the XML spec:
* *
* [2] Char ::= \#x9 | \#xA | \#xD | [\#x20-\#xD7FF] | [\#xE000-\#xFFFD] * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
* | [\#x10000-\#x10FFFF] * | [#x10000-#x10FFFF]
*
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/ */
#define IS_CHAR(c) xmlIsCharQ(c) #define IS_CHAR(c) xmlIsCharQ(c)
@@ -115,7 +116,7 @@ extern "C" {
* *
* Macro to check the following production in the XML spec: * Macro to check the following production in the XML spec:
* *
* [3] S ::= (\#x20 | \#x9 | \#xD | \#xA)+ * [3] S ::= (#x20 | #x9 | #xD | #xA)+
*/ */
#define IS_BLANK(c) xmlIsBlankQ(c) #define IS_BLANK(c) xmlIsBlankQ(c)
@@ -172,10 +173,9 @@ extern "C" {
* *
* Macro to check the following production in the XML spec: * Macro to check the following production in the XML spec:
* *
* * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
* [89] Extender ::= \#x00B7 | \#x02D0 | \#x02D1 | \#x0387 | \#x0640 | * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
* \#x0E46 | \#x0EC6 | \#x3005 | [\#x3031-\#x3035] | * [#x309D-#x309E] | [#x30FC-#x30FE]
* [\#x309D-\#x309E] | [\#x30FC-\#x30FE]
*/ */
#define IS_EXTENDER(c) xmlIsExtenderQ(c) #define IS_EXTENDER(c) xmlIsExtenderQ(c)
@@ -191,8 +191,7 @@ extern "C" {
* *
* Macro to check the following production in the XML spec: * Macro to check the following production in the XML spec:
* *
* * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
* [86] Ideographic ::= [\#x4E00-\#x9FA5] | \#x3007 | [\#x3021-\#x3029]
*/ */
#define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c) #define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c)
@@ -201,7 +200,6 @@ extern "C" {
* *
* Macro to check the following production in the XML spec: * Macro to check the following production in the XML spec:
* *
*
* [84] Letter ::= BaseChar | Ideographic * [84] Letter ::= BaseChar | Ideographic
*/ */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
@@ -236,8 +234,8 @@ extern "C" {
* *
* Macro to check the following production in the XML spec: * Macro to check the following production in the XML spec:
* *
* * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] |
* [13] PubidChar ::= \#x20 | \#xD | \#xA | [a-zA-Z0-9] | [-'()+,./:=?;!*\#@$_%] * [-'()+,./:=?;!*#@$_%]
*/ */
#define IS_PUBIDCHAR(c) xmlIsPubidCharQ(c) #define IS_PUBIDCHAR(c) xmlIsPubidCharQ(c)
@@ -248,7 +246,7 @@ extern "C" {
*/ */
#define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c) #define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c)
/** /*
* Global variables used for predefined strings. * Global variables used for predefined strings.
*/ */
XMLPUBVAR const xmlChar xmlStringText[]; XMLPUBVAR const xmlChar xmlStringText[];
@@ -259,7 +257,7 @@ XMLPUBVAR const xmlChar xmlStringComment[];
XML_DEPRECATED XML_DEPRECATED
XMLPUBFUN int xmlIsLetter (int c); XMLPUBFUN int xmlIsLetter (int c);
/** /*
* Parser context. * Parser context.
*/ */
XMLPUBFUN xmlParserCtxtPtr XMLPUBFUN xmlParserCtxtPtr
@@ -292,7 +290,7 @@ XMLPUBFUN int
xmlParserInputPtr input, xmlParserInputPtr input,
xmlCharEncodingHandlerPtr handler); xmlCharEncodingHandlerPtr handler);
/** /*
* Input Streams. * Input Streams.
*/ */
XMLPUBFUN xmlParserInputPtr XMLPUBFUN xmlParserInputPtr
@@ -322,7 +320,7 @@ XMLPUBFUN xmlParserInputPtr
XMLPUBFUN xmlParserInputPtr XMLPUBFUN xmlParserInputPtr
xmlNewInputStream (xmlParserCtxtPtr ctxt); xmlNewInputStream (xmlParserCtxtPtr ctxt);
/** /*
* Namespaces. * Namespaces.
*/ */
XMLPUBFUN xmlChar * XMLPUBFUN xmlChar *
@@ -330,7 +328,7 @@ XMLPUBFUN xmlChar *
const xmlChar *name, const xmlChar *name,
xmlChar **prefix); xmlChar **prefix);
/** /*
* Generic production rules. * Generic production rules.
*/ */
XML_DEPRECATED XML_DEPRECATED
@@ -482,23 +480,11 @@ XMLPUBFUN void
xmlParseExternalSubset (xmlParserCtxtPtr ctxt, xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
const xmlChar *ExternalID, const xmlChar *ExternalID,
const xmlChar *SystemID); const xmlChar *SystemID);
/**
* If no entities need to be substituted.
*/
#define XML_SUBSTITUTE_NONE 0
/**
* Whether general entities need to be substituted.
*/
#define XML_SUBSTITUTE_REF 1
/**
* Whether parameter entities need to be substituted.
*/
#define XML_SUBSTITUTE_PEREF 2
/**
* Both general and parameter entities need to be substituted.
*/
#define XML_SUBSTITUTE_BOTH 3
#define XML_SUBSTITUTE_NONE 0
#define XML_SUBSTITUTE_REF 1
#define XML_SUBSTITUTE_PEREF 2
#define XML_SUBSTITUTE_BOTH 3
XML_DEPRECATED XML_DEPRECATED
XMLPUBFUN xmlChar * XMLPUBFUN xmlChar *
xmlStringDecodeEntities (xmlParserCtxtPtr ctxt, xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,

View File

@@ -1229,6 +1229,7 @@ xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
* The current REC reference the successors of RFC 1766, currently 5646 * The current REC reference the successors of RFC 1766, currently 5646
* *
* http://www.rfc-editor.org/rfc/rfc5646.txt * http://www.rfc-editor.org/rfc/rfc5646.txt
*
* langtag = language * langtag = language
* ["-" script] * ["-" script]
* ["-" region] * ["-" region]
@@ -2539,8 +2540,8 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
* *
* Parse a numeric character reference. Always consumes '&'. * Parse a numeric character reference. Always consumes '&'.
* *
* [66] CharRef ::= '&\#' [0-9]+ ';' | * [66] CharRef ::= '&#' [0-9]+ ';' |
* '&\#x' [0-9a-fA-F]+ ';' * '&#x' [0-9a-fA-F]+ ';'
* *
* [ WFC: Legal Character ] * [ WFC: Legal Character ]
* Characters referred to using character references must match the * Characters referred to using character references must match the
@@ -2645,8 +2646,8 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
* parse Reference declarations, variant parsing from a string rather * parse Reference declarations, variant parsing from a string rather
* than an input flow. * than an input flow.
* *
* [66] CharRef ::= '&\#' [0-9]+ ';' | * [66] CharRef ::= '&#' [0-9]+ ';' |
* '&\#x' [0-9a-fA-F]+ ';' * '&#x' [0-9a-fA-F]+ ';'
* *
* [ WFC: Legal Character ] * [ WFC: Legal Character ]
* Characters referred to using character references must match the * Characters referred to using character references must match the
@@ -2759,7 +2760,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
* *
* A PEReference may have been detected in the current input stream * A PEReference may have been detected in the current input stream
* the handling is done according to * the handling is done according to
* http://www.w3.org/TR/REC-xml\#entproc * http://www.w3.org/TR/REC-xml#entproc
* i.e. * i.e.
* - Included in literal in entity values * - Included in literal in entity values
* - Included as Parameter Entity reference within DTDs * - Included as Parameter Entity reference within DTDs
@@ -3326,7 +3327,7 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
* *
* [5] Name ::= (Letter | '_' | ':') (NameChar)* * [5] Name ::= (Letter | '_' | ':') (NameChar)*
* *
* [6] Names ::= Name (\#x20 Name)* * [6] Names ::= Name (#x20 Name)*
* *
* @returns the Name parsed or NULL * @returns the Name parsed or NULL
*/ */
@@ -3524,7 +3525,7 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
* *
* [5] Name ::= (Letter | '_' | ':') (NameChar)* * [5] Name ::= (Letter | '_' | ':') (NameChar)*
* *
* [6] Names ::= Name (\#x20 Name)* * [6] Names ::= Name (#x20 Name)*
* *
* @returns the Name parsed or NULL. The `str` pointer * @returns the Name parsed or NULL. The `str` pointer
* is updated to the current location in the string. * is updated to the current location in the string.
@@ -3616,7 +3617,7 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
* *
* [7] Nmtoken ::= (NameChar)+ * [7] Nmtoken ::= (NameChar)+
* *
* [8] Nmtokens ::= Nmtoken (\#x20 Nmtoken)* * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
* *
* @returns the Nmtoken parsed or NULL * @returns the Nmtoken parsed or NULL
*/ */
@@ -4228,21 +4229,25 @@ xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
* @param isNamespace whether this is a namespace declaration * @param isNamespace whether this is a namespace declaration
* *
* parse a value for an attribute. * parse a value for an attribute.
*
* NOTE: if no normalization is needed, the routine will return pointers * NOTE: if no normalization is needed, the routine will return pointers
* directly from the data buffer. * directly from the data buffer.
* *
* 3.3.3 Attribute-Value Normalization: * 3.3.3 Attribute-Value Normalization:
*
* Before the value of an attribute is passed to the application or * Before the value of an attribute is passed to the application or
* checked for validity, the XML processor must normalize it as follows: * checked for validity, the XML processor must normalize it as follows:
*
* - a character reference is processed by appending the referenced * - a character reference is processed by appending the referenced
* character to the attribute value * character to the attribute value
* - an entity reference is processed by recursively processing the * - an entity reference is processed by recursively processing the
* replacement text of the entity * replacement text of the entity
* - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
* appending \#x20 to the normalized value, except that only a single * appending \#x20 to the normalized value, except that only a single
* \#x20 is appended for a "\#xD\#xA" sequence that is part of an external * \#x20 is appended for a "#xD#xA" sequence that is part of an external
* parsed entity or the literal entity value of an internal parsed entity * parsed entity or the literal entity value of an internal parsed entity
* - other characters are processed by appending them to the normalized value * - other characters are processed by appending them to the normalized value
*
* If the declared value is not CDATA, then the XML processor must further * If the declared value is not CDATA, then the XML processor must further
* process the normalized attribute value by discarding any leading and * process the normalized attribute value by discarding any leading and
* trailing space (\#x20) characters, and by replacing sequences of space * trailing space (\#x20) characters, and by replacing sequences of space
@@ -4497,17 +4502,20 @@ error:
* "'" ([^<&'] | Reference)* "'" * "'" ([^<&'] | Reference)* "'"
* *
* 3.3.3 Attribute-Value Normalization: * 3.3.3 Attribute-Value Normalization:
*
* Before the value of an attribute is passed to the application or * Before the value of an attribute is passed to the application or
* checked for validity, the XML processor must normalize it as follows: * checked for validity, the XML processor must normalize it as follows:
*
* - a character reference is processed by appending the referenced * - a character reference is processed by appending the referenced
* character to the attribute value * character to the attribute value
* - an entity reference is processed by recursively processing the * - an entity reference is processed by recursively processing the
* replacement text of the entity * replacement text of the entity
* - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
* appending \#x20 to the normalized value, except that only a single * appending \#x20 to the normalized value, except that only a single
* \#x20 is appended for a "\#xD\#xA" sequence that is part of an external * \#x20 is appended for a "#xD#xA" sequence that is part of an external
* parsed entity or the literal entity value of an internal parsed entity * parsed entity or the literal entity value of an internal parsed entity
* - other characters are processed by appending them to the normalized value * - other characters are processed by appending them to the normalized value
*
* If the declared value is not CDATA, then the XML processor must further * If the declared value is not CDATA, then the XML processor must further
* process the normalized attribute value by discarding any leading and * process the normalized attribute value by discarding any leading and
* trailing space (\#x20) characters, and by replacing sequences of space * trailing space (\#x20) characters, and by replacing sequences of space
@@ -4515,10 +4523,9 @@ error:
* All attributes for which no declaration has been read should be treated * All attributes for which no declaration has been read should be treated
* by a non-validating parser as if declared CDATA. * by a non-validating parser as if declared CDATA.
* *
* @returns the AttValue parsed or NULL. The value has to be freed by the caller. * @returns the AttValue parsed or NULL. The value has to be freed by the
* caller.
*/ */
xmlChar * xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt) { xmlParseAttValue(xmlParserCtxtPtr ctxt) {
if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
@@ -4947,7 +4954,7 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
* Parse an External ID or a Public ID * Parse an External ID or a Public ID
* *
* NOTE: Productions [75] and [83] interact badly since [75] can generate * NOTE: Productions [75] and [83] interact badly since [75] can generate
* 'PUBLIC' S PubidLiteral S SystemLiteral * `'PUBLIC' S PubidLiteral S SystemLiteral`
* *
* [75] ExternalID ::= 'SYSTEM' S SystemLiteral * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
* | 'PUBLIC' S PubidLiteral S SystemLiteral * | 'PUBLIC' S PubidLiteral S SystemLiteral
@@ -5517,12 +5524,14 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
* *
* Parse a notation declaration. Always consumes '<!'. * Parse a notation declaration. Always consumes '<!'.
* *
* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID)
* S? '>'
* *
* Hence there are actually 3 choices: * Hence there are actually 3 choices:
*
* 'PUBLIC' S PubidLiteral * 'PUBLIC' S PubidLiteral
* 'PUBLIC' S PubidLiteral S SystemLiteral * 'PUBLIC' S PubidLiteral S SystemLiteral
* and 'SYSTEM' S SystemLiteral * 'SYSTEM' S SystemLiteral
* *
* See the NOTE on xmlParseExternalID(). * See the NOTE on xmlParseExternalID().
*/ */
@@ -5835,7 +5844,7 @@ done:
* *
* Parse an attribute default declaration * Parse an attribute default declaration
* *
* [60] DefaultDecl ::= '\#REQUIRED' | '\#IMPLIED' | (('\#FIXED' S)? AttValue) * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
* *
* [ VC: Required Attribute ] * [ VC: Required Attribute ]
* if the default declaration is the keyword \#REQUIRED, then the * if the default declaration is the keyword \#REQUIRED, then the
@@ -6044,7 +6053,6 @@ xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
* *
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
* *
*
* @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
*/ */
@@ -6152,7 +6160,6 @@ xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
* *
* [53] AttDef ::= S Name S AttType S DefaultDecl * [53] AttDef ::= S Name S AttType S DefaultDecl
*
*/ */
void void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
@@ -6276,8 +6283,8 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
* parse the declaration for a Mixed Element content * parse the declaration for a Mixed Element content
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl() * The leading '(' and spaces have been skipped in xmlParseElementContentDecl()
* *
* [51] Mixed ::= '(' S? '\#PCDATA' (S? '|' S? Name)* S? ')*' | * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
* '(' S? '\#PCDATA' S? ')' * '(' S? '#PCDATA' S? ')'
* *
* [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
* *
@@ -6388,7 +6395,6 @@ mem_error:
* parse the declaration for a Mixed Element content * parse the declaration for a Mixed Element content
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl() * The leading '(' and spaces have been skipped in xmlParseElementContentDecl()
* *
*
* [47] children ::= (choice | seq) ('?' | '*' | '+')? * [47] children ::= (choice | seq) ('?' | '*' | '+')?
* *
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
@@ -6882,7 +6888,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
* [61] conditionalSect ::= includeSect | ignoreSect * [61] conditionalSect ::= includeSect | ignoreSect
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
* Ignore)*
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
*/ */
@@ -7159,7 +7166,8 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
* *
* [30] extSubset ::= textDecl? extSubsetDecl * [30] extSubset ::= textDecl? extSubsetDecl
* *
* [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * * [31] extSubsetDecl ::= (markupdecl | conditionalSect |
* PEReference | S) *
*/ */
void void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
@@ -10158,7 +10166,8 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) {
* *
* parse the XML encoding declaration * parse the XML encoding declaration
* *
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |
* "'" EncName "'")
* *
* this sets up the conversion filters. * this sets up the conversion filters.
* *
@@ -12100,7 +12109,8 @@ xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
* The allowed sequence for the Well Balanced Chunk is the one defined by * The allowed sequence for the Well Balanced Chunk is the one defined by
* the content production in the XML grammar: * the content production in the XML grammar:
* *
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* * [43] content ::= (element | CharData | Reference | CDSect | PI |
* Comment)*
* *
* @returns 0 if the chunk is well balanced, -1 in case of args problem and * @returns 0 if the chunk is well balanced, -1 in case of args problem and
* the parser error code otherwise * the parser error code otherwise
@@ -12270,7 +12280,8 @@ exit:
* The allowed sequence for the data is a Well Balanced Chunk defined by * The allowed sequence for the data is a Well Balanced Chunk defined by
* the content production in the XML grammar: * the content production in the XML grammar:
* *
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* * [43] content ::= (element | CharData | Reference | CDSect | PI |
* Comment)*
* *
* This function assumes the encoding of `node`'s document which is * This function assumes the encoding of `node`'s document which is
* typically not what you want. A better alternative is * typically not what you want. A better alternative is
@@ -12351,7 +12362,8 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
* The allowed sequence for the Well Balanced Chunk is the one defined by * The allowed sequence for the Well Balanced Chunk is the one defined by
* the content production in the XML grammar: * the content production in the XML grammar:
* *
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* * [43] content ::= (element | CharData | Reference | CDSect | PI |
* Comment)*
* *
* @returns 0 if the chunk is well balanced, or the parser error code * @returns 0 if the chunk is well balanced, or the parser error code
* otherwise. * otherwise.

View File

@@ -486,7 +486,10 @@ xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
* @deprecated Internal function, don't use. * @deprecated Internal function, don't use.
* *
* Check whether the character is allowed by the production * Check whether the character is allowed by the production
*
* ```
* [84] Letter ::= BaseChar | Ideographic * [84] Letter ::= BaseChar | Ideographic
* ```
* *
* @returns 0 if not, non-zero otherwise * @returns 0 if not, non-zero otherwise
*/ */
@@ -844,10 +847,12 @@ encoding_error:
* *
* The current char value, if using UTF-8 this may actually span multiple * The current char value, if using UTF-8 this may actually span multiple
* bytes in the input buffer. Implement the end of line normalization: * bytes in the input buffer. Implement the end of line normalization:
*
* 2.11 End-of-Line Handling * 2.11 End-of-Line Handling
*
* Wherever an external parsed entity or the literal entity value * Wherever an external parsed entity or the literal entity value
* of an internal parsed entity contains either the literal two-character * of an internal parsed entity contains either the literal two-character
* sequence "\#xD\#xA" or a standalone literal \#xD, an XML processor * sequence "#xD#xA" or a standalone literal \#xD, an XML processor
* must pass to the application the single character \#xA. * must pass to the application the single character \#xA.
* (This behavior can conveniently be produced by normalizing all * (This behavior can conveniently be produced by normalizing all
* line breaks to \#xA on input, before parsing.) * line breaks to \#xA on input, before parsing.)
@@ -3244,7 +3249,7 @@ xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
* *
* @deprecated Don't use. * @deprecated Don't use.
* *
* -- Initialize (set to initial state) node info sequence * Initialize (set to initial state) node info sequence
*/ */
void void
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
@@ -3261,8 +3266,7 @@ xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
* *
* @deprecated Don't use. * @deprecated Don't use.
* *
* -- Clear (release memory and reinitialize) node * Clear (release memory and reinitialize) node info sequence
* info sequence
*/ */
void void
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
@@ -3280,8 +3284,8 @@ xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
* *
* @deprecated Don't use. * @deprecated Don't use.
* *
* xmlParserFindNodeInfoIndex() : Find the index that the info record for * Find the index that the info record for the given node is or
* the given node is or should be at in a sorted sequence * should be at in a sorted sequence.
* *
* @returns a long indicating the position of the record * @returns a long indicating the position of the record
*/ */