From a7fc9e1add064b48896ee4726ada8fc7b321a9ff Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer
Date: Tue, 22 Jul 2025 20:50:13 +0200
Subject: [PATCH] parser: Add more parser context accessors

The only thing remaining is access to parser input, see #762.
---
 include/libxml/parser.h | 39 +++++++++-------
 parserInternals.c | 99 +++++++++++++++++++++++++++++++++++++++++
 testapi.c | 4 ++
 3 files changed, 125 insertions(+), 17 deletions(-)

diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index f134aa37..cedbe19f 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -243,9 +243,10 @@ struct _xmlParserCtxt {
      */
     struct _xmlSAXHandler *sax;
     /**
+     * @deprecated Use #xmlCtxtGetUserData
+     *
      * user data for SAX interface, defaults to the context itself
      */
-    /* TODO: Add accessor */
     void *userData;
     /**
      * @deprecated Use xmlCtxtGetDocument()
      *
@@ -308,16 +309,10 @@ struct _xmlParserCtxt {

     /* Node analysis stack only used for DOM building */
     /**
+     * @deprecated Use #xmlCtxtGetNode
+     *
      * The current element.
-     *
-     * This is only valid and useful if the default SAX callbacks
-     * which build a document tree are intercepted. This mode of
-     * operation is fragile and discouraged.
-     *
-     * Contains the current element whose content is being parsed,
-     * or NULL if the parser is in top-level or DTD content.
      */
-    /* TODO: Add accessor */
     xmlNode *node;
     /* Depth of the parsing stack */
     int nodeNr XML_DEPRECATED_MEMBER;
@@ -370,10 +365,11 @@ struct _xmlParserCtxt {
     int token XML_DEPRECATED_MEMBER;

     /**
+     * @deprecated Don't use
+     *
      * The main document URI, if available, with its last
      * component stripped.
      */
-    /* TODO: Add accessor */
     char *directory;

     /* Node name stack */
@@ -404,38 +400,36 @@ struct _xmlParserCtxt {
      */
     int disableSAX XML_DEPRECATED_MEMBER;
     /**
+     * @deprecated Use #xmlCtxtIsInSubset
+     *
      * Set if DTD content is parsed.
      *
      * - 0: not in DTD
      * - 1: in internal DTD subset
      * - 2: in external DTD subset
      */
-    /* TODO: Add accessor */
     int inSubset;
     /**
      * @deprecated Use the `name` argument of the
-     * `internalSubset` SAX callback.
+     * `internalSubset` SAX callback or #xmlCtxtGetDocTypeDecl
      *
      * Name of the internal subset (root element type).
      */
-    /* TODO: Add accessor */
     const xmlChar *intSubName;
     /**
      * @deprecated Use the `systemId` argument of the
-     * `internalSubset` SAX callback.
+     * `internalSubset` SAX callback or #xmlCtxtGetDocTypeDecl
      *
      * System identifier (URI) of external the subset.
      */
-    /* TODO: Add accessor */
     xmlChar *extSubURI;
     /**
      * @deprecated Use the `publicId` argument of the
-     * `internalSubset` SAX callback.
+     * `internalSubset` SAX callback or #xmlCtxtGetDocTypeDecl
      *
      * This member is MISNAMED. It contains the *public* identifier
      * of the external subset.
      */
-    /* TODO: Add accessor */
     xmlChar *extSubSystem;

     /* xml:space values */
@@ -1909,6 +1903,8 @@ XMLPUBFUN int
             xmlCtxtIsHtml (xmlParserCtxt *ctxt);
 XMLPUBFUN int
             xmlCtxtIsStopped (xmlParserCtxt *ctxt);
+XMLPUBFUN int
+            xmlCtxtIsInSubset (xmlParserCtxt *ctxt);
 #ifdef LIBXML_VALID_ENABLED
 XMLPUBFUN xmlValidCtxt *
             xmlCtxtGetValidCtxt (xmlParserCtxt *ctxt);
@@ -1921,6 +1917,15 @@ XMLPUBFUN int
             xmlCtxtGetStandalone (xmlParserCtxt *ctxt);
 XMLPUBFUN xmlParserStatus
             xmlCtxtGetStatus (xmlParserCtxt *ctxt);
+XMLPUBFUN void *
+            xmlCtxtGetUserData (xmlParserCtxt *ctxt);
+XMLPUBFUN xmlNode *
+            xmlCtxtGetNode (xmlParserCtxt *ctxt);
+XMLPUBFUN int
+            xmlCtxtGetDocTypeDecl (xmlParserCtxt *ctxt,
+                                   const xmlChar **name,
+                                   const xmlChar **systemId,
+                                   const xmlChar **publicId);
 XMLPUBFUN void
             xmlCtxtSetErrorHandler (xmlParserCtxt *ctxt,
                                     xmlStructuredErrorFunc handler,
diff --git a/parserInternals.c b/parserInternals.c
index b1358216..bd5a562d 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -3211,6 +3211,30 @@ xmlCtxtIsStopped(xmlParserCtxt *ctxt) {
     return(ctxt->disableSAX != 0);
 }

+/**
+ * Check whether a DTD subset is being parsed.
+ *
+ * Should only be used by SAX callbacks.
+ *
+ * Return values are
+ *
+ * - 0: not in DTD
+ * - 1: in internal DTD subset
+ * - 2: in external DTD subset
+ *
+ * @since 2.15.0
+ *
+ * @param ctxt parser context
+ * @returns the subset status, or 0 if ctxt is NULL
+ */
+int
+xmlCtxtIsInSubset(xmlParserCtxt *ctxt) {
+    if (ctxt == NULL)
+        return(0);
+
+    return(ctxt->inSubset);
+}
+
 #ifdef LIBXML_VALID_ENABLED
 /**
  * @since 2.14.0
@@ -3227,6 +3251,81 @@ xmlCtxtGetValidCtxt(xmlParserCtxt *ctxt) {
 }
 #endif

+/**
+ * Return user data.
+ *
+ * Return user data of a custom SAX parser or the parser context
+ * itself if unset.
+ *
+ * @since 2.15.0
+ *
+ * @param ctxt parser context
+ * @returns the user data, or NULL if ctxt is NULL.
+ */
+void *
+xmlCtxtGetUserData(xmlParserCtxt *ctxt) {
+    if (ctxt == NULL)
+        return NULL;
+
+    return ctxt->userData;
+}
+
+/**
+ * Return the current node being parsed.
+ *
+ * This is only useful if the default SAX callbacks which build
+ * a document tree are intercepted. This mode of operation is
+ * fragile and discouraged.
+ *
+ * Returns the current element node, or the document node if no
+ * element was parsed yet.
+ *
+ * @since 2.15.0
+ *
+ * @param ctxt parser context
+ * @returns the current node, or NULL if ctxt is NULL or there is no document.
+ */
+xmlNode *
+xmlCtxtGetNode(xmlParserCtxt *ctxt) {
+    if (ctxt == NULL)
+        return NULL;
+
+    if (ctxt->node != NULL)
+        return ctxt->node;
+    return (xmlNode *) ctxt->myDoc;
+}
+
+/**
+ * Return data from the doctype declaration.
+ *
+ * Should only be used by SAX callbacks.
+ *
+ * @since 2.15.0
+ *
+ * @param ctxt parser context
+ * @param name name of the root element (output, may be NULL)
+ * @param systemId system ID (URI) of the external subset (output, may be NULL)
+ * @param publicId public ID of the external subset (output, may be NULL)
+ * @returns 0 on success, -1 if ctxt is NULL (outputs are then left untouched)
+ */
+int
+xmlCtxtGetDocTypeDecl(xmlParserCtxt *ctxt,
+                      const xmlChar **name,
+                      const xmlChar **systemId,
+                      const xmlChar **publicId) {
+    if (ctxt == NULL)
+        return -1;
+
+    if (name != NULL)
+        *name = ctxt->intSubName;
+    if (systemId != NULL)
+        *systemId = ctxt->extSubURI;
+    if (publicId != NULL)
+        *publicId = ctxt->extSubSystem; /* The member is misnamed */
+
+    return 0;
+}
+
 /************************************************************************
  *                                                                      *
  *              Handling of node information                            *
diff --git a/testapi.c b/testapi.c
index 059836e7..a3104532 100644
--- a/testapi.c
+++ b/testapi.c
@@ -144,15 +144,19 @@ main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
     xmlCtxtGetCatalogs(NULL);
     xmlCtxtGetDeclaredEncoding(NULL);
     xmlDictFree(xmlCtxtGetDict(NULL));
+    xmlCtxtGetDocTypeDecl(NULL, NULL, NULL, NULL);
     xmlFreeDoc(xmlCtxtGetDocument(NULL));
     xmlCtxtGetLastError(NULL);
+    xmlFreeNode(xmlCtxtGetNode(NULL));
     xmlCtxtGetOptions(NULL);
     xmlCtxtGetPrivate(NULL);
     xmlCtxtGetSaxHandler(NULL);
     xmlCtxtGetStandalone(NULL);
     xmlCtxtGetStatus(NULL);
+    xmlCtxtGetUserData(NULL);
     xmlCtxtGetVersion(NULL);
     xmlCtxtIsHtml(NULL);
+    xmlCtxtIsInSubset(NULL);
     xmlCtxtIsStopped(NULL);
     xmlFreeNode(xmlCtxtParseContent(NULL, NULL, NULL, 0));
     xmlFreeDoc(xmlCtxtParseDocument(NULL, NULL));