1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

doc: Misc fixes to HTML parser docs

This commit is contained in:
Nick Wellnhofer
2025-05-03 16:34:02 +02:00
parent 411f30ef2a
commit b7274fb02f
2 changed files with 21 additions and 37 deletions

View File

@@ -4208,13 +4208,8 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
/** /**
* @param ctxt an HTML parser context * @param ctxt an HTML parser context
* *
* parse an HTML element, new version, non recursive * Parse an HTML element, new version, non recursive
*
* [39] element ::= EmptyElemTag | STag content ETag
*
* [41] Attribute ::= Name Eq AttValue
*/ */
static int static int
htmlParseElementInternal(htmlParserCtxtPtr ctxt) { htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
const xmlChar *name; const xmlChar *name;
@@ -4286,14 +4281,8 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
* *
* @deprecated Internal function, don't use. * @deprecated Internal function, don't use.
* *
* parse an HTML element, this is highly recursive * This is kept for compatibility with previous code versions
* this is kept for compatibility with previous code versions
*
* [39] element ::= EmptyElemTag | STag content ETag
*
* [41] Attribute ::= Name Eq AttValue
*/ */
void void
htmlParseElement(htmlParserCtxtPtr ctxt) { htmlParseElement(htmlParserCtxtPtr ctxt) {
const xmlChar *oldptr; const xmlChar *oldptr;
@@ -4386,7 +4375,6 @@ htmlCtxtParseContentInternal(htmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
* *
* @returns 0, -1 in case of error. * @returns 0, -1 in case of error.
*/ */
int int
htmlParseDocument(htmlParserCtxtPtr ctxt) { htmlParseDocument(htmlParserCtxtPtr ctxt) {
xmlDtdPtr dtd; xmlDtdPtr dtd;
@@ -4518,7 +4506,6 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
* *
* @returns 0 in case of success and -1 in case of error * @returns 0 in case of success and -1 in case of error
*/ */
static int static int
htmlInitParserCtxt(htmlParserCtxtPtr ctxt, const htmlSAXHandler *sax, htmlInitParserCtxt(htmlParserCtxtPtr ctxt, const htmlSAXHandler *sax,
void *userData) void *userData)
@@ -4604,9 +4591,8 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt, const htmlSAXHandler *sax,
* @param ctxt an HTML parser context * @param ctxt an HTML parser context
* *
* Free all the memory used by a parser context. However the parsed * Free all the memory used by a parser context. However the parsed
* document in ctxt->myDoc is not freed. * document in `ctxt->myDoc` is not freed.
*/ */
void void
htmlFreeParserCtxt(htmlParserCtxtPtr ctxt) htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
{ {
@@ -4627,7 +4613,6 @@ htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
* *
* @returns the htmlParserCtxtPtr or NULL in case of allocation error * @returns the htmlParserCtxtPtr or NULL in case of allocation error
*/ */
htmlParserCtxtPtr htmlParserCtxtPtr
htmlNewParserCtxt(void) htmlNewParserCtxt(void)
{ {
@@ -4638,20 +4623,18 @@ htmlNewParserCtxt(void)
* @param sax SAX handler * @param sax SAX handler
* @param userData user data * @param userData user data
* *
* Allocate and initialize a new HTML SAX parser context. If userData * Allocate and initialize a new HTML SAX parser context. If `userData`
* is NULL, the parser context will be passed as user data. * is NULL, the parser context will be passed as user data.
* *
* @since 2.11.0 * @since 2.11.0
* *
* If you want to support older versions, * If you want to support older versions, it's best to invoke
* it's best to invoke htmlNewParserCtxt() and set ctxt->sax with * htmlNewParserCtxt() and set `ctxt->sax` with struct assignment.
* struct assignment.
* *
* Also see htmlNewParserCtxt(). * Also see htmlNewParserCtxt().
* *
* @returns the htmlParserCtxtPtr or NULL in case of allocation error * @returns the htmlParserCtxtPtr or NULL in case of allocation error
*/ */
htmlParserCtxtPtr htmlParserCtxtPtr
htmlNewSAXParserCtxt(const htmlSAXHandler *sax, void *userData) htmlNewSAXParserCtxt(const htmlSAXHandler *sax, void *userData)
{ {
@@ -4929,13 +4912,11 @@ htmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
* @param ctxt an HTML parser context * @param ctxt an HTML parser context
* *
* Try to find a comment end tag in the input stream * Try to find a comment end tag in the input stream
* The search includes "-->" as well as WHATWG-recommended incorrectly-closed tags. * The search includes "-->" as well as WHATWG-recommended
* (See https://html.spec.whatwg.org/multipage/parsing.html\#parse-error-incorrectly-closed-comment) * incorrectly-closed tags.
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
* to avoid rescanning sequences of bytes, it DOES change the state of the
* parser, do not use liberally.
* *
* @returns the index to the current parsing point if the full sequence is available, -1 otherwise. * @returns the index to the current parsing point if the full
* sequence is available, -1 otherwise.
*/ */
static int static int
htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt) htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
@@ -5185,10 +5166,10 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
* *
* The last chunk, which will often be empty, must be marked with * The last chunk, which will often be empty, must be marked with
* the `terminate` flag. With the default SAX callbacks, the resulting * the `terminate` flag. With the default SAX callbacks, the resulting
* document will be available in ctxt->myDoc. This pointer will not * document will be available in `ctxt->myDoc`. This pointer will not
* be freed by the library. * be freed by the library.
* *
* If the document isn't well-formed, ctxt->myDoc is set to NULL. * If the document isn't well-formed, `ctxt->myDoc` is set to NULL.
* *
* @returns an xmlParserErrors code (0 on success). * @returns an xmlParserErrors code (0 on success).
*/ */

View File

@@ -1,12 +1,15 @@
/** /**
* @file * @file
* *
* @brief interface for an HTML 4.0 non-verifying parser * @brief HTML parser, doesn't support HTML5
* *
* this module implements an HTML 4.0 non-verifying parser * This module originally implemented an HTML parser based on the
* with API compatible with the XML parser ones. It should * (underspecified) HTML 4.0 spec. As of 2.14, the tokenizer
* be able to parse "real world" HTML, even if severely * conforms to HTML5. Tree construction still follows a custom,
* broken from a specification point of view. * unspecified algorithm with many differences to HTML5.
*
* The parser defaults to ISO-8859-1, the default encoding of
* HTTP/1.0.
* *
* @copyright See Copyright for the status of this software. * @copyright See Copyright for the status of this software.
* *
@@ -179,7 +182,7 @@ XMLPUBFUN int
htmlHandleOmittedElem(int val); htmlHandleOmittedElem(int val);
#ifdef LIBXML_PUSH_ENABLED #ifdef LIBXML_PUSH_ENABLED
/** /*
* Interfaces for the Push mode. * Interfaces for the Push mode.
*/ */
XMLPUBFUN htmlParserCtxtPtr XMLPUBFUN htmlParserCtxtPtr