doc: Misc fixes to HTML parser docs

2025-10-24 13:33:01 +03:00 · 2025-05-03 16:34:02 +02:00
parent 411f30ef2a
commit b7274fb02f
2 changed files with 21 additions and 37 deletions
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4208,13 +4208,8 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
 /**
 * @param ctxt  an HTML parser context
 *
- * parse an HTML element, new version, non recursive
+ * Parse an HTML element, new version, non-recursive
 *
 * [39] element ::= EmptyElemTag | STag content ETag
 *
 * [41] Attribute ::= Name Eq AttValue
 */
 static int
 htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
    const xmlChar *name;
@@ -4286,14 +4281,8 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
 *
 * @deprecated Internal function, don't use.
 *
- * parse an HTML element, this is highly recursive
+ * Parse an HTML element, this is highly recursive
 * this is kept for compatibility with previous code versions
 *
 * [39] element ::= EmptyElemTag | STag content ETag
 *
 * [41] Attribute ::= Name Eq AttValue
 */
 void
 htmlParseElement(htmlParserCtxtPtr ctxt) {
    const xmlChar *oldptr;
@@ -4386,7 +4375,6 @@ htmlCtxtParseContentInternal(htmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
 *
 * @returns 0, -1 in case of error.
 */
 int
 htmlParseDocument(htmlParserCtxtPtr ctxt) {
    xmlDtdPtr dtd;
@@ -4518,7 +4506,6 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
 *
 * @returns 0 in case of success and -1 in case of error
 */
 static int
 htmlInitParserCtxt(htmlParserCtxtPtr ctxt, const htmlSAXHandler *sax,
                   void *userData)
@@ -4604,9 +4591,8 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt, const htmlSAXHandler *sax,
 * @param ctxt  an HTML parser context
 *
 * Free all the memory used by a parser context. However the parsed
- * document in ctxt->myDoc is not freed.
+ * document in `ctxt->myDoc` is not freed.
 */
 void
 htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
 {
@@ -4627,7 +4613,6 @@ htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
 *
 * @returns the htmlParserCtxtPtr or NULL in case of allocation error
 */
 htmlParserCtxtPtr
 htmlNewParserCtxt(void)
 {
@@ -4638,20 +4623,18 @@ htmlNewParserCtxt(void)
 * @param sax  SAX handler
 * @param userData  user data
 *
- * Allocate and initialize a new HTML SAX parser context. If userData
+ * Allocate and initialize a new HTML SAX parser context. If `userData`
 * is NULL, the parser context will be passed as user data.
 *
 * @since 2.11.0
 *
- * If you want support older versions,
+ * If you want to support older versions, it's best to invoke
- * it's best to invoke htmlNewParserCtxt() and set ctxt->sax with
+ * htmlNewParserCtxt() and set `ctxt->sax` with
 * struct assignment.
 *
 * Also see htmlNewParserCtxt().
 *
 * @returns the htmlParserCtxtPtr or NULL in case of allocation error
 */
 htmlParserCtxtPtr
 htmlNewSAXParserCtxt(const htmlSAXHandler *sax, void *userData)
 {
@@ -4929,13 +4912,11 @@ htmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
 * @param ctxt  an HTML parser context
 *
 * Try to find a comment end tag in the input stream
- * The search includes "-->" as well as WHATWG-recommended incorrectly-closed tags.
+ * The search includes "-->" as well as WHATWG-recommended
- * (See https://html.spec.whatwg.org/multipage/parsing.html\#parse-error-incorrectly-closed-comment)
+ * incorrectly-closed tags.
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
 * to avoid rescanning sequences of bytes, it DOES change the state of the
 * parser, do not use liberally.
 *
- * @returns the index to the current parsing point if the full sequence is available, -1 otherwise.
+ * @returns the index to the current parsing point if the full
 * sequence is available, -1 otherwise.
 */
 static int
 htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
@@ -5185,10 +5166,10 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
 *
 * The last chunk, which will often be empty, must be marked with
 * the `terminate` flag. With the default SAX callbacks, the resulting
- * document will be available in ctxt->myDoc. This pointer will not
+ * document will be available in `ctxt->myDoc`. This pointer will not
 * be freed by the library.
 *
- * If the document isn't well-formed, ctxt->myDoc is set to NULL.
+ * If the document isn't well-formed, `ctxt->myDoc` is set to NULL.
 *
 * @returns an xmlParserErrors code (0 on success).
 */
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -1,12 +1,15 @@
 /**
 * @file
 * 
- * @brief interface for an HTML 4.0 non-verifying parser
+ * @brief HTML parser, doesn't support HTML5
 * 
- * this module implements an HTML 4.0 non-verifying parser
+ * This module originally implemented an HTML parser based on the
- *              with API compatible with the XML parser ones. It should
+ * (underspecified) HTML 4.0 spec. As of 2.14, the tokenizer
- *              be able to parse "real world" HTML, even if severely
+ * conforms to HTML5. Tree construction still follows a custom,
- *              broken from a specification point of view.
+ * unspecified algorithm with many differences to HTML5.
 *
 * The parser defaults to ISO-8859-1, the default encoding of
 * HTTP/1.0.
 *
 * @copyright See Copyright for the status of this software.
 *
@@ -179,7 +182,7 @@ XMLPUBFUN int
 			htmlHandleOmittedElem(int val);
 #ifdef LIBXML_PUSH_ENABLED
-/**
+/*
 * Interfaces for the Push mode.
 */
 XMLPUBFUN htmlParserCtxtPtr