mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-29 11:41:22 +03:00
HTML parsing, output is now correct, added HTMLtests target and testcases, Daniel
This commit is contained in:
@ -1,3 +1,9 @@
|
|||||||
|
Wed Jul 7 09:28:43 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||||
|
|
||||||
|
* HTMLparser.[ch], HTMLtree.[ch]: more work for HTML parsing and
|
||||||
|
output.
|
||||||
|
* Makefile.am, test/HTML/*, result/HTML/*: added HTMLtests target and testcases
|
||||||
|
|
||||||
Wed Jul 7 00:25:42 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
|
Wed Jul 7 00:25:42 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||||
|
|
||||||
* parser.h : Oops removed the binary compatibility problem
|
* parser.h : Oops removed the binary compatibility problem
|
||||||
|
@ -34,7 +34,7 @@
|
|||||||
#include "valid.h"
|
#include "valid.h"
|
||||||
#include "parserInternals.h"
|
#include "parserInternals.h"
|
||||||
|
|
||||||
#define DEBUG */
|
/* #define DEBUG */
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
* *
|
* *
|
||||||
@ -351,7 +351,6 @@ htmlInitAutoClose(void) {
|
|||||||
htmlElemDescPtr
|
htmlElemDescPtr
|
||||||
htmlTagLookup(const CHAR *tag) {
|
htmlTagLookup(const CHAR *tag) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
int cnt;
|
|
||||||
|
|
||||||
for (i = 0; i < (sizeof(html40ElementTable) /
|
for (i = 0; i < (sizeof(html40ElementTable) /
|
||||||
sizeof(html40ElementTable[0]));i++) {
|
sizeof(html40ElementTable[0]));i++) {
|
||||||
@ -408,7 +407,6 @@ htmlCheckAutoClose(const CHAR *new, const CHAR *old) {
|
|||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
htmlAutoClose(htmlParserCtxtPtr ctxt, const CHAR *new) {
|
htmlAutoClose(htmlParserCtxtPtr ctxt, const CHAR *new) {
|
||||||
const CHAR *old;
|
|
||||||
|
|
||||||
while ((ctxt->node != NULL) &&
|
while ((ctxt->node != NULL) &&
|
||||||
(htmlCheckAutoClose(new, ctxt->node->name))) {
|
(htmlCheckAutoClose(new, ctxt->node->name))) {
|
||||||
@ -1933,7 +1931,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
|||||||
atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
|
atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
|
||||||
if (atts == NULL) {
|
if (atts == NULL) {
|
||||||
fprintf(stderr, "malloc of %ld byte failed\n",
|
fprintf(stderr, "malloc of %ld byte failed\n",
|
||||||
maxatts * sizeof(CHAR *));
|
maxatts * (long)sizeof(CHAR *));
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
} else if (nbatts + 2 < maxatts) {
|
} else if (nbatts + 2 < maxatts) {
|
||||||
@ -1941,7 +1939,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
|||||||
atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
|
atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
|
||||||
if (atts == NULL) {
|
if (atts == NULL) {
|
||||||
fprintf(stderr, "realloc of %ld byte failed\n",
|
fprintf(stderr, "realloc of %ld byte failed\n",
|
||||||
maxatts * sizeof(CHAR *));
|
maxatts * (long)sizeof(CHAR *));
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -48,8 +48,10 @@ typedef struct htmlEntityDesc {
|
|||||||
/*
|
/*
|
||||||
* There are only a few public functions.
|
* There are only a few public functions.
|
||||||
*/
|
*/
|
||||||
htmlEntityDescPtr
|
htmlElemDescPtr htmlTagLookup(const CHAR *tag);
|
||||||
htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
|
htmlEntityDescPtr htmlEntityLookup(const CHAR *name);
|
||||||
|
|
||||||
|
htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
|
||||||
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
|
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
|
||||||
void htmlParseElement(htmlParserCtxtPtr ctxt);
|
void htmlParseElement(htmlParserCtxtPtr ctxt);
|
||||||
|
|
||||||
|
120
HTMLtree.c
120
HTMLtree.c
@ -12,14 +12,11 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h> /* for memset() only ! */
|
#include <string.h> /* for memset() only ! */
|
||||||
|
|
||||||
#include "tree.h"
|
#include "HTMLparser.h"
|
||||||
|
#include "HTMLtree.h"
|
||||||
#include "entities.h"
|
#include "entities.h"
|
||||||
#include "valid.h"
|
#include "valid.h"
|
||||||
|
|
||||||
#define HTML_TEXT_NODE XML_TEXT_NODE
|
|
||||||
#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
|
|
||||||
#define HTML_COMMENT_NODE XML_COMMENT_NODE
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlDtdDump:
|
* htmlDtdDump:
|
||||||
* @buf: the HTML buffer output
|
* @buf: the HTML buffer output
|
||||||
@ -46,23 +43,6 @@ htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
|
|||||||
xmlBufferWriteChar(buf, " SYSTEM ");
|
xmlBufferWriteChar(buf, " SYSTEM ");
|
||||||
xmlBufferWriteQuotedString(buf, cur->SystemID);
|
xmlBufferWriteQuotedString(buf, cur->SystemID);
|
||||||
}
|
}
|
||||||
if ((cur->entities == NULL) && (cur->elements == NULL) &&
|
|
||||||
(cur->attributes == NULL) && (cur->notations == NULL)) {
|
|
||||||
xmlBufferWriteChar(buf, ">\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
xmlBufferWriteChar(buf, " [\n");
|
|
||||||
if (cur->entities != NULL)
|
|
||||||
xmlDumpEntitiesTable(buf, (xmlEntitiesTablePtr) cur->entities);
|
|
||||||
if (cur->notations != NULL)
|
|
||||||
xmlDumpNotationTable(buf, (xmlNotationTablePtr) cur->notations);
|
|
||||||
if (cur->elements != NULL)
|
|
||||||
xmlDumpElementTable(buf, (xmlElementTablePtr) cur->elements);
|
|
||||||
if (cur->attributes != NULL)
|
|
||||||
xmlDumpAttributeTable(buf, (xmlAttributeTablePtr) cur->attributes);
|
|
||||||
xmlBufferWriteChar(buf, "]");
|
|
||||||
|
|
||||||
/* TODO !!! a lot more things to dump ... */
|
|
||||||
xmlBufferWriteChar(buf, ">\n");
|
xmlBufferWriteChar(buf, ">\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -116,30 +96,23 @@ htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
|
|||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level);
|
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
|
||||||
/**
|
/**
|
||||||
* htmlNodeListDump:
|
* htmlNodeListDump:
|
||||||
* @buf: the HTML buffer output
|
* @buf: the HTML buffer output
|
||||||
* @doc: the document
|
* @doc: the document
|
||||||
* @cur: the first node
|
* @cur: the first node
|
||||||
* @level: the imbrication level for indenting
|
|
||||||
*
|
*
|
||||||
* Dump an HTML node list, recursive behaviour,children are printed too.
|
* Dump an HTML node list, recursive behaviour,children are printed too.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
|
htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
|
||||||
int i;
|
|
||||||
|
|
||||||
if (cur == NULL) {
|
if (cur == NULL) {
|
||||||
fprintf(stderr, "htmlNodeListDump : node == NULL\n");
|
fprintf(stderr, "htmlNodeListDump : node == NULL\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
while (cur != NULL) {
|
while (cur != NULL) {
|
||||||
if ((cur->type != HTML_TEXT_NODE) &&
|
htmlNodeDump(buf, doc, cur);
|
||||||
(cur->type != HTML_ENTITY_REF_NODE)) {
|
|
||||||
xmlBufferWriteChar(buf, "\n");
|
|
||||||
}
|
|
||||||
htmlNodeDump(buf, doc, cur, level);
|
|
||||||
cur = cur->next;
|
cur = cur->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -149,22 +122,26 @@ htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
|
|||||||
* @buf: the HTML buffer output
|
* @buf: the HTML buffer output
|
||||||
* @doc: the document
|
* @doc: the document
|
||||||
* @cur: the current node
|
* @cur: the current node
|
||||||
* @level: the imbrication level for indenting
|
|
||||||
*
|
*
|
||||||
* Dump an HTML node, recursive behaviour,children are printed too.
|
* Dump an HTML node, recursive behaviour,children are printed too.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
|
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
|
||||||
int i;
|
int i;
|
||||||
|
htmlElemDescPtr info;
|
||||||
|
|
||||||
if (cur == NULL) {
|
if (cur == NULL) {
|
||||||
fprintf(stderr, "htmlNodeDump : node == NULL\n");
|
fprintf(stderr, "htmlNodeDump : node == NULL\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
* Special cases.
|
||||||
|
*/
|
||||||
if (cur->type == HTML_TEXT_NODE) {
|
if (cur->type == HTML_TEXT_NODE) {
|
||||||
if (cur->content != NULL) {
|
if (cur->content != NULL) {
|
||||||
CHAR *buffer;
|
CHAR *buffer;
|
||||||
|
|
||||||
|
/* uses the HTML encoding routine !!!!!!!!!! */
|
||||||
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
|
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
|
||||||
if (buffer != NULL) {
|
if (buffer != NULL) {
|
||||||
xmlBufferWriteCHAR(buf, buffer);
|
xmlBufferWriteCHAR(buf, buffer);
|
||||||
@ -188,20 +165,38 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
xmlBufferWriteChar(buf, "<");
|
/*
|
||||||
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
* Get specific HTML info for that node.
|
||||||
xmlBufferWriteCHAR(buf, cur->ns->prefix);
|
*/
|
||||||
xmlBufferWriteChar(buf, ":");
|
info = htmlTagLookup(cur->name);
|
||||||
}
|
|
||||||
|
|
||||||
|
xmlBufferWriteChar(buf, "<");
|
||||||
xmlBufferWriteCHAR(buf, cur->name);
|
xmlBufferWriteCHAR(buf, cur->name);
|
||||||
if (cur->nsDef)
|
|
||||||
xmlNsListDump(buf, cur->nsDef);
|
|
||||||
if (cur->properties != NULL)
|
if (cur->properties != NULL)
|
||||||
htmlAttrListDump(buf, doc, cur->properties);
|
htmlAttrListDump(buf, doc, cur->properties);
|
||||||
|
|
||||||
|
if (info->empty) {
|
||||||
|
xmlBufferWriteChar(buf, ">");
|
||||||
|
if (cur->next != NULL) {
|
||||||
|
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||||
|
(cur->next->type != HTML_ENTITY_REF_NODE))
|
||||||
|
xmlBufferWriteChar(buf, "\n");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
if ((cur->content == NULL) && (cur->childs == NULL)) {
|
if ((cur->content == NULL) && (cur->childs == NULL)) {
|
||||||
xmlBufferWriteChar(buf, "/>\n");
|
if (info->endTag != 0)
|
||||||
|
xmlBufferWriteChar(buf, ">");
|
||||||
|
else {
|
||||||
|
xmlBufferWriteChar(buf, "></");
|
||||||
|
xmlBufferWriteCHAR(buf, cur->name);
|
||||||
|
xmlBufferWriteChar(buf, ">");
|
||||||
|
}
|
||||||
|
if (cur->next != NULL) {
|
||||||
|
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||||
|
(cur->next->type != HTML_ENTITY_REF_NODE))
|
||||||
|
xmlBufferWriteChar(buf, "\n");
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
xmlBufferWriteChar(buf, ">");
|
xmlBufferWriteChar(buf, ">");
|
||||||
@ -215,16 +210,22 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (cur->childs != NULL) {
|
if (cur->childs != NULL) {
|
||||||
htmlNodeListDump(buf, doc, cur->childs, level + 1);
|
if ((cur->childs->type != HTML_TEXT_NODE) &&
|
||||||
|
(cur->childs->type != HTML_ENTITY_REF_NODE))
|
||||||
|
xmlBufferWriteChar(buf, "\n");
|
||||||
|
htmlNodeListDump(buf, doc, cur->childs);
|
||||||
|
if ((cur->last->type != HTML_TEXT_NODE) &&
|
||||||
|
(cur->last->type != HTML_ENTITY_REF_NODE))
|
||||||
|
xmlBufferWriteChar(buf, "\n");
|
||||||
}
|
}
|
||||||
xmlBufferWriteChar(buf, "</");
|
xmlBufferWriteChar(buf, "</");
|
||||||
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
|
||||||
xmlBufferWriteCHAR(buf, cur->ns->prefix);
|
|
||||||
xmlBufferWriteChar(buf, ":");
|
|
||||||
}
|
|
||||||
|
|
||||||
xmlBufferWriteCHAR(buf, cur->name);
|
xmlBufferWriteCHAR(buf, cur->name);
|
||||||
xmlBufferWriteChar(buf, ">\n");
|
xmlBufferWriteChar(buf, ">");
|
||||||
|
if (cur->next != NULL) {
|
||||||
|
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||||
|
(cur->next->type != HTML_ENTITY_REF_NODE))
|
||||||
|
xmlBufferWriteChar(buf, "\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -236,29 +237,12 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
|
|||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
|
htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
|
||||||
xmlBufferWriteChar(buf, "<?xml version=");
|
|
||||||
if (cur->version != NULL)
|
|
||||||
xmlBufferWriteQuotedString(buf, cur->version);
|
|
||||||
else
|
|
||||||
xmlBufferWriteChar(buf, "\"1.0\"");
|
|
||||||
if (cur->encoding != NULL) {
|
|
||||||
xmlBufferWriteChar(buf, " encoding=");
|
|
||||||
xmlBufferWriteQuotedString(buf, cur->encoding);
|
|
||||||
}
|
|
||||||
switch (cur->standalone) {
|
|
||||||
case 0:
|
|
||||||
xmlBufferWriteChar(buf, " standalone=\"no\"");
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
xmlBufferWriteChar(buf, " standalone=\"yes\"");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
xmlBufferWriteChar(buf, "?>\n");
|
|
||||||
if (cur->intSubset != NULL)
|
if (cur->intSubset != NULL)
|
||||||
htmlDtdDump(buf, cur);
|
htmlDtdDump(buf, cur);
|
||||||
if (cur->root != NULL) {
|
if (cur->root != NULL) {
|
||||||
htmlNodeDump(buf, cur, cur->root, 0);
|
htmlNodeDump(buf, cur, cur->root);
|
||||||
}
|
}
|
||||||
|
xmlBufferWriteChar(buf, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
34
HTMLtree.h
Normal file
34
HTMLtree.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
/*
|
||||||
|
* HTMLtree.h : describes the structures found in a tree resulting
|
||||||
|
* from an XML parsing.
|
||||||
|
*
|
||||||
|
* See Copyright for the status of this software.
|
||||||
|
*
|
||||||
|
* Daniel.Veillard@w3.org
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __HTML_TREE_H__
|
||||||
|
#define __HTML_TREE_H__
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "tree.h"
|
||||||
|
|
||||||
|
#define HTML_TEXT_NODE XML_TEXT_NODE
|
||||||
|
#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
|
||||||
|
#define HTML_COMMENT_NODE XML_COMMENT_NODE
|
||||||
|
|
||||||
|
void htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size);
|
||||||
|
void htmlDocDump(FILE *f, xmlDocPtr cur);
|
||||||
|
int htmlSaveFile(const char *filename, xmlDocPtr cur);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __HTML_TREE_H__ */
|
||||||
|
|
19
Makefile.am
19
Makefile.am
@ -61,7 +61,24 @@ check-local: tests
|
|||||||
|
|
||||||
testall : tests SVGtests SAXtests
|
testall : tests SVGtests SAXtests
|
||||||
|
|
||||||
tests : tester
|
tests: HTMLtests XMLtests
|
||||||
|
HTMLtests : testHTML
|
||||||
|
@(DIR=`pwd`; cd $(srcdir) ; \
|
||||||
|
for i in test/HTML/* ; do \
|
||||||
|
if [ ! -d $$i ] ; then \
|
||||||
|
if [ ! -f result/HTML/`basename $$i` ] ; then \
|
||||||
|
echo New test file `basename $$i` ; \
|
||||||
|
$$DIR/testHTML $$i > result/HTML/`basename $$i` ; \
|
||||||
|
else \
|
||||||
|
echo Testing `basename $$i` ; \
|
||||||
|
$$DIR/testHTML $$i > result.`basename $$i` ; \
|
||||||
|
diff result/HTML/`basename $$i` result.`basename $$i` ; \
|
||||||
|
$$DIR/testHTML result.`basename $$i` > result2.`basename $$i` ; \
|
||||||
|
diff result.`basename $$i` result2.`basename $$i` ; \
|
||||||
|
rm result.`basename $$i` result2.`basename $$i` ; \
|
||||||
|
fi ; fi ; done)
|
||||||
|
|
||||||
|
XMLtests : tester
|
||||||
@(DIR=`pwd`; cd $(srcdir) ; \
|
@(DIR=`pwd`; cd $(srcdir) ; \
|
||||||
for i in test/* ; do \
|
for i in test/* ; do \
|
||||||
if [ ! -d $$i ] ; then \
|
if [ ! -d $$i ] ; then \
|
||||||
|
@ -48,8 +48,10 @@ typedef struct htmlEntityDesc {
|
|||||||
/*
|
/*
|
||||||
* There are only a few public functions.
|
* There are only a few public functions.
|
||||||
*/
|
*/
|
||||||
htmlEntityDescPtr
|
htmlElemDescPtr htmlTagLookup(const CHAR *tag);
|
||||||
htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
|
htmlEntityDescPtr htmlEntityLookup(const CHAR *name);
|
||||||
|
|
||||||
|
htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
|
||||||
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
|
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
|
||||||
void htmlParseElement(htmlParserCtxtPtr ctxt);
|
void htmlParseElement(htmlParserCtxtPtr ctxt);
|
||||||
|
|
||||||
|
34
include/libxml/HTMLtree.h
Normal file
34
include/libxml/HTMLtree.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
/*
|
||||||
|
* HTMLtree.h : describes the structures found in a tree resulting
|
||||||
|
* from an XML parsing.
|
||||||
|
*
|
||||||
|
* See Copyright for the status of this software.
|
||||||
|
*
|
||||||
|
* Daniel.Veillard@w3.org
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __HTML_TREE_H__
|
||||||
|
#define __HTML_TREE_H__
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "tree.h"
|
||||||
|
|
||||||
|
#define HTML_TEXT_NODE XML_TEXT_NODE
|
||||||
|
#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
|
||||||
|
#define HTML_COMMENT_NODE XML_COMMENT_NODE
|
||||||
|
|
||||||
|
void htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size);
|
||||||
|
void htmlDocDump(FILE *f, xmlDocPtr cur);
|
||||||
|
int htmlSaveFile(const char *filename, xmlDocPtr cur);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __HTML_TREE_H__ */
|
||||||
|
|
12
result/HTML/Down.html
Normal file
12
result/HTML/Down.html
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>This service is temporary down</TITLE>
|
||||||
|
</HEAD>
|
||||||
|
<BODY bgcolor="#FFFFFF">
|
||||||
|
<H1 align="center">Sorry, this service is temporary down</H1>
|
||||||
|
We are doing our best to get it back on-line,
|
||||||
|
|
||||||
|
<P>The W3C system administrators</P>
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
41
result/HTML/test2.html
Normal file
41
result/HTML/test2.html
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<TITLE>Linux Today</TITLE>
|
||||||
|
</HEAD>
|
||||||
|
<BODY bgcolor="White" link="Blue" text="Black" VLINK="Black" ALINK="Red">
|
||||||
|
<CENTER>
|
||||||
|
<TABLE BORDER="0" WIDTH="100%" CELLSPACING="0" CELLPADDING="0">
|
||||||
|
<TR BGCOLOR="#FFFFFF">
|
||||||
|
<TD HEIGHT="90">
|
||||||
|
<A href="http://linuxtoday.com/cgi-bin/click.pl?adnum=49">
|
||||||
|
<IMG src="/pics/door_linux.gif" border="0" width="468" height="60" alt="Atipa Linux solutions. Your reliable cluster, server, and workstation solution. Win a Free Celeron Linux Workstation!">
|
||||||
|
</A>
|
||||||
|
</TD>
|
||||||
|
<TD>
|
||||||
|
<IMG SRC="/pics/lt.gif" VSPACE="5" alt="Linux Today Logo">
|
||||||
|
<BR>
|
||||||
|
<FONT size="-1">
|
||||||
|
<A href="http://linux.com">linux.com</A> partner</FONT>
|
||||||
|
<P>
|
||||||
|
</TD>
|
||||||
|
</TR>
|
||||||
|
</TABLE>
|
||||||
|
<FONT size="2" face="Helvetica">
|
||||||
|
[ <A href="http://linuxtoday.com/">headlines</A> |
|
||||||
|
<A href="http://features.linuxtoday.com/">features</A> |
|
||||||
|
<A href="http://commercial.linuxtoday.com/">commercial</A> |
|
||||||
|
<A href="http://security.linuxtoday.com/">security</A> |
|
||||||
|
<A href="http://jobs.linuxtoday.com/">jobs</A> |
|
||||||
|
<A href="http://linuxtoday.com/volt/">volt</A> |
|
||||||
|
<A href="http://linuxtoday.com/contrib.pl">contribute/submit</A> |
|
||||||
|
<A href="http://linuxtoday.com/advertise/">advertise</A> |
|
||||||
|
<A href="http://linuxtoday.com/search.html">search</A> |
|
||||||
|
<A href="http://linuxtoday.com/digests/">site digests</A> |
|
||||||
|
<A href="http://linuxtoday.com/mail-lists">mailing lists</A> |
|
||||||
|
<A href="http://linuxtoday.com/about/">about us</A> |
|
||||||
|
<A href="http://linuxtoday.com/linkus.html">link us</A> ]</FONT>
|
||||||
|
</CENTER>
|
||||||
|
<P>
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
88
result/HTML/test3.html
Normal file
88
result/HTML/test3.html
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
<!DOCTYPE HTML>
|
||||||
|
<HTML>
|
||||||
|
<HEAD>
|
||||||
|
<BASE target="contents">
|
||||||
|
</HEAD>
|
||||||
|
<A name="ProblemDomain.Package">
|
||||||
|
<H2>Component Package diagram ProblemDomain</H2>
|
||||||
|
</A>
|
||||||
|
<P>
|
||||||
|
<HR>
|
||||||
|
<DL>
|
||||||
|
<DT>
|
||||||
|
<B>Stereotype </B>problem domain</DT>
|
||||||
|
<DT>
|
||||||
|
<B>Alias </B>Problem Domain</DT>
|
||||||
|
<DT>
|
||||||
|
<B>Note </B>
|
||||||
|
<DD>The Problem Domain package is the model behind the Human
|
||||||
|
<DD>Interface, thats stores and manipulates the Family Tree.
|
||||||
|
</DD>
|
||||||
|
</DD>
|
||||||
|
</DT>
|
||||||
|
</DL>
|
||||||
|
<P>
|
||||||
|
<HR>
|
||||||
|
<DL>
|
||||||
|
<DT>
|
||||||
|
<H4>Class <A href="HumanInterface.FamilyFrame.html#HumanInterface.FamilyFrame">HumanInterface.FamilyFrame</A>
|
||||||
|
</H4>
|
||||||
|
</DT>
|
||||||
|
<DT>
|
||||||
|
<H4>Class <A href="ProblemDomain.Birth.html#ProblemDomain.Birth">ProblemDomain.Birth</A>
|
||||||
|
</H4>
|
||||||
|
</DT>
|
||||||
|
<DT>
|
||||||
|
<H4>Class <A href="ProblemDomain.Death.html#ProblemDomain.Death">ProblemDomain.Death</A>
|
||||||
|
</H4>
|
||||||
|
</DT>
|
||||||
|
<DT>
|
||||||
|
<H4>Class <A href="ProblemDomain.Divorce.html#ProblemDomain.Divorce">ProblemDomain.Divorce</A>
|
||||||
|
</H4>
|
||||||
|
</DT>
|
||||||
|
<DT>
|
||||||
|
<H4>Class <A href="ProblemDomain.Family.html#ProblemDomain.Family">ProblemDomain.Family</A>
|
||||||
|
</H4>
|
||||||
|
</DT>
|
||||||
|
<DT>
|
||||||
|
<H4>Class <A href="ProblemDomain.Individual.html#ProblemDomain.Individual">ProblemDomain.Individual</A>
|
||||||
|
</H4>
|
||||||
|
</DT>
|
||||||
|
<DT>
|
||||||
|
<H4>Class <A href="ProblemDomain.LifeEvent.html#ProblemDomain.LifeEvent">ProblemDomain.LifeEvent</A>
|
||||||
|
</H4>
|
||||||
|
</DT>
|
||||||
|
<DT>
|
||||||
|
<H4>Class <A href="ProblemDomain.Marriage.html#ProblemDomain.Marriage">ProblemDomain.Marriage</A>
|
||||||
|
</H4>
|
||||||
|
</DT>
|
||||||
|
<DT>
|
||||||
|
<H4>Class <A href="ProblemDomain.Note.html#ProblemDomain.Note">ProblemDomain.Note</A>
|
||||||
|
</H4>
|
||||||
|
</DT>
|
||||||
|
</DL>
|
||||||
|
<H4>
|
||||||
|
<B>Links</B>
|
||||||
|
</H4>
|
||||||
|
<UL>
|
||||||
|
<LI>
|
||||||
|
<B>Link to </B>
|
||||||
|
<A href="HumanInterface.Package.html#HumanInterface.Package">HumanInterface</A>
|
||||||
|
</LI>
|
||||||
|
</UL>
|
||||||
|
<DIR></DIR>
|
||||||
|
<UL>
|
||||||
|
<LI>
|
||||||
|
<B>Link to </B>
|
||||||
|
<A href="DataManagement.FlatFile.Package.html#DataManagement.FlatFile.Package">DataManagement.FlatFile</A>
|
||||||
|
</LI>
|
||||||
|
</UL>
|
||||||
|
<DIR></DIR>
|
||||||
|
<UL>
|
||||||
|
<LI>
|
||||||
|
<B>Link to </B>
|
||||||
|
<A href="DataManagement.Package.html#DataManagement.Package">DataManagement</A>
|
||||||
|
</LI>
|
||||||
|
</UL>
|
||||||
|
<DIR></DIR>
|
||||||
|
</HTML>
|
14
test/HTML/Down.html
Normal file
14
test/HTML/Down.html
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
|
||||||
|
"http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>This service is temporary down</title>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body bgcolor="#FFFFFF">
|
||||||
|
<h1 align="center">Sorry, this service is temporary down</h1>
|
||||||
|
We are doing our best to get it back on-line,
|
||||||
|
|
||||||
|
<p>The W3C system administrators</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
33
test/HTML/test2.html
Normal file
33
test/HTML/test2.html
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
|
<HTML><HEAD> <TITLE>Linux Today</TITLE></HEAD>
|
||||||
|
<body bgcolor="White" link="Blue" text="Black" VLINK="Black" ALINK="Red">
|
||||||
|
|
||||||
|
<center>
|
||||||
|
<TABLE BORDER="0" WIDTH="100%" CELLSPACING="0" CELLPADDING="0">
|
||||||
|
<TR BGCOLOR="#FFFFFF">
|
||||||
|
<TD HEIGHT="90">
|
||||||
|
<a href="http://linuxtoday.com/cgi-bin/click.pl?adnum=49"><img src="/pics/door_linux.gif" border="0" width="468" height="60" alt="Atipa Linux solutions. Your reliable cluster, server, and workstation solution. Win a Free Celeron Linux Workstation!"></A>
|
||||||
|
|
||||||
|
</TD>
|
||||||
|
<TD><IMG SRC="/pics/lt.gif" VSPACE=5 alt="Linux Today Logo"><br><font size="-1"><a href="http://linux.com">linux.com</a> partner</font><p></TD>
|
||||||
|
|
||||||
|
</TR>
|
||||||
|
</TABLE>
|
||||||
|
<font size="2" face="Helvetica">
|
||||||
|
[ <a href="http://linuxtoday.com/">headlines</A> |
|
||||||
|
<a href="http://features.linuxtoday.com/">features</A> |
|
||||||
|
<a href="http://commercial.linuxtoday.com/">commercial</a> |
|
||||||
|
<a href="http://security.linuxtoday.com/">security</a> |
|
||||||
|
<a href="http://jobs.linuxtoday.com/">jobs</a> |
|
||||||
|
<a href="http://linuxtoday.com/volt/">volt</a> |
|
||||||
|
<a href="http://linuxtoday.com/contrib.pl">contribute/submit</a> |
|
||||||
|
<a href="http://linuxtoday.com/advertise/">advertise</A> |
|
||||||
|
<a href="http://linuxtoday.com/search.html">search</A> |
|
||||||
|
<a href="http://linuxtoday.com/digests/">site digests</A> |
|
||||||
|
<a href="http://linuxtoday.com/mail-lists">mailing lists</A> |
|
||||||
|
<a href="http://linuxtoday.com/about/">about us</a> |
|
||||||
|
<a href="http://linuxtoday.com/linkus.html">link us</A> ]</font>
|
||||||
|
</center>
|
||||||
|
<P>
|
||||||
|
</body>
|
||||||
|
</html>
|
34
test/HTML/test3.html
Normal file
34
test/HTML/test3.html
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<base target=contents>
|
||||||
|
</head>
|
||||||
|
<a name=ProblemDomain.Package><h2>Component Package diagram ProblemDomain</h2>
|
||||||
|
</a><p><hr></p>
|
||||||
|
<dl>
|
||||||
|
<dt><b>Stereotype </b>problem domain</dt>
|
||||||
|
<dt><b>Alias </b>Problem Domain</dt>
|
||||||
|
<dt><b>Note </b><dd>The Problem Domain package is the model behind the Human
|
||||||
|
<dd>Interface, thats stores and manipulates the Family Tree.
|
||||||
|
</dl>
|
||||||
|
<p><hr></p>
|
||||||
|
<dl>
|
||||||
|
|
||||||
|
<dt><h4>Class <a href=HumanInterface.FamilyFrame.html#HumanInterface.FamilyFrame>HumanInterface.FamilyFrame</a></h4></dt>
|
||||||
|
<dt><h4>Class <a href=ProblemDomain.Birth.html#ProblemDomain.Birth>ProblemDomain.Birth</a></h4></dt>
|
||||||
|
<dt><h4>Class <a href=ProblemDomain.Death.html#ProblemDomain.Death>ProblemDomain.Death</a></h4></dt>
|
||||||
|
<dt><h4>Class <a href=ProblemDomain.Divorce.html#ProblemDomain.Divorce>ProblemDomain.Divorce</a></h4></dt>
|
||||||
|
<dt><h4>Class <a href=ProblemDomain.Family.html#ProblemDomain.Family>ProblemDomain.Family</a></h4></dt>
|
||||||
|
<dt><h4>Class <a href=ProblemDomain.Individual.html#ProblemDomain.Individual>ProblemDomain.Individual</a></h4></dt>
|
||||||
|
<dt><h4>Class <a href=ProblemDomain.LifeEvent.html#ProblemDomain.LifeEvent>ProblemDomain.LifeEvent</a></h4></dt>
|
||||||
|
<dt><h4>Class <a href=ProblemDomain.Marriage.html#ProblemDomain.Marriage>ProblemDomain.Marriage</a></h4></dt>
|
||||||
|
<dt><h4>Class <a href=ProblemDomain.Note.html#ProblemDomain.Note>ProblemDomain.Note</a></h4></dt>
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<h4><b>Links</h4></b>
|
||||||
|
<ul><li><b>Link to </b><a href=HumanInterface.Package.html#HumanInterface.Package>HumanInterface</a></li></ul>
|
||||||
|
<dir></dir>
|
||||||
|
<ul><li><b>Link to </b><a href=DataManagement.FlatFile.Package.html#DataManagement.FlatFile.Package>DataManagement.FlatFile</a></li></ul>
|
||||||
|
<dir></dir>
|
||||||
|
<ul><li><b>Link to </b><a href=DataManagement.Package.html#DataManagement.Package>DataManagement</a></li></ul>
|
||||||
|
<dir></dir>
|
||||||
|
</html>
|
@ -27,7 +27,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "HTMLparser.h"
|
#include "HTMLparser.h"
|
||||||
#include "tree.h"
|
#include "HTMLtree.h"
|
||||||
#include "debugXML.h"
|
#include "debugXML.h"
|
||||||
|
|
||||||
static int debug = 0;
|
static int debug = 0;
|
||||||
@ -80,7 +80,7 @@ void parseAndPrintFile(char *filename) {
|
|||||||
* print it.
|
* print it.
|
||||||
*/
|
*/
|
||||||
if (!debug)
|
if (!debug)
|
||||||
xmlDocDump(stdout, doc);
|
htmlDocDump(stdout, doc);
|
||||||
else
|
else
|
||||||
xmlDebugDumpDocument(stdout, doc);
|
xmlDebugDumpDocument(stdout, doc);
|
||||||
|
|
||||||
@ -111,7 +111,7 @@ void parseAndPrintBuffer(CHAR *buf) {
|
|||||||
* print it.
|
* print it.
|
||||||
*/
|
*/
|
||||||
if (!debug)
|
if (!debug)
|
||||||
xmlDocDump(stdout, doc);
|
htmlDocDump(stdout, doc);
|
||||||
else
|
else
|
||||||
xmlDebugDumpDocument(stdout, doc);
|
xmlDebugDumpDocument(stdout, doc);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user