1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-30 22:43:14 +03:00

Restore binary compat, more HTML stuff, allow stdin input, Daniel.

This commit is contained in:
Daniel Veillard
1999-07-06 22:25:25 +00:00
parent be70ff7162
commit 5233ffc8d3
10 changed files with 898 additions and 354 deletions

View File

@ -1,3 +1,9 @@
Wed Jul 7 00:25:42 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.h : Oops removed the binary compatibility problem
* HTMLparser.[ch], HTMLtree.h : More work on the HTML parse/dump
* parser.c, HTMLparser.c: applied patches for reading from stdin
Mon Jul 5 18:45:31 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org> Mon Jul 5 18:45:31 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.c, entities.c, valid.c: cleanup bug #1591 * parser.c, entities.c, valid.c: cleanup bug #1591

File diff suppressed because it is too large Load Diff

View File

@ -10,6 +10,9 @@
#define __HTML_PARSER_H__ #define __HTML_PARSER_H__
#include "parser.h" #include "parser.h"
/*
* Most of the back-end structures from XML and HTML are shared
*/
typedef xmlParserCtxt htmlParserCtxt; typedef xmlParserCtxt htmlParserCtxt;
typedef xmlParserCtxtPtr htmlParserCtxtPtr; typedef xmlParserCtxtPtr htmlParserCtxtPtr;
typedef xmlParserNodeInfo htmlParserNodeInfo; typedef xmlParserNodeInfo htmlParserNodeInfo;
@ -20,7 +23,33 @@ typedef xmlParserInputPtr htmlParserInputPtr;
typedef xmlDocPtr htmlDocPtr; typedef xmlDocPtr htmlDocPtr;
typedef xmlNodePtr htmlNodePtr; typedef xmlNodePtr htmlNodePtr;
xmlEntityPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt); /*
* Internal description of an HTML element
*/
/* One record describing a single HTML tag and its parsing properties. */
typedef struct htmlElemDesc {
const CHAR *name; /* The tag name */
int startTag; /* Whether the start tag can be implied (i.e. omitted in the input) */
int endTag; /* Whether the end tag can be implied (i.e. omitted in the input) */
int empty; /* Is this an empty element (one without content) ? */
int depr; /* Is this a deprecated element ? */
int dtd; /* 1: only in the Loose DTD, 2: only in the Frameset one;
            NOTE(review): 0 presumably means no DTD restriction - confirm */
const char *desc; /* the description of the element (plain char, not CHAR) */
} htmlElemDesc, *htmlElemDescPtr;
/*
* Internal description of an HTML entity
*/
/* One record mapping a named HTML entity to the character it stands for. */
typedef struct htmlEntityDesc {
int value; /* the UNICODE value (code point) for the character */
const CHAR *name; /* The entity name */
const char *desc; /* the description of the entity (plain char, not CHAR) */
} htmlEntityDesc, *htmlEntityDescPtr;
/*
* There are only a few public functions.
*/
htmlEntityDescPtr
htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
int htmlParseCharRef(htmlParserCtxtPtr ctxt); int htmlParseCharRef(htmlParserCtxtPtr ctxt);
void htmlParseElement(htmlParserCtxtPtr ctxt); void htmlParseElement(htmlParserCtxtPtr ctxt);

View File

@ -23,6 +23,7 @@ libxml_la_SOURCES = \
HTMLparser.c \ HTMLparser.c \
debugXML.c \ debugXML.c \
tree.c \ tree.c \
HTMLtree.c \
valid.c valid.c
xmlincdir = $(includedir)/gnome-xml xmlincdir = $(includedir)/gnome-xml
@ -30,6 +31,7 @@ xmlinc_HEADERS = \
entities.h \ entities.h \
encoding.h \ encoding.h \
parser.h \ parser.h \
HTMLparser.h \
parserInternals.h \ parserInternals.h \
debugXML.h \ debugXML.h \
xml-error.h \ xml-error.h \

6
SAX.c
View File

@ -180,8 +180,14 @@ resolveEntity(void *ctx, const CHAR *publicId, const CHAR *systemId)
* TODO : not 100% sure that this is the appropriate handling in that case. * TODO : not 100% sure that this is the appropriate handling in that case.
*/ */
if (systemId != NULL) { if (systemId != NULL) {
if (!xmlStrncmp(systemId, "http://", 7)) {
/* !!!!!!!!! TODO */
} else if (!xmlStrncmp(systemId, "ftp://", 6)) {
/* !!!!!!!!! TODO */
} else {
return(xmlNewInputFromFile(ctxt, systemId)); return(xmlNewInputFromFile(ctxt, systemId));
} }
}
return(NULL); return(NULL);
} }

View File

@ -10,6 +10,9 @@
#define __HTML_PARSER_H__ #define __HTML_PARSER_H__
#include "parser.h" #include "parser.h"
/*
* Most of the back-end structures from XML and HTML are shared
*/
typedef xmlParserCtxt htmlParserCtxt; typedef xmlParserCtxt htmlParserCtxt;
typedef xmlParserCtxtPtr htmlParserCtxtPtr; typedef xmlParserCtxtPtr htmlParserCtxtPtr;
typedef xmlParserNodeInfo htmlParserNodeInfo; typedef xmlParserNodeInfo htmlParserNodeInfo;
@ -20,7 +23,33 @@ typedef xmlParserInputPtr htmlParserInputPtr;
typedef xmlDocPtr htmlDocPtr; typedef xmlDocPtr htmlDocPtr;
typedef xmlNodePtr htmlNodePtr; typedef xmlNodePtr htmlNodePtr;
xmlEntityPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt); /*
* Internal description of an HTML element
*/
/* One record describing a single HTML tag and its parsing properties. */
typedef struct htmlElemDesc {
const CHAR *name; /* The tag name */
int startTag; /* Whether the start tag can be implied (i.e. omitted in the input) */
int endTag; /* Whether the end tag can be implied (i.e. omitted in the input) */
int empty; /* Is this an empty element (one without content) ? */
int depr; /* Is this a deprecated element ? */
int dtd; /* 1: only in the Loose DTD, 2: only in the Frameset one;
            NOTE(review): 0 presumably means no DTD restriction - confirm */
const char *desc; /* the description of the element (plain char, not CHAR) */
} htmlElemDesc, *htmlElemDescPtr;
/*
* Internal description of an HTML entity
*/
/* One record mapping a named HTML entity to the character it stands for. */
typedef struct htmlEntityDesc {
int value; /* the UNICODE value (code point) for the character */
const CHAR *name; /* The entity name */
const char *desc; /* the description of the entity (plain char, not CHAR) */
} htmlEntityDesc, *htmlEntityDescPtr;
/*
* There are only a few public functions.
*/
htmlEntityDescPtr
htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
int htmlParseCharRef(htmlParserCtxtPtr ctxt); int htmlParseCharRef(htmlParserCtxtPtr ctxt);
void htmlParseElement(htmlParserCtxtPtr ctxt); void htmlParseElement(htmlParserCtxtPtr ctxt);

View File

@ -230,8 +230,6 @@ typedef struct xmlDoc {
char *name; /* name/filename/URI of the document */ char *name; /* name/filename/URI of the document */
const CHAR *version; /* the XML version string */ const CHAR *version; /* the XML version string */
const CHAR *encoding; /* encoding, if any */ const CHAR *encoding; /* encoding, if any */
const CHAR *ID; /* the HTML version */
const CHAR *DTD; /* the HTML dtd URI */
int compression;/* level of zlib compression */ int compression;/* level of zlib compression */
int standalone; /* standalone document (no external refs) */ int standalone; /* standalone document (no external refs) */
struct xmlDtd *intSubset; /* the document internal subset */ struct xmlDtd *intSubset; /* the document internal subset */

166
parser.c
View File

@ -264,28 +264,36 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
#endif #endif
int res; int res;
int len; int len;
int cnt;
struct stat buf; struct stat buf;
char *buffer; char *buffer, *nbuf;
xmlParserInputPtr inputStream; xmlParserInputPtr inputStream;
/* xmlCharEncoding enc; */ /* xmlCharEncoding enc; */
res = stat(filename, &buf); #define MINLEN 40000
if (res < 0) return(NULL);
if (strcmp(filename,"-") == 0) {
#ifdef HAVE_ZLIB_H #ifdef HAVE_ZLIB_H
len = (buf.st_size * 8) + 1000; input = gzdopen (fileno(stdin), "r");
retry_bigger: if (input == NULL) {
buffer = malloc(len); fprintf (stderr, "Cannot read from stdin\n");
#else perror ("gzdopen failed");
len = buf.st_size + 100;
buffer = malloc(len);
#endif
if (buffer == NULL) {
perror("malloc");
return(NULL); return(NULL);
} }
#else
memset(buffer, 0, len); #ifdef WIN32
input = -1;
#else
input = fileno(stdin);
#endif
if (input < 0) {
fprintf (stderr, "Cannot read from stdin\n");
perror ("open failed");
return(NULL);
}
#endif
len = MINLEN;
} else {
#ifdef HAVE_ZLIB_H #ifdef HAVE_ZLIB_H
input = gzopen (filename, "r"); input = gzopen (filename, "r");
if (input == NULL) { if (input == NULL) {
@ -305,10 +313,41 @@ retry_bigger:
return(NULL); return(NULL);
} }
#endif #endif
res = stat(filename, &buf);
if (res < 0)
return(NULL);
len = buf.st_size+1;
if (len < MINLEN)
len = MINLEN;
}
buffer = (char *)malloc(len*sizeof(char));
if (buffer == NULL) {
fprintf (stderr, "Cannot malloc\n");
perror ("malloc failed");
return(NULL);
}
cnt = 0;
#ifdef HAVE_ZLIB_H #ifdef HAVE_ZLIB_H
res = gzread(input, buffer, len); while(!gzeof(input)) {
#else #else
res = read(input, buffer, buf.st_size); while(1) {
#endif
if (cnt >= len) {
len *= 2;
nbuf = (char *)realloc(buffer,len*sizeof(char));
if (nbuf == NULL) {
fprintf(stderr,"Cannot realloc\n");
free(buffer);
perror ("realloc failed");
return(NULL);
}
buffer = nbuf;
}
#ifdef HAVE_ZLIB_H
res = gzread(input, &buffer[cnt], len-cnt);
#else
res = read(input, &buffer[cnt], len-cnt);
#endif #endif
if (res < 0) { if (res < 0) {
fprintf (stderr, "Cannot read file %s :\n", filename); fprintf (stderr, "Cannot read file %s :\n", filename);
@ -319,19 +358,17 @@ retry_bigger:
#endif #endif
return(NULL); return(NULL);
} }
if (res == 0)
break;
cnt += res;
}
#ifdef HAVE_ZLIB_H #ifdef HAVE_ZLIB_H
gzclose(input); gzclose(input);
if (res >= len) {
free(buffer);
len *= 2;
goto retry_bigger;
}
buf.st_size = res;
#else #else
close(input); close(input);
#endif #endif
buffer[buf.st_size] = '\0'; buffer[cnt] = '\0';
inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput)); inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
if (inputStream == NULL) { if (inputStream == NULL) {
@ -4594,28 +4631,36 @@ xmlCreateFileParserCtxt(const char *filename)
#endif #endif
int res; int res;
int len; int len;
int cnt;
struct stat buf; struct stat buf;
char *buffer; char *buffer, *nbuf;
xmlParserInputPtr inputStream; xmlParserInputPtr inputStream;
xmlCharEncoding enc; xmlCharEncoding enc;
res = stat(filename, &buf); #define MINLEN 40000
if (res < 0) return(NULL);
if (strcmp(filename,"-") == 0) {
#ifdef HAVE_ZLIB_H #ifdef HAVE_ZLIB_H
len = (buf.st_size * 8) + 1000; input = gzdopen (fileno(stdin), "r");
retry_bigger: if (input == NULL) {
buffer = malloc(len); fprintf (stderr, "Cannot read from stdin\n");
#else perror ("gzdopen failed");
len = buf.st_size + 100;
buffer = malloc(len);
#endif
if (buffer == NULL) {
perror("malloc");
return(NULL); return(NULL);
} }
#else
memset(buffer, 0, len); #ifdef WIN32
input = -1;
#else
input = fileno(stdin);
#endif
if (input < 0) {
fprintf (stderr, "Cannot read from stdin\n");
perror ("open failed");
return(NULL);
}
#endif
len = MINLEN;
} else {
#ifdef HAVE_ZLIB_H #ifdef HAVE_ZLIB_H
input = gzopen (filename, "r"); input = gzopen (filename, "r");
if (input == NULL) { if (input == NULL) {
@ -4635,10 +4680,41 @@ retry_bigger:
return(NULL); return(NULL);
} }
#endif #endif
res = stat(filename, &buf);
if (res < 0)
return(NULL);
len = buf.st_size+1;
if (len < MINLEN)
len = MINLEN;
}
buffer = (char *)malloc(len*sizeof(char));
if (buffer == NULL) {
fprintf (stderr, "Cannot malloc\n");
perror ("malloc failed");
return(NULL);
}
cnt = 0;
#ifdef HAVE_ZLIB_H #ifdef HAVE_ZLIB_H
res = gzread(input, buffer, len); while(!gzeof(input)) {
#else #else
res = read(input, buffer, buf.st_size); while(1) {
#endif
if (cnt == len) {
len *= 2;
nbuf = (char *)realloc(buffer,len*sizeof(char));
if (nbuf == NULL) {
fprintf(stderr,"Cannot realloc\n");
free(buffer);
perror ("realloc failed");
return(NULL);
}
buffer = nbuf;
}
#ifdef HAVE_ZLIB_H
res = gzread(input, &buffer[cnt], len-cnt);
#else
res = read(input, &buffer[cnt], len-cnt);
#endif #endif
if (res < 0) { if (res < 0) {
fprintf (stderr, "Cannot read file %s :\n", filename); fprintf (stderr, "Cannot read file %s :\n", filename);
@ -4648,20 +4724,18 @@ retry_bigger:
perror ("read failed"); perror ("read failed");
#endif #endif
return(NULL); return(NULL);
}
if (res == 0)
break;
cnt += res;
} }
#ifdef HAVE_ZLIB_H #ifdef HAVE_ZLIB_H
gzclose(input); gzclose(input);
if (res >= len) {
free(buffer);
len *= 2;
goto retry_bigger;
}
buf.st_size = res;
#else #else
close(input); close(input);
#endif #endif
buffer[res] = '\0'; buffer[cnt] = '\0';
ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt)); ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
if (ctxt == NULL) { if (ctxt == NULL) {

2
tree.c
View File

@ -387,8 +387,6 @@ xmlNewDoc(const CHAR *version) {
cur->type = XML_DOCUMENT_NODE; cur->type = XML_DOCUMENT_NODE;
cur->version = xmlStrdup(version); cur->version = xmlStrdup(version);
cur->ID = NULL;
cur->DTD = NULL;
cur->name = NULL; cur->name = NULL;
cur->root = NULL; cur->root = NULL;
cur->intSubset = NULL; cur->intSubset = NULL;

2
tree.h
View File

@ -230,8 +230,6 @@ typedef struct xmlDoc {
char *name; /* name/filename/URI of the document */ char *name; /* name/filename/URI of the document */
const CHAR *version; /* the XML version string */ const CHAR *version; /* the XML version string */
const CHAR *encoding; /* encoding, if any */ const CHAR *encoding; /* encoding, if any */
const CHAR *ID; /* the HTML version */
const CHAR *DTD; /* the HTML dtd URI */
int compression;/* level of zlib compression */ int compression;/* level of zlib compression */
int standalone; /* standalone document (no external refs) */ int standalone; /* standalone document (no external refs) */
struct xmlDtd *intSubset; /* the document internal subset */ struct xmlDtd *intSubset; /* the document internal subset */