mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-30 22:43:14 +03:00
Restore binary compat, more HTML stuff, allow stdin input, Daniel.
This commit is contained in:
@ -1,3 +1,9 @@
|
|||||||
|
Wed Jul 7 00:25:42 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||||
|
|
||||||
|
* parser.h : Oops removed the binary compatibility problem
|
||||||
|
* HTMLparser.[ch], HTMLtree.h : More work on the HTML parse/dump
|
||||||
|
* parser.c, HTMLparser.c: applied patches for reading from stdin
|
||||||
|
|
||||||
Mon Jul 5 18:45:31 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
|
Mon Jul 5 18:45:31 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||||
|
|
||||||
* parser.c, entities.c, valid.c: cleanup bug #1591
|
* parser.c, entities.c, valid.c: cleanup bug #1591
|
||||||
|
906
HTMLparser.c
906
HTMLparser.c
File diff suppressed because it is too large
Load Diff
31
HTMLparser.h
31
HTMLparser.h
@ -10,6 +10,9 @@
|
|||||||
#define __HTML_PARSER_H__
|
#define __HTML_PARSER_H__
|
||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Most of the back-end structures from XML and HTML are shared
|
||||||
|
*/
|
||||||
typedef xmlParserCtxt htmlParserCtxt;
|
typedef xmlParserCtxt htmlParserCtxt;
|
||||||
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
|
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
|
||||||
typedef xmlParserNodeInfo htmlParserNodeInfo;
|
typedef xmlParserNodeInfo htmlParserNodeInfo;
|
||||||
@ -20,7 +23,33 @@ typedef xmlParserInputPtr htmlParserInputPtr;
|
|||||||
typedef xmlDocPtr htmlDocPtr;
|
typedef xmlDocPtr htmlDocPtr;
|
||||||
typedef xmlNodePtr htmlNodePtr;
|
typedef xmlNodePtr htmlNodePtr;
|
||||||
|
|
||||||
xmlEntityPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt);
|
/*
|
||||||
|
* Internal description of an HTML element
|
||||||
|
*/
|
||||||
|
typedef struct htmlElemDesc {
|
||||||
|
const CHAR *name; /* The tag name */
|
||||||
|
int startTag; /* Whether the start tag can be implied */
|
||||||
|
int endTag; /* Whether the end tag can be implied */
|
||||||
|
int empty; /* Is this an empty element ? */
|
||||||
|
int depr; /* Is this a deprecated element ? */
|
||||||
|
int dtd; /* 1: only in Loose DTD, 2: only Frameset one */
|
||||||
|
const char *desc; /* the description */
|
||||||
|
} htmlElemDesc, *htmlElemDescPtr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Internal description of an HTML entity
|
||||||
|
*/
|
||||||
|
typedef struct htmlEntityDesc {
|
||||||
|
int value; /* the UNICODE value for the character */
|
||||||
|
const CHAR *name; /* The entity name */
|
||||||
|
const char *desc; /* the description */
|
||||||
|
} htmlEntityDesc, *htmlEntityDescPtr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There are only a few public functions.
|
||||||
|
*/
|
||||||
|
htmlEntityDescPtr
|
||||||
|
htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
|
||||||
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
|
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
|
||||||
void htmlParseElement(htmlParserCtxtPtr ctxt);
|
void htmlParseElement(htmlParserCtxtPtr ctxt);
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@ libxml_la_SOURCES = \
|
|||||||
HTMLparser.c \
|
HTMLparser.c \
|
||||||
debugXML.c \
|
debugXML.c \
|
||||||
tree.c \
|
tree.c \
|
||||||
|
HTMLtree.c \
|
||||||
valid.c
|
valid.c
|
||||||
|
|
||||||
xmlincdir = $(includedir)/gnome-xml
|
xmlincdir = $(includedir)/gnome-xml
|
||||||
@ -30,6 +31,7 @@ xmlinc_HEADERS = \
|
|||||||
entities.h \
|
entities.h \
|
||||||
encoding.h \
|
encoding.h \
|
||||||
parser.h \
|
parser.h \
|
||||||
|
HTMLparser.h \
|
||||||
parserInternals.h \
|
parserInternals.h \
|
||||||
debugXML.h \
|
debugXML.h \
|
||||||
xml-error.h \
|
xml-error.h \
|
||||||
|
6
SAX.c
6
SAX.c
@ -180,8 +180,14 @@ resolveEntity(void *ctx, const CHAR *publicId, const CHAR *systemId)
|
|||||||
* TODO : not 100% sure that this is the appropriate handling in that case.
|
* TODO : not 100% sure that this is the appropriate handling in that case.
|
||||||
*/
|
*/
|
||||||
if (systemId != NULL) {
|
if (systemId != NULL) {
|
||||||
|
if (!xmlStrncmp(systemId, "http://", 7)) {
|
||||||
|
/* !!!!!!!!! TODO */
|
||||||
|
} else if (!xmlStrncmp(systemId, "ftp://", 6)) {
|
||||||
|
/* !!!!!!!!! TODO */
|
||||||
|
} else {
|
||||||
return(xmlNewInputFromFile(ctxt, systemId));
|
return(xmlNewInputFromFile(ctxt, systemId));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,6 +10,9 @@
|
|||||||
#define __HTML_PARSER_H__
|
#define __HTML_PARSER_H__
|
||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Most of the back-end structures from XML and HTML are shared
|
||||||
|
*/
|
||||||
typedef xmlParserCtxt htmlParserCtxt;
|
typedef xmlParserCtxt htmlParserCtxt;
|
||||||
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
|
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
|
||||||
typedef xmlParserNodeInfo htmlParserNodeInfo;
|
typedef xmlParserNodeInfo htmlParserNodeInfo;
|
||||||
@ -20,7 +23,33 @@ typedef xmlParserInputPtr htmlParserInputPtr;
|
|||||||
typedef xmlDocPtr htmlDocPtr;
|
typedef xmlDocPtr htmlDocPtr;
|
||||||
typedef xmlNodePtr htmlNodePtr;
|
typedef xmlNodePtr htmlNodePtr;
|
||||||
|
|
||||||
xmlEntityPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt);
|
/*
|
||||||
|
* Internal description of an HTML element
|
||||||
|
*/
|
||||||
|
typedef struct htmlElemDesc {
|
||||||
|
const CHAR *name; /* The tag name */
|
||||||
|
int startTag; /* Whether the start tag can be implied */
|
||||||
|
int endTag; /* Whether the end tag can be implied */
|
||||||
|
int empty; /* Is this an empty element ? */
|
||||||
|
int depr; /* Is this a deprecated element ? */
|
||||||
|
int dtd; /* 1: only in Loose DTD, 2: only Frameset one */
|
||||||
|
const char *desc; /* the description */
|
||||||
|
} htmlElemDesc, *htmlElemDescPtr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Internal description of an HTML entity
|
||||||
|
*/
|
||||||
|
typedef struct htmlEntityDesc {
|
||||||
|
int value; /* the UNICODE value for the character */
|
||||||
|
const CHAR *name; /* The entity name */
|
||||||
|
const char *desc; /* the description */
|
||||||
|
} htmlEntityDesc, *htmlEntityDescPtr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There are only a few public functions.
|
||||||
|
*/
|
||||||
|
htmlEntityDescPtr
|
||||||
|
htmlParseEntityRef(htmlParserCtxtPtr ctxt, CHAR **str);
|
||||||
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
|
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
|
||||||
void htmlParseElement(htmlParserCtxtPtr ctxt);
|
void htmlParseElement(htmlParserCtxtPtr ctxt);
|
||||||
|
|
||||||
|
@ -230,8 +230,6 @@ typedef struct xmlDoc {
|
|||||||
char *name; /* name/filename/URI of the document */
|
char *name; /* name/filename/URI of the document */
|
||||||
const CHAR *version; /* the XML version string */
|
const CHAR *version; /* the XML version string */
|
||||||
const CHAR *encoding; /* encoding, if any */
|
const CHAR *encoding; /* encoding, if any */
|
||||||
const CHAR *ID; /* the HTML version */
|
|
||||||
const CHAR *DTD; /* the HTML dtd URI */
|
|
||||||
int compression;/* level of zlib compression */
|
int compression;/* level of zlib compression */
|
||||||
int standalone; /* standalone document (no external refs) */
|
int standalone; /* standalone document (no external refs) */
|
||||||
struct xmlDtd *intSubset; /* the document internal subset */
|
struct xmlDtd *intSubset; /* the document internal subset */
|
||||||
|
166
parser.c
166
parser.c
@ -264,28 +264,36 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
|
|||||||
#endif
|
#endif
|
||||||
int res;
|
int res;
|
||||||
int len;
|
int len;
|
||||||
|
int cnt;
|
||||||
struct stat buf;
|
struct stat buf;
|
||||||
char *buffer;
|
char *buffer, *nbuf;
|
||||||
xmlParserInputPtr inputStream;
|
xmlParserInputPtr inputStream;
|
||||||
/* xmlCharEncoding enc; */
|
/* xmlCharEncoding enc; */
|
||||||
|
|
||||||
res = stat(filename, &buf);
|
#define MINLEN 40000
|
||||||
if (res < 0) return(NULL);
|
|
||||||
|
|
||||||
|
if (strcmp(filename,"-") == 0) {
|
||||||
#ifdef HAVE_ZLIB_H
|
#ifdef HAVE_ZLIB_H
|
||||||
len = (buf.st_size * 8) + 1000;
|
input = gzdopen (fileno(stdin), "r");
|
||||||
retry_bigger:
|
if (input == NULL) {
|
||||||
buffer = malloc(len);
|
fprintf (stderr, "Cannot read from stdin\n");
|
||||||
#else
|
perror ("gzdopen failed");
|
||||||
len = buf.st_size + 100;
|
|
||||||
buffer = malloc(len);
|
|
||||||
#endif
|
|
||||||
if (buffer == NULL) {
|
|
||||||
perror("malloc");
|
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
memset(buffer, 0, len);
|
#ifdef WIN32
|
||||||
|
input = -1;
|
||||||
|
#else
|
||||||
|
input = fileno(stdin);
|
||||||
|
#endif
|
||||||
|
if (input < 0) {
|
||||||
|
fprintf (stderr, "Cannot read from stdin\n");
|
||||||
|
perror ("open failed");
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
len = MINLEN;
|
||||||
|
} else {
|
||||||
#ifdef HAVE_ZLIB_H
|
#ifdef HAVE_ZLIB_H
|
||||||
input = gzopen (filename, "r");
|
input = gzopen (filename, "r");
|
||||||
if (input == NULL) {
|
if (input == NULL) {
|
||||||
@ -305,10 +313,41 @@ retry_bigger:
|
|||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
res = stat(filename, &buf);
|
||||||
|
if (res < 0)
|
||||||
|
return(NULL);
|
||||||
|
len = buf.st_size+1;
|
||||||
|
if (len < MINLEN)
|
||||||
|
len = MINLEN;
|
||||||
|
}
|
||||||
|
buffer = (char *)malloc(len*sizeof(char));
|
||||||
|
if (buffer == NULL) {
|
||||||
|
fprintf (stderr, "Cannot malloc\n");
|
||||||
|
perror ("malloc failed");
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
cnt = 0;
|
||||||
#ifdef HAVE_ZLIB_H
|
#ifdef HAVE_ZLIB_H
|
||||||
res = gzread(input, buffer, len);
|
while(!gzeof(input)) {
|
||||||
#else
|
#else
|
||||||
res = read(input, buffer, buf.st_size);
|
while(1) {
|
||||||
|
#endif
|
||||||
|
if (cnt >= len) {
|
||||||
|
len *= 2;
|
||||||
|
nbuf = (char *)realloc(buffer,len*sizeof(char));
|
||||||
|
if (nbuf == NULL) {
|
||||||
|
fprintf(stderr,"Cannot realloc\n");
|
||||||
|
free(buffer);
|
||||||
|
perror ("realloc failed");
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
buffer = nbuf;
|
||||||
|
}
|
||||||
|
#ifdef HAVE_ZLIB_H
|
||||||
|
res = gzread(input, &buffer[cnt], len-cnt);
|
||||||
|
#else
|
||||||
|
res = read(input, &buffer[cnt], len-cnt);
|
||||||
#endif
|
#endif
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
fprintf (stderr, "Cannot read file %s :\n", filename);
|
fprintf (stderr, "Cannot read file %s :\n", filename);
|
||||||
@ -319,19 +358,17 @@ retry_bigger:
|
|||||||
#endif
|
#endif
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
|
if (res == 0)
|
||||||
|
break;
|
||||||
|
cnt += res;
|
||||||
|
}
|
||||||
#ifdef HAVE_ZLIB_H
|
#ifdef HAVE_ZLIB_H
|
||||||
gzclose(input);
|
gzclose(input);
|
||||||
if (res >= len) {
|
|
||||||
free(buffer);
|
|
||||||
len *= 2;
|
|
||||||
goto retry_bigger;
|
|
||||||
}
|
|
||||||
buf.st_size = res;
|
|
||||||
#else
|
#else
|
||||||
close(input);
|
close(input);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
buffer[buf.st_size] = '\0';
|
buffer[cnt] = '\0';
|
||||||
|
|
||||||
inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
|
inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
|
||||||
if (inputStream == NULL) {
|
if (inputStream == NULL) {
|
||||||
@ -4594,28 +4631,36 @@ xmlCreateFileParserCtxt(const char *filename)
|
|||||||
#endif
|
#endif
|
||||||
int res;
|
int res;
|
||||||
int len;
|
int len;
|
||||||
|
int cnt;
|
||||||
struct stat buf;
|
struct stat buf;
|
||||||
char *buffer;
|
char *buffer, *nbuf;
|
||||||
xmlParserInputPtr inputStream;
|
xmlParserInputPtr inputStream;
|
||||||
xmlCharEncoding enc;
|
xmlCharEncoding enc;
|
||||||
|
|
||||||
res = stat(filename, &buf);
|
#define MINLEN 40000
|
||||||
if (res < 0) return(NULL);
|
|
||||||
|
|
||||||
|
if (strcmp(filename,"-") == 0) {
|
||||||
#ifdef HAVE_ZLIB_H
|
#ifdef HAVE_ZLIB_H
|
||||||
len = (buf.st_size * 8) + 1000;
|
input = gzdopen (fileno(stdin), "r");
|
||||||
retry_bigger:
|
if (input == NULL) {
|
||||||
buffer = malloc(len);
|
fprintf (stderr, "Cannot read from stdin\n");
|
||||||
#else
|
perror ("gzdopen failed");
|
||||||
len = buf.st_size + 100;
|
|
||||||
buffer = malloc(len);
|
|
||||||
#endif
|
|
||||||
if (buffer == NULL) {
|
|
||||||
perror("malloc");
|
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
memset(buffer, 0, len);
|
#ifdef WIN32
|
||||||
|
input = -1;
|
||||||
|
#else
|
||||||
|
input = fileno(stdin);
|
||||||
|
#endif
|
||||||
|
if (input < 0) {
|
||||||
|
fprintf (stderr, "Cannot read from stdin\n");
|
||||||
|
perror ("open failed");
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
len = MINLEN;
|
||||||
|
} else {
|
||||||
#ifdef HAVE_ZLIB_H
|
#ifdef HAVE_ZLIB_H
|
||||||
input = gzopen (filename, "r");
|
input = gzopen (filename, "r");
|
||||||
if (input == NULL) {
|
if (input == NULL) {
|
||||||
@ -4635,10 +4680,41 @@ retry_bigger:
|
|||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
res = stat(filename, &buf);
|
||||||
|
if (res < 0)
|
||||||
|
return(NULL);
|
||||||
|
len = buf.st_size+1;
|
||||||
|
if (len < MINLEN)
|
||||||
|
len = MINLEN;
|
||||||
|
}
|
||||||
|
buffer = (char *)malloc(len*sizeof(char));
|
||||||
|
if (buffer == NULL) {
|
||||||
|
fprintf (stderr, "Cannot malloc\n");
|
||||||
|
perror ("malloc failed");
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
cnt = 0;
|
||||||
#ifdef HAVE_ZLIB_H
|
#ifdef HAVE_ZLIB_H
|
||||||
res = gzread(input, buffer, len);
|
while(!gzeof(input)) {
|
||||||
#else
|
#else
|
||||||
res = read(input, buffer, buf.st_size);
|
while(1) {
|
||||||
|
#endif
|
||||||
|
if (cnt == len) {
|
||||||
|
len *= 2;
|
||||||
|
nbuf = (char *)realloc(buffer,len*sizeof(char));
|
||||||
|
if (nbuf == NULL) {
|
||||||
|
fprintf(stderr,"Cannot realloc\n");
|
||||||
|
free(buffer);
|
||||||
|
perror ("realloc failed");
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
buffer = nbuf;
|
||||||
|
}
|
||||||
|
#ifdef HAVE_ZLIB_H
|
||||||
|
res = gzread(input, &buffer[cnt], len-cnt);
|
||||||
|
#else
|
||||||
|
res = read(input, &buffer[cnt], len-cnt);
|
||||||
#endif
|
#endif
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
fprintf (stderr, "Cannot read file %s :\n", filename);
|
fprintf (stderr, "Cannot read file %s :\n", filename);
|
||||||
@ -4648,20 +4724,18 @@ retry_bigger:
|
|||||||
perror ("read failed");
|
perror ("read failed");
|
||||||
#endif
|
#endif
|
||||||
return(NULL);
|
return(NULL);
|
||||||
|
}
|
||||||
|
if (res == 0)
|
||||||
|
break;
|
||||||
|
cnt += res;
|
||||||
}
|
}
|
||||||
#ifdef HAVE_ZLIB_H
|
#ifdef HAVE_ZLIB_H
|
||||||
gzclose(input);
|
gzclose(input);
|
||||||
if (res >= len) {
|
|
||||||
free(buffer);
|
|
||||||
len *= 2;
|
|
||||||
goto retry_bigger;
|
|
||||||
}
|
|
||||||
buf.st_size = res;
|
|
||||||
#else
|
#else
|
||||||
close(input);
|
close(input);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
buffer[res] = '\0';
|
buffer[cnt] = '\0';
|
||||||
|
|
||||||
ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
|
ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
|
||||||
if (ctxt == NULL) {
|
if (ctxt == NULL) {
|
||||||
|
2
tree.c
2
tree.c
@ -387,8 +387,6 @@ xmlNewDoc(const CHAR *version) {
|
|||||||
|
|
||||||
cur->type = XML_DOCUMENT_NODE;
|
cur->type = XML_DOCUMENT_NODE;
|
||||||
cur->version = xmlStrdup(version);
|
cur->version = xmlStrdup(version);
|
||||||
cur->ID = NULL;
|
|
||||||
cur->DTD = NULL;
|
|
||||||
cur->name = NULL;
|
cur->name = NULL;
|
||||||
cur->root = NULL;
|
cur->root = NULL;
|
||||||
cur->intSubset = NULL;
|
cur->intSubset = NULL;
|
||||||
|
2
tree.h
2
tree.h
@ -230,8 +230,6 @@ typedef struct xmlDoc {
|
|||||||
char *name; /* name/filename/URI of the document */
|
char *name; /* name/filename/URI of the document */
|
||||||
const CHAR *version; /* the XML version string */
|
const CHAR *version; /* the XML version string */
|
||||||
const CHAR *encoding; /* encoding, if any */
|
const CHAR *encoding; /* encoding, if any */
|
||||||
const CHAR *ID; /* the HTML version */
|
|
||||||
const CHAR *DTD; /* the HTML dtd URI */
|
|
||||||
int compression;/* level of zlib compression */
|
int compression;/* level of zlib compression */
|
||||||
int standalone; /* standalone document (no external refs) */
|
int standalone; /* standalone document (no external refs) */
|
||||||
struct xmlDtd *intSubset; /* the document internal subset */
|
struct xmlDtd *intSubset; /* the document internal subset */
|
||||||
|
Reference in New Issue
Block a user