1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

xmllint: Rework parsing

Merge several parsing code paths into a single parseFile() function, so
that options like --valid or --htmlout can be combined with other options
such as --push, --testIO or --memory.

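Improve error handling: failures now set a specific progresult value
(XMLLINT_ERR_RDFILE, XMLLINT_ERR_MEM or XMLLINT_ERR_VALID), and
XMLLINT_ERR_UNCLASS is only used as a fallback when no document was
produced and no other error code was recorded.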
This commit is contained in:
Nick Wellnhofer
2024-05-07 15:23:03 +02:00
parent 0bef170486
commit f8ff4d8688

366
xmllint.c
View File

@@ -2134,35 +2134,45 @@ static void doXPathQuery(xmlDocPtr doc, const char *query) {
* Tree Test processing * * Tree Test processing *
* * * *
************************************************************************/ ************************************************************************/
static void parseAndPrintFile(const char *filename, xmlParserCtxtPtr rectxt) {
static xmlDocPtr
parseFile(const char *filename, xmlParserCtxtPtr rectxt) {
xmlParserCtxtPtr ctxt;
xmlDocPtr doc = NULL; xmlDocPtr doc = NULL;
#ifdef LIBXML_TREE_ENABLED
xmlDocPtr tmp;
#endif /* LIBXML_TREE_ENABLED */
if ((timing) && (!repeat))
startTimer();
#ifdef LIBXML_TREE_ENABLED #ifdef LIBXML_TREE_ENABLED
if (filename == NULL) { if ((generate) && (filename == NULL)) {
if (generate) { xmlNodePtr n;
xmlNodePtr n;
doc = xmlNewDoc(BAD_CAST "1.0"); doc = xmlNewDoc(BAD_CAST "1.0");
n = xmlNewDocNode(doc, NULL, BAD_CAST "info", NULL); if (doc == NULL) {
xmlNodeSetContent(n, BAD_CAST "abc"); progresult = XMLLINT_ERR_MEM;
xmlDocSetRootElement(doc, n); return(NULL);
} }
n = xmlNewDocNode(doc, NULL, BAD_CAST "info", NULL);
if (n == NULL) {
xmlFreeDoc(doc);
progresult = XMLLINT_ERR_MEM;
return(NULL);
}
if (xmlNodeSetContent(n, BAD_CAST "abc") < 0) {
xmlFreeNode(n);
xmlFreeDoc(doc);
progresult = XMLLINT_ERR_MEM;
return(NULL);
}
xmlDocSetRootElement(doc, n);
return(doc);
} }
#endif /* LIBXML_TREE_ENABLED */ #endif /* LIBXML_TREE_ENABLED */
#ifdef LIBXML_HTML_ENABLED #ifdef LIBXML_HTML_ENABLED
#ifdef LIBXML_PUSH_ENABLED #ifdef LIBXML_PUSH_ENABLED
else if ((html) && (push)) { if ((html) && (push)) {
FILE *f; FILE *f;
int res; int res;
char chars[4096]; char chars[4096];
htmlParserCtxtPtr ctxt;
if ((filename[0] == '-') && (filename[1] == 0)) { if ((filename[0] == '-') && (filename[1] == 0)) {
f = stdin; f = stdin;
@@ -2170,8 +2180,8 @@ static void parseAndPrintFile(const char *filename, xmlParserCtxtPtr rectxt) {
f = fopen(filename, "rb"); f = fopen(filename, "rb");
if (f == NULL) { if (f == NULL) {
fprintf(ERR_STREAM, "Can't open %s\n", filename); fprintf(ERR_STREAM, "Can't open %s\n", filename);
progresult = XMLLINT_ERR_UNCLASS; progresult = XMLLINT_ERR_RDFILE;
return; return(NULL);
} }
} }
@@ -2182,7 +2192,7 @@ static void parseAndPrintFile(const char *filename, xmlParserCtxtPtr rectxt) {
progresult = XMLLINT_ERR_MEM; progresult = XMLLINT_ERR_MEM;
if (f != stdin) if (f != stdin)
fclose(f); fclose(f);
return; return(NULL);
} }
htmlCtxtUseOptions(ctxt, options); htmlCtxtUseOptions(ctxt, options);
while ((res = fread(chars, 1, pushsize, f)) > 0) { while ((res = fread(chars, 1, pushsize, f)) > 0) {
@@ -2193,23 +2203,26 @@ static void parseAndPrintFile(const char *filename, xmlParserCtxtPtr rectxt) {
htmlFreeParserCtxt(ctxt); htmlFreeParserCtxt(ctxt);
if (f != stdin) if (f != stdin)
fclose(f); fclose(f);
return(doc);
} }
#endif /* LIBXML_PUSH_ENABLED */ #endif /* LIBXML_PUSH_ENABLED */
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
else if ((html) && (memory)) { if ((html) && (memory)) {
int fd; int fd;
struct stat info; struct stat info;
const char *base; const char *base;
if (stat(filename, &info) < 0) if (stat(filename, &info) < 0)
return; return(NULL);
if ((fd = open(filename, O_RDONLY)) < 0) if ((fd = open(filename, O_RDONLY)) < 0)
return; return(NULL);
base = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0) ; base = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0) ;
if (base == (void *) MAP_FAILED) { if (base == (void *) MAP_FAILED) {
close(fd); close(fd);
fprintf(ERR_STREAM, "mmap failure for file %s\n", filename); fprintf(ERR_STREAM, "mmap failure for file %s\n", filename);
progresult = XMLLINT_ERR_RDFILE; progresult = XMLLINT_ERR_RDFILE;
return; return(NULL);
} }
doc = htmlReadMemory((char *) base, info.st_size, filename, doc = htmlReadMemory((char *) base, info.st_size, filename,
@@ -2217,211 +2230,168 @@ static void parseAndPrintFile(const char *filename, xmlParserCtxtPtr rectxt) {
munmap((char *) base, info.st_size); munmap((char *) base, info.st_size);
close(fd); close(fd);
return(doc);
} }
#endif #endif
else if (html) {
if (html) {
if (strcmp(filename, "-") == 0) if (strcmp(filename, "-") == 0)
doc = htmlReadFd(STDIN_FILENO, "-", NULL, options); doc = htmlReadFd(STDIN_FILENO, "-", NULL, options);
else else
doc = htmlReadFile(filename, NULL, options); doc = htmlReadFile(filename, NULL, options);
return(doc);
} }
#endif /* LIBXML_HTML_ENABLED */ #endif /* LIBXML_HTML_ENABLED */
else {
#ifdef LIBXML_PUSH_ENABLED #ifdef LIBXML_PUSH_ENABLED
/* if (push) {
* build an XML tree from a string; FILE *f;
*/ int res;
if (push) { char chars[4096];
FILE *f;
int ret;
int res;
char chars[4096];
xmlParserCtxtPtr ctxt;
/* '-' Usually means stdin -<sven@zen.org> */ if ((filename[0] == '-') && (filename[1] == 0)) {
if ((filename[0] == '-') && (filename[1] == 0)) { f = stdin;
f = stdin; } else {
} else { f = fopen(filename, "rb");
f = fopen(filename, "rb"); if (f == NULL) {
if (f == NULL) { fprintf(ERR_STREAM, "Can't open %s\n", filename);
fprintf(ERR_STREAM, "Can't open %s\n", filename); progresult = XMLLINT_ERR_RDFILE;
progresult = XMLLINT_ERR_UNCLASS; return(NULL);
return; }
} }
}
res = fread(chars, 1, 4, f); res = fread(chars, 1, 4, f);
ctxt = xmlCreatePushParserCtxt(NULL, NULL, ctxt = xmlCreatePushParserCtxt(NULL, NULL,
chars, res, filename); chars, res, filename);
if (ctxt == NULL) { if (ctxt == NULL) {
progresult = XMLLINT_ERR_MEM; progresult = XMLLINT_ERR_MEM;
if (f != stdin)
fclose(f);
return;
}
xmlCtxtUseOptions(ctxt, options);
if (maxAmpl > 0)
xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
while ((res = fread(chars, 1, pushsize, f)) > 0) {
xmlParseChunk(ctxt, chars, res, 0);
}
xmlParseChunk(ctxt, chars, 0, 1);
doc = ctxt->myDoc;
ret = ctxt->wellFormed;
xmlFreeParserCtxt(ctxt);
if ((!ret) && (!recovery)) {
xmlFreeDoc(doc);
doc = NULL;
}
if (f != stdin) if (f != stdin)
fclose(f); fclose(f);
} else return(NULL);
#endif /* LIBXML_PUSH_ENABLED */ }
if (testIO) { xmlCtxtUseOptions(ctxt, options);
if ((filename[0] == '-') && (filename[1] == 0)) {
doc = xmlReadFd(STDIN_FILENO, "-", NULL, options);
} else {
FILE *f;
f = fopen(filename, "rb"); if (maxAmpl > 0)
if (f != NULL) { xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
if (rectxt == NULL)
doc = xmlReadIO(myRead, myClose, f, filename, NULL,
options);
else
doc = xmlCtxtReadIO(rectxt, myRead, myClose, f,
filename, NULL, options);
} else
doc = NULL;
}
} else if (htmlout) {
xmlParserCtxtPtr ctxt;
if (rectxt == NULL) {
ctxt = xmlNewParserCtxt();
if (ctxt == NULL) {
progresult = XMLLINT_ERR_MEM;
return;
}
if (maxAmpl > 0)
xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
} else {
ctxt = rectxt;
}
if (htmlout) {
ctxt->sax->error = xmlHTMLError; ctxt->sax->error = xmlHTMLError;
ctxt->sax->warning = xmlHTMLWarning; ctxt->sax->warning = xmlHTMLWarning;
ctxt->vctxt.error = xmlHTMLValidityError; ctxt->vctxt.error = xmlHTMLValidityError;
ctxt->vctxt.warning = xmlHTMLValidityWarning; ctxt->vctxt.warning = xmlHTMLValidityWarning;
}
if (strcmp(filename, "-") == 0) while ((res = fread(chars, 1, pushsize, f)) > 0) {
doc = xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options); xmlParseChunk(ctxt, chars, res, 0);
else }
doc = xmlCtxtReadFile(ctxt, filename, NULL, options); xmlParseChunk(ctxt, chars, 0, 1);
if (rectxt == NULL) doc = ctxt->myDoc;
xmlFreeParserCtxt(ctxt); if (f != stdin)
fclose(f);
} else
#endif /* LIBXML_PUSH_ENABLED */
{
if (rectxt == NULL) {
ctxt = xmlNewParserCtxt();
if (ctxt == NULL) {
progresult = XMLLINT_ERR_MEM;
return(NULL);
}
} else {
ctxt = rectxt;
}
if (maxAmpl > 0)
xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
if (htmlout) {
ctxt->sax->error = xmlHTMLError;
ctxt->sax->warning = xmlHTMLWarning;
ctxt->vctxt.error = xmlHTMLValidityError;
ctxt->vctxt.warning = xmlHTMLValidityWarning;
}
if (testIO) {
FILE *f;
if ((filename[0] == '-') && (filename[1] == 0)) {
f = stdin;
} else {
f = fopen(filename, "rb");
if (f == NULL) {
fprintf(ERR_STREAM, "Can't open %s\n", filename);
progresult = XMLLINT_ERR_RDFILE;
goto error;
}
}
doc = xmlCtxtReadIO(ctxt, myRead, myClose, f, filename, NULL,
options);
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
} else if (memory) { } else if (memory) {
int fd; int fd;
struct stat info; struct stat info;
const char *base; const char *base;
if (stat(filename, &info) < 0)
return;
if ((fd = open(filename, O_RDONLY)) < 0)
return;
base = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0) ;
if (base == (void *) MAP_FAILED) {
close(fd);
fprintf(ERR_STREAM, "mmap failure for file %s\n", filename);
progresult = XMLLINT_ERR_RDFILE;
return;
}
if (rectxt == NULL) { if (stat(filename, &info) < 0)
xmlParserCtxtPtr ctxt; goto error;
if ((fd = open(filename, O_RDONLY)) < 0)
ctxt = xmlNewParserCtxt(); goto error;
if (ctxt == NULL) { base = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0) ;
fprintf(ERR_STREAM, "out of memory\n"); if (base == (void *) MAP_FAILED) {
progresult = XMLLINT_ERR_MEM; close(fd);
return; fprintf(ERR_STREAM, "mmap failure for file %s\n", filename);
} progresult = XMLLINT_ERR_RDFILE;
if (maxAmpl > 0) goto error;
xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
doc = xmlCtxtReadMemory(ctxt, base, info.st_size,
filename, NULL, options);
xmlFreeParserCtxt(ctxt);
} else {
doc = xmlCtxtReadMemory(rectxt, (char *) base, info.st_size,
filename, NULL, options);
} }
munmap((char *) base, info.st_size); doc = xmlCtxtReadMemory(ctxt, base, info.st_size, filename, NULL,
close(fd); options);
munmap((char *) base, info.st_size);
close(fd);
#endif #endif
#ifdef LIBXML_VALID_ENABLED } else {
} else if (valid) {
xmlParserCtxtPtr ctxt = NULL;
if (rectxt == NULL) {
ctxt = xmlNewParserCtxt();
if (ctxt == NULL) {
progresult = XMLLINT_ERR_MEM;
return;
}
} else {
ctxt = rectxt;
}
if (maxAmpl > 0)
xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
if (strcmp(filename, "-") == 0) if (strcmp(filename, "-") == 0)
doc = xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options); doc = xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options);
else else
doc = xmlCtxtReadFile(ctxt, filename, NULL, options); doc = xmlCtxtReadFile(ctxt, filename, NULL, options);
}
if (ctxt->valid == 0)
progresult = XMLLINT_ERR_RDFILE;
if (rectxt == NULL)
xmlFreeParserCtxt(ctxt);
#endif /* LIBXML_VALID_ENABLED */
} else {
if (rectxt != NULL) {
if (strcmp(filename, "-") == 0)
doc = xmlCtxtReadFd(rectxt, STDIN_FILENO, "-", NULL,
options);
else
doc = xmlCtxtReadFile(rectxt, filename, NULL, options);
} else {
xmlParserCtxtPtr ctxt;
ctxt = xmlNewParserCtxt();
if (ctxt == NULL) {
fprintf(ERR_STREAM, "out of memory\n");
progresult = XMLLINT_ERR_MEM;
return;
}
if (maxAmpl > 0)
xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
if (strcmp(filename, "-") == 0)
doc = xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL,
options);
else
doc = xmlCtxtReadFile(ctxt, filename, NULL, options);
xmlFreeParserCtxt(ctxt);
}
}
} }
/*
* If we don't have a document we might as well give up. Do we
* want an error message here? <sven@zen.org> */
if (doc == NULL) { if (doc == NULL) {
progresult = XMLLINT_ERR_UNCLASS; if (ctxt->errNo == XML_ERR_NO_MEMORY)
progresult = XMLLINT_ERR_MEM;
else
progresult = XMLLINT_ERR_RDFILE;
} else {
#ifdef LIBXML_VALID_ENABLED
if ((valid) && (ctxt->valid == 0))
progresult = XMLLINT_ERR_VALID;
#endif /* LIBXML_VALID_ENABLED */
}
error:
if (ctxt != rectxt)
xmlFreeParserCtxt(ctxt);
return(doc);
}
static void
parseAndPrintFile(const char *filename, xmlParserCtxtPtr rectxt) {
xmlDocPtr doc;
if ((timing) && (!repeat))
startTimer();
doc = parseFile(filename, rectxt);
if (doc == NULL) {
if (progresult == XMLLINT_RETURN_OK)
progresult = XMLLINT_ERR_UNCLASS;
return; return;
} }
@@ -2479,6 +2449,8 @@ static void parseAndPrintFile(const char *filename, xmlParserCtxtPtr rectxt) {
* test intermediate copy if needed. * test intermediate copy if needed.
*/ */
if (copy) { if (copy) {
xmlDocPtr tmp;
tmp = doc; tmp = doc;
if (timing) { if (timing) {
startTimer(); startTimer();