1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

I wanted to see the real speed at the SAX interface after a little too

* testSAX.c: I wanted to see the real speed at the SAX interface
  after a few too many Ximianers started complaining about the
  parser speed.
  Added a --quiet option:
  paphio:~/XML -> ls -l db100000.xml
  -rw-rw-r--    1 veillard www      20182040 Mar 20 10:30 db100000.xml
  paphio:~/XML -> time ./testSAX --quiet db100000.xml
  3200006 callbacks generated
  real	0m1.270s
  Which means roughly 16 MBytes/s and 3 Mcallbacks/s
Daniel
This commit is contained in:
Daniel Veillard
2002-03-20 19:24:21 +00:00
parent c62a147963
commit e50f3b5d54
9 changed files with 479 additions and 173 deletions

View File

@ -1,3 +1,16 @@
Wed Mar 20 20:20:57 CET 2002 Daniel Veillard <daniel@veillard.com>
* testSAX.c: I wanted to see the real speed at the SAX interface
after a little too many Ximianer started complaining about the
parser speed.
added a --quiet option:
paphio:~/XML -> ls -l db100000.xml
-rw-rw-r-- 1 veillard www 20182040 Mar 20 10:30 db100000.xml
paphio:~/XML -> time ./testSAX --quiet db100000.xml
3200006 callbacks generated
real 0m1.270s
Which means 16MBytes/s and 3Mcallback/s
Tue Mar 19 19:33:57 CET 2002 Daniel Veillard <daniel@veillard.com> Tue Mar 19 19:33:57 CET 2002 Daniel Veillard <daniel@veillard.com>
* xpath.c: valgrind spotted another error that time when running * xpath.c: valgrind spotted another error that time when running

View File

@ -147,11 +147,12 @@ PUSH_AND_POP(static, xmlChar*, name)
/* Inported from XML */ /* Inported from XML */
/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */ #define CUR ((ctxt->input->cur < ctxt->input->end) ? (*ctxt->input->cur) : 0)
#define CUR ((int) (*ctxt->input->cur))
#define NEXT xmlNextChar(ctxt),ctxt->nbChars++ #define NEXT xmlNextChar(ctxt),ctxt->nbChars++
#define AVAIL (ctxt->input->end - ctxt->input->cur)
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) #define RAW (ctxt->token ? -1 : \
(ctxt->input->cur < ctxt->input->end) ? (*ctxt->input->cur) : 0)
#define NXT(val) ctxt->input->cur[(val)] #define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur #define CUR_PTR ctxt->input->cur
@ -3047,8 +3048,8 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
else { else {
/* Dump the bogus attribute string up to the next blank or /* Dump the bogus attribute string up to the next blank or
* the end of the tag. */ * the end of the tag. */
while ((IS_CHAR(CUR)) && !(IS_BLANK(CUR)) && (CUR != '>') while ((AVAIL > 0) && (IS_CHAR(CUR)) && !(IS_BLANK(CUR)) &&
&& ((CUR != '/') || (NXT(1) != '>'))) (CUR != '>') && ((CUR != '/') || (NXT(1) != '>')))
NEXT; NEXT;
} }
@ -4670,6 +4671,8 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
ctxt->input->base = ctxt->input->buf->buffer->content + base; ctxt->input->base = ctxt->input->buf->buffer->content + base;
ctxt->input->cur = ctxt->input->base + cur; ctxt->input->cur = ctxt->input->base + cur;
ctxt->input->end = ctxt->input->buf->buffer->content +
ctxt->input->buf->buffer->use;
#ifdef DEBUG_PUSH #ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
#endif #endif

80
error.c
View File

@ -146,41 +146,50 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) {
*/ */
void void
xmlParserPrintFileContext(xmlParserInputPtr input) { xmlParserPrintFileContext(xmlParserInputPtr input)
const xmlChar *cur, *base; {
const xmlChar *cur, *base, *end;
int n; int n;
xmlChar content[81]; xmlChar content[81];
xmlChar *ctnt; xmlChar *ctnt;
if (input == NULL) return; if (input == NULL)
return;
cur = input->cur; cur = input->cur;
base = input->base; base = input->base;
end = input->end;
/* skip backwards over any end-of-lines */ /* skip backwards over any end-of-lines */
while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) { while ((cur > base) && (cur < end)
&& ((*cur == '\n') || (*cur == '\r'))) {
cur--; cur--;
} }
n = 0; n = 0;
/* search backwards for beginning-of-line maximum 80 characters */ /* search backwards for beginning-of-line maximum 80 characters */
while ((n++ < 80) && (cur > base) && (*cur != '\n') && (*cur != '\r')) while ((n++ < 80) && (cur > base) && (cur < end) && (*cur != '\n')
&& (*cur != '\r'))
cur--; cur--;
if ((*cur == '\n') || (*cur == '\r')) cur++; if ((cur > base) && (cur < end) && ((*cur == '\n') || (*cur == '\r')))
cur++;
/* search forward for end-of-line maximum 80 characters */ /* search forward for end-of-line maximum 80 characters */
n = 0; n = 0;
ctnt = content; ctnt = content;
while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) { while ((cur < end) && (*cur != 0) && (*cur != '\n') && (*cur != '\r')
&& (n < 79)) {
*ctnt++ = *cur++; *ctnt++ = *cur++;
n++; n++;
} }
*ctnt = 0; *ctnt = 0;
xmlGenericError(xmlGenericErrorContext,"%s\n", content); xmlGenericError(xmlGenericErrorContext, "%s\n", content);
/* create blank line with problem pointer */ /* create blank line with problem pointer */
cur = input->cur; cur = input->cur;
while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) { while ((cur > base) && (cur < end)
&& ((*cur == '\n') || (*cur == '\r'))) {
cur--; cur--;
} }
n = 0; n = 0;
ctnt = content; ctnt = content;
while ((n++ < 79) && (cur > base) && (*cur != '\n') && (*cur != '\r')) { while ((n++ < 79) && (cur > base) && (cur < end) && (*cur != '\n')
&& (*cur != '\r')) {
*ctnt++ = ' '; *ctnt++ = ' ';
cur--; cur--;
} }
@ -191,58 +200,9 @@ xmlParserPrintFileContext(xmlParserInputPtr input) {
*ctnt = '^'; *ctnt = '^';
*(++ctnt) = 0; *(++ctnt) = 0;
} }
xmlGenericError(xmlGenericErrorContext,"%s\n", content); xmlGenericError(xmlGenericErrorContext, "%s\n", content);
} }
#if 0
/**
* xmlGetVarStr:
* @msg: the message format
* @args: a va_list argument list
*
* SGS contribution
* Get an arbitrary-sized string for an error argument
* The caller must free() the returned string
*/
static char *
xmlGetVarStr(const char * msg, va_list args) {
int size;
int length;
int chars, left;
char *str, *larger;
va_list ap;
str = (char *) xmlMalloc(150);
if (str == NULL)
return(NULL);
size = 150;
length = 0;
while (1) {
left = size - length;
/* Try to print in the allocated space. */
va_start(msg, ap);
chars = vsnprintf(str + length, left, msg, ap);
va_end(ap);
/* If that worked, we're done. */
if ((chars > -1) && (chars < left ))
break;
/* Else try again with more space. */
if (chars > -1) /* glibc 2.1 */
size += chars + 1; /* precisely what is needed */
else /* glibc 2.0 */
size += 100;
if ((larger = (char *) xmlRealloc(str, size)) == NULL) {
xmlFree(str);
return(NULL);
}
str = larger;
}
return(str);
}
#endif
/** /**
* xmlParserError: * xmlParserError:
* @ctx: an XML parser context * @ctx: an XML parser context

View File

@ -402,7 +402,8 @@ struct _xmlRef {
typedef enum { typedef enum {
XML_BUFFER_ALLOC_DOUBLEIT, XML_BUFFER_ALLOC_DOUBLEIT,
XML_BUFFER_ALLOC_EXACT XML_BUFFER_ALLOC_EXACT,
XML_BUFFER_ALLOC_UNMUTABLE
} xmlBufferAllocationScheme; } xmlBufferAllocationScheme;
/** /**

View File

@ -268,29 +268,32 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
* GROW, SHRINK handling of input buffers * GROW, SHRINK handling of input buffers
*/ */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) #define AVAIL (ctxt->input->end - ctxt->input->cur)
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur)) #define RAW (ctxt->token ? -1 : \
(ctxt->input->cur < ctxt->input->end) ? (*ctxt->input->cur) : 0)
#define CUR (ctxt->token ? ctxt->token : \
(ctxt->input->cur < ctxt->input->end) ? (*ctxt->input->cur) : 0)
#define NXT(val) ctxt->input->cur[(val)] #define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur #define CUR_PTR ctxt->input->cur
#define SKIP(val) do { \ #define SKIP(val) do { \
ctxt->nbChars += (val),ctxt->input->cur += (val); \ ctxt->nbChars += (val),ctxt->input->cur += (val); \
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
if ((*ctxt->input->cur == 0) && \ if ((ctxt->input->cur >= ctxt->input->end) && \
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
xmlPopInput(ctxt); \ xmlPopInput(ctxt); \
} while (0) } while (0)
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\ #define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
xmlParserInputShrink(ctxt->input); \ xmlParserInputShrink(ctxt->input); \
if ((*ctxt->input->cur == 0) && \ if ((ctxt->input->cur >= ctxt->input->end) && \
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
xmlPopInput(ctxt); \ xmlPopInput(ctxt); \
} }
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { \ #define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { \
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
if ((*ctxt->input->cur == 0) && \ if ((ctxt->input->cur >= ctxt->input->end) && \
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
xmlPopInput(ctxt); \ xmlPopInput(ctxt); \
} }
@ -302,7 +305,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
#define NEXT1 { \ #define NEXT1 { \
ctxt->input->cur++; \ ctxt->input->cur++; \
ctxt->nbChars++; \ ctxt->nbChars++; \
if (*ctxt->input->cur == 0) \ if (ctxt->input->cur >= ctxt->input->end) \
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
} }
@ -409,7 +412,7 @@ xmlPopInput(xmlParserCtxtPtr ctxt) {
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
"Popping input %d\n", ctxt->inputNr); "Popping input %d\n", ctxt->inputNr);
xmlFreeInputStream(inputPop(ctxt)); xmlFreeInputStream(inputPop(ctxt));
if ((*ctxt->input->cur == 0) && if ((ctxt->input->cur >= ctxt->input->end) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
return(xmlPopInput(ctxt)); return(xmlPopInput(ctxt));
return(CUR); return(CUR);
@ -467,7 +470,8 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
/* /*
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
*/ */
if ((RAW == '&') && (NXT(1) == '#') && GROW;
if ((AVAIL >= 5) && (RAW == '&') && (NXT(1) == '#') &&
(NXT(2) == 'x')) { (NXT(2) == 'x')) {
SKIP(3); SKIP(3);
GROW; GROW;
@ -500,7 +504,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
ctxt->nbChars ++; ctxt->nbChars ++;
ctxt->input->cur++; ctxt->input->cur++;
} }
} else if ((RAW == '&') && (NXT(1) == '#')) { } else if ((AVAIL >= 4) && (RAW == '&') && (NXT(1) == '#')) {
SKIP(2); SKIP(2);
GROW; GROW;
while (RAW != ';') { /* loop blocked by count */ while (RAW != ';') { /* loop blocked by count */
@ -836,6 +840,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
* plug some encoding conversion routines. * plug some encoding conversion routines.
*/ */
GROW GROW
if (AVAIL > 4) {
start[0] = RAW; start[0] = RAW;
start[1] = NXT(1); start[1] = NXT(1);
start[2] = NXT(2); start[2] = NXT(2);
@ -844,8 +849,10 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
if (enc != XML_CHAR_ENCODING_NONE) { if (enc != XML_CHAR_ENCODING_NONE) {
xmlSwitchEncoding(ctxt, enc); xmlSwitchEncoding(ctxt, enc);
} }
}
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
(AVAIL >= 6) &&
(RAW == '<') && (NXT(1) == '?') && (RAW == '<') && (NXT(1) == '?') &&
(NXT(2) == 'x') && (NXT(3) == 'm') && (NXT(2) == 'x') && (NXT(3) == 'm') &&
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
@ -1526,7 +1533,7 @@ static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
/* /*
* Otherwise, heuristic :-\ * Otherwise, heuristic :-\
*/ */
if (RAW != '<') return(0); if ((AVAIL < 2) || (RAW != '<')) return(0);
if ((ctxt->node->children == NULL) && if ((ctxt->node->children == NULL) &&
(RAW == '<') && (NXT(1) == '/')) return(0); (RAW == '<') && (NXT(1) == '/')) return(0);
@ -2555,7 +2562,7 @@ void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
void void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
const xmlChar *in; const xmlChar *in, *end;
int nbchar = 0; int nbchar = 0;
int line = ctxt->input->line; int line = ctxt->input->line;
int col = ctxt->input->col; int col = ctxt->input->col;
@ -2568,21 +2575,24 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
*/ */
if ((ctxt->token == 0) && (!cdata)) { if ((ctxt->token == 0) && (!cdata)) {
in = ctxt->input->cur; in = ctxt->input->cur;
end = ctxt->input->end;
do { do {
get_more: get_more:
while (((*in >= 0x20) && (*in != '<') && (*in != ']') && while ((in < end) &&
(*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
(*in != '&') && (*in <= 0x7F)) || (*in == 0x09)))
in++; in++;
if (*in == 0xA) { if (in >= end) {
end = ctxt->input->end;
} else if (*in == 0xA) {
ctxt->input->line++; ctxt->input->line++;
in++; in++;
while (*in == 0xA) { while ((in < end) && (*in == 0xA)) {
ctxt->input->line++; ctxt->input->line++;
in++; in++;
} }
goto get_more; goto get_more;
} } else if (*in == ']') {
if (*in == ']') {
if ((in[1] == ']') && (in[2] == '>')) { if ((in[1] == ']') && (in[2] == '>')) {
ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
@ -2621,6 +2631,7 @@ get_more:
} }
} }
ctxt->input->cur = in; ctxt->input->cur = in;
if (in < ctxt->input->end) {
if (*in == 0xD) { if (*in == 0xD) {
in++; in++;
if (*in == 0xA) { if (*in == 0xA) {
@ -2637,10 +2648,12 @@ get_more:
if (*in == '&') { if (*in == '&') {
return; return;
} }
}
SHRINK; SHRINK;
GROW; GROW;
in = ctxt->input->cur; in = ctxt->input->cur;
} while ((*in >= 0x20) && (*in <= 0x7F)); end = ctxt->input->end;
} while ((in < end) && (*in >= 0x20) && (*in <= 0x7F));
nbchar = 0; nbchar = 0;
} }
ctxt->input->line = line; ctxt->input->line = line;
@ -6746,7 +6759,7 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
void void
xmlParseContent(xmlParserCtxtPtr ctxt) { xmlParseContent(xmlParserCtxtPtr ctxt) {
GROW; GROW;
while (((RAW != 0) || (ctxt->token != 0)) && while ((AVAIL > 0) && ((RAW != 0) || (ctxt->token != 0)) &&
((RAW != '<') || (NXT(1) != '/'))) { ((RAW != '<') || (NXT(1) != '/'))) {
const xmlChar *test = CUR_PTR; const xmlChar *test = CUR_PTR;
int cons = ctxt->input->consumed; int cons = ctxt->input->consumed;
@ -6813,7 +6826,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
/* /*
* Pop-up of finished entities. * Pop-up of finished entities.
*/ */
while ((RAW == 0) && (ctxt->inputNr > 1)) while (((AVAIL == 0) || (RAW == 0)) && (ctxt->inputNr > 1))
xmlPopInput(ctxt); xmlPopInput(ctxt);
SHRINK; SHRINK;
@ -7527,13 +7540,16 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
void void
xmlParseMisc(xmlParserCtxtPtr ctxt) { xmlParseMisc(xmlParserCtxtPtr ctxt) {
while (((RAW == '<') && (NXT(1) == '?')) || if (AVAIL < 4) {
((RAW == '<') && (NXT(1) == '!') && GROW;
(NXT(2) == '-') && (NXT(3) == '-')) || }
IS_BLANK(CUR)) { while (((AVAIL >= 2) && ((RAW == '<') && (NXT(1) == '?'))) ||
if ((RAW == '<') && (NXT(1) == '?')) { ((AVAIL >= 4) && ((RAW == '<') && (NXT(1) == '!') &&
(NXT(2) == '-') && (NXT(3) == '-'))) ||
((AVAIL > 0) && (IS_BLANK(CUR)))) {
if ((AVAIL >= 2) && (RAW == '<') && (NXT(1) == '?')) {
xmlParsePI(ctxt); xmlParsePI(ctxt);
} else if (IS_BLANK(CUR)) { } else if ((AVAIL > 0) && (IS_BLANK(CUR))) {
NEXT; NEXT;
} else } else
xmlParseComment(ctxt); xmlParseComment(ctxt);
@ -7683,7 +7699,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
*/ */
xmlParseMisc(ctxt); xmlParseMisc(ctxt);
if (RAW != 0) { if ((AVAIL > 0) && (RAW != 0)) {
ctxt->errNo = XML_ERR_DOCUMENT_END; ctxt->errNo = XML_ERR_DOCUMENT_END;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData, ctxt->sax->error(ctxt->userData,
@ -9525,14 +9541,14 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
xmlParseContent(ctxt); xmlParseContent(ctxt);
if ((RAW == '<') && (NXT(1) == '/')) { if ((AVAIL > 2) && (RAW == '<') && (NXT(1) == '/')) {
ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData, ctxt->sax->error(ctxt->userData,
"chunk is not well balanced\n"); "chunk is not well balanced\n");
ctxt->wellFormed = 0; ctxt->wellFormed = 0;
ctxt->disableSAX = 1; ctxt->disableSAX = 1;
} else if (RAW != 0) { } else if ((AVAIL > 0) && (RAW != 0)) {
ctxt->errNo = XML_ERR_EXTRA_CONTENT; ctxt->errNo = XML_ERR_EXTRA_CONTENT;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData, ctxt->sax->error(ctxt->userData,

View File

@ -993,6 +993,7 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
if (in->base == NULL) return(-1); if (in->base == NULL) return(-1);
if (in->cur == NULL) return(-1); if (in->cur == NULL) return(-1);
if (in->buf->buffer == NULL) return(-1); if (in->buf->buffer == NULL) return(-1);
if (in->buf->buffer->alloc == XML_BUFFER_ALLOC_UNMUTABLE) return(-1);
CHECK_BUFFER(in); CHECK_BUFFER(in);
@ -1048,6 +1049,7 @@ xmlParserInputShrink(xmlParserInputPtr in) {
if (in->base == NULL) return; if (in->base == NULL) return;
if (in->cur == NULL) return; if (in->cur == NULL) return;
if (in->buf->buffer == NULL) return; if (in->buf->buffer == NULL) return;
if (in->buf->buffer->alloc == XML_BUFFER_ALLOC_UNMUTABLE) return;
CHECK_BUFFER(in); CHECK_BUFFER(in);
@ -1101,9 +1103,13 @@ xmlParserInputShrink(xmlParserInputPtr in) {
void void
xmlNextChar(xmlParserCtxtPtr ctxt) { xmlNextChar(xmlParserCtxtPtr ctxt) {
int avail;
if (ctxt->instate == XML_PARSER_EOF) if (ctxt->instate == XML_PARSER_EOF)
return; return;
avail = ctxt->input->end - ctxt->input->cur;
/* /*
* 2.11 End-of-Line Handling * 2.11 End-of-Line Handling
* the literal two-character sequence "#xD#xA" or a standalone * the literal two-character sequence "#xD#xA" or a standalone
@ -1112,7 +1118,8 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
*/ */
if (ctxt->token != 0) ctxt->token = 0; if (ctxt->token != 0) ctxt->token = 0;
else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
if ((*ctxt->input->cur == 0) && if (((ctxt->input->cur >= ctxt->input->end) ||
(*ctxt->input->cur == 0)) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
(ctxt->instate != XML_PARSER_COMMENT)) { (ctxt->instate != XML_PARSER_COMMENT)) {
/* /*
@ -1126,6 +1133,14 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
if (*(ctxt->input->cur) == '\n') { if (*(ctxt->input->cur) == '\n') {
ctxt->input->line++; ctxt->input->col = 1; ctxt->input->line++; ctxt->input->col = 1;
} else ctxt->input->col++; } else ctxt->input->col++;
if (avail < 4) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
avail = ctxt->input->end - ctxt->input->cur;
}
if (avail < 1)
return;
if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
/* /*
* We are supposed to handle UTF8, check it's valid * We are supposed to handle UTF8, check it's valid
@ -1143,21 +1158,15 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
c = *cur; c = *cur;
if (c & 0x80) { if (c & 0x80) {
if (cur[1] == 0) if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
if ((cur[1] & 0xc0) != 0x80)
goto encoding_error; goto encoding_error;
if ((c & 0xe0) == 0xe0) { if ((c & 0xe0) == 0xe0) {
unsigned int val; unsigned int val;
if (cur[2] == 0) if ((avail < 3) || ((cur[2] & 0xc0) != 0x80))
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
if ((cur[2] & 0xc0) != 0x80)
goto encoding_error; goto encoding_error;
if ((c & 0xf0) == 0xf0) { if ((c & 0xf0) == 0xf0) {
if (cur[3] == 0) if ((avail < 4) || ((c & 0xf8) != 0xf0) ||
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
if (((c & 0xf8) != 0xf0) ||
((cur[3] & 0xc0) != 0x80)) ((cur[3] & 0xc0) != 0x80))
goto encoding_error; goto encoding_error;
/* 4-byte code */ /* 4-byte code */
@ -1199,18 +1208,18 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
ctxt->input->cur++; ctxt->input->cur++;
} }
ctxt->nbChars++; ctxt->nbChars++;
if (*ctxt->input->cur == 0)
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
} }
} else { } else {
ctxt->input->cur++; ctxt->input->cur++;
ctxt->nbChars++; ctxt->nbChars++;
if (*ctxt->input->cur == 0) if (avail < 1) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
avail = ctxt->input->end - ctxt->input->cur;
} }
if ((*ctxt->input->cur == '%') && (!ctxt->html)) }
if ((avail > 1) && (*ctxt->input->cur == '%') && (!ctxt->html))
xmlParserHandlePEReference(ctxt); xmlParserHandlePEReference(ctxt);
if ((*ctxt->input->cur == 0) && if ((avail < 1) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
xmlPopInput(ctxt); xmlPopInput(ctxt);
return; return;
@ -1223,16 +1232,33 @@ encoding_error:
* encoding !) * encoding !)
*/ */
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
xmlChar bytes[4];
if (avail > 3)
bytes[3] = ctxt->input->cur[3];
else
bytes[3] = 0;
if (avail > 2)
bytes[2] = ctxt->input->cur[2];
else
bytes[2] = 0;
if (avail > 1)
bytes[1] = ctxt->input->cur[1];
else
bytes[1] = 0;
if (avail > 0)
bytes[0] = ctxt->input->cur[0];
else
bytes[0] = 0;
ctxt->sax->error(ctxt->userData, ctxt->sax->error(ctxt->userData,
"Input is not proper UTF-8, indicate encoding !\n"); "Input is not proper UTF-8, indicate encoding !\n");
ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1], bytes[0], bytes[1],bytes[2],bytes[3]);
ctxt->input->cur[2], ctxt->input->cur[3]);
} }
ctxt->wellFormed = 0; ctxt->wellFormed = 0;
ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->errNo = XML_ERR_INVALID_ENCODING;
ctxt->charset = XML_CHAR_ENCODING_8859_1; ctxt->charset = XML_CHAR_ENCODING_8859_1;
if (avail > 1)
ctxt->input->cur++; ctxt->input->cur++;
return; return;
} }
@ -1257,6 +1283,8 @@ encoding_error:
int int
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
int avail;
if (ctxt->instate == XML_PARSER_EOF) if (ctxt->instate == XML_PARSER_EOF)
return(0); return(0);
@ -1264,7 +1292,16 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
*len = 0; *len = 0;
return(ctxt->token); return(ctxt->token);
} }
if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { avail = ctxt->input->end - ctxt->input->cur;
if (avail < 4) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
avail = ctxt->input->end - ctxt->input->cur;
}
if (avail < 1)
return(0);
if ((avail > 1) &&
(*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
*len = 1; *len = 1;
return((int) *ctxt->input->cur); return((int) *ctxt->input->cur);
} }
@ -1286,20 +1323,15 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
c = *cur; c = *cur;
if (c & 0x80) { if (c & 0x80) {
if (cur[1] == 0) if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
if ((cur[1] & 0xc0) != 0x80)
goto encoding_error; goto encoding_error;
if ((c & 0xe0) == 0xe0) { if ((c & 0xe0) == 0xe0) {
if ((avail < 3) || ((cur[2] & 0xc0) != 0x80))
if (cur[2] == 0)
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
if ((cur[2] & 0xc0) != 0x80)
goto encoding_error; goto encoding_error;
if ((c & 0xf0) == 0xf0) { if ((c & 0xf0) == 0xf0) {
if (cur[3] == 0) if (cur[3] == 0)
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
if (((c & 0xf8) != 0xf0) || if ((avail < 4) || ((c & 0xf8) != 0xf0) ||
((cur[3] & 0xc0) != 0x80)) ((cur[3] & 0xc0) != 0x80))
goto encoding_error; goto encoding_error;
/* 4-byte code */ /* 4-byte code */
@ -1335,7 +1367,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
/* 1-byte code */ /* 1-byte code */
*len = 1; *len = 1;
if (*ctxt->input->cur == 0xD) { if (*ctxt->input->cur == 0xD) {
if (ctxt->input->cur[1] == 0xA) { if ((avail > 1) && (ctxt->input->cur[1] == 0xA)) {
ctxt->nbChars++; ctxt->nbChars++;
ctxt->input->cur++; ctxt->input->cur++;
} }
@ -1351,7 +1383,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
*/ */
*len = 1; *len = 1;
if (*ctxt->input->cur == 0xD) { if (*ctxt->input->cur == 0xD) {
if (ctxt->input->cur[1] == 0xA) { if ((avail > 1) && (ctxt->input->cur[1] == 0xA)) {
ctxt->nbChars++; ctxt->nbChars++;
ctxt->input->cur++; ctxt->input->cur++;
} }
@ -1367,16 +1399,36 @@ encoding_error:
* encoding !) * encoding !)
*/ */
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
xmlChar bytes[4];
if (avail > 3)
bytes[3] = ctxt->input->cur[3];
else
bytes[3] = 0;
if (avail > 2)
bytes[2] = ctxt->input->cur[2];
else
bytes[2] = 0;
if (avail > 1)
bytes[1] = ctxt->input->cur[1];
else
bytes[1] = 0;
if (avail > 0)
bytes[0] = ctxt->input->cur[0];
else
bytes[0] = 0;
ctxt->sax->error(ctxt->userData, ctxt->sax->error(ctxt->userData,
"Input is not proper UTF-8, indicate encoding !\n"); "Input is not proper UTF-8, indicate encoding !\n");
ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1], bytes[0], bytes[1],bytes[2],bytes[3]);
ctxt->input->cur[2], ctxt->input->cur[3]);
} }
ctxt->wellFormed = 0; ctxt->wellFormed = 0;
ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->errNo = XML_ERR_INVALID_ENCODING;
ctxt->charset = XML_CHAR_ENCODING_8859_1; ctxt->charset = XML_CHAR_ENCODING_8859_1;
if (avail < 1) {
*len = 0;
return(0);
}
*len = 1; *len = 1;
return((int) *ctxt->input->cur); return((int) *ctxt->input->cur);
} }

View File

@ -45,6 +45,8 @@ static int recovery = 0;
static int push = 0; static int push = 0;
static int speed = 0; static int speed = 0;
static int noent = 0; static int noent = 0;
static int quiet = 0;
static int callbacks = 0;
xmlSAXHandler emptySAXHandlerStruct = { xmlSAXHandler emptySAXHandlerStruct = {
NULL, /* internalSubset */ NULL, /* internalSubset */
@ -97,6 +99,9 @@ extern xmlSAXHandlerPtr debugSAXHandler;
static int static int
isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED) isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
{ {
callbacks++;
if (quiet)
return(0);
fprintf(stdout, "SAX.isStandalone()\n"); fprintf(stdout, "SAX.isStandalone()\n");
return(0); return(0);
} }
@ -112,6 +117,9 @@ isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
static int static int
hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED) hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
{ {
callbacks++;
if (quiet)
return(0);
fprintf(stdout, "SAX.hasInternalSubset()\n"); fprintf(stdout, "SAX.hasInternalSubset()\n");
return(0); return(0);
} }
@ -127,6 +135,9 @@ hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
static int static int
hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED) hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
{ {
callbacks++;
if (quiet)
return(0);
fprintf(stdout, "SAX.hasExternalSubset()\n"); fprintf(stdout, "SAX.hasExternalSubset()\n");
return(0); return(0);
} }
@ -141,6 +152,9 @@ static void
internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID) const xmlChar *ExternalID, const xmlChar *SystemID)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.internalSubset(%s,", name); fprintf(stdout, "SAX.internalSubset(%s,", name);
if (ExternalID == NULL) if (ExternalID == NULL)
fprintf(stdout, " ,"); fprintf(stdout, " ,");
@ -162,6 +176,9 @@ static void
externalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, externalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID) const xmlChar *ExternalID, const xmlChar *SystemID)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.externalSubset(%s,", name); fprintf(stdout, "SAX.externalSubset(%s,", name);
if (ExternalID == NULL) if (ExternalID == NULL)
fprintf(stdout, " ,"); fprintf(stdout, " ,");
@ -190,6 +207,9 @@ externalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
static xmlParserInputPtr static xmlParserInputPtr
resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId) resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
{ {
callbacks++;
if (quiet)
return(NULL);
/* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */ /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
@ -222,6 +242,9 @@ resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xm
static xmlEntityPtr static xmlEntityPtr
getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
{ {
callbacks++;
if (quiet)
return(NULL);
fprintf(stdout, "SAX.getEntity(%s)\n", name); fprintf(stdout, "SAX.getEntity(%s)\n", name);
return(NULL); return(NULL);
} }
@ -238,6 +261,9 @@ getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
static xmlEntityPtr static xmlEntityPtr
getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
{ {
callbacks++;
if (quiet)
return(NULL);
fprintf(stdout, "SAX.getParameterEntity(%s)\n", name); fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
return(NULL); return(NULL);
} }
@ -258,6 +284,9 @@ static void
entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type, entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
const xmlChar *publicId, const xmlChar *systemId, xmlChar *content) const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n", fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
name, type, publicId, systemId, content); name, type, publicId, systemId, content);
} }
@ -275,6 +304,9 @@ attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlCha
int type, int def, const xmlChar *defaultValue, int type, int def, const xmlChar *defaultValue,
xmlEnumerationPtr tree ATTRIBUTE_UNUSED) xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
{ {
callbacks++;
if (quiet)
return;
if (defaultValue == NULL) if (defaultValue == NULL)
fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, NULL, ...)\n", fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, NULL, ...)\n",
elem, name, type, def); elem, name, type, def);
@ -296,6 +328,9 @@ static void
elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type, elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
xmlElementContentPtr content ATTRIBUTE_UNUSED) xmlElementContentPtr content ATTRIBUTE_UNUSED)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n", fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
name, type); name, type);
} }
@ -313,6 +348,9 @@ static void
notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
const xmlChar *publicId, const xmlChar *systemId) const xmlChar *publicId, const xmlChar *systemId)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n", fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
(char *) name, (char *) publicId, (char *) systemId); (char *) name, (char *) publicId, (char *) systemId);
} }
@ -332,6 +370,9 @@ unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
const xmlChar *publicId, const xmlChar *systemId, const xmlChar *publicId, const xmlChar *systemId,
const xmlChar *notationName) const xmlChar *notationName)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n", fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
(char *) name, (char *) publicId, (char *) systemId, (char *) name, (char *) publicId, (char *) systemId,
(char *) notationName); (char *) notationName);
@ -348,6 +389,9 @@ unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
static void static void
setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED) setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.setDocumentLocator()\n"); fprintf(stdout, "SAX.setDocumentLocator()\n");
} }
@ -360,6 +404,9 @@ setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBU
static void static void
startDocumentDebug(void *ctx ATTRIBUTE_UNUSED) startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.startDocument()\n"); fprintf(stdout, "SAX.startDocument()\n");
} }
@ -372,6 +419,9 @@ startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
static void static void
endDocumentDebug(void *ctx ATTRIBUTE_UNUSED) endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.endDocument()\n"); fprintf(stdout, "SAX.endDocument()\n");
} }
@ -387,6 +437,9 @@ startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar
{ {
int i; int i;
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.startElement(%s", (char *) name); fprintf(stdout, "SAX.startElement(%s", (char *) name);
if (atts != NULL) { if (atts != NULL) {
for (i = 0;(atts[i] != NULL);i++) { for (i = 0;(atts[i] != NULL);i++) {
@ -408,6 +461,9 @@ startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar
static void static void
endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.endElement(%s)\n", (char *) name); fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
} }
@ -426,6 +482,9 @@ charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
char output[40]; char output[40];
int i; int i;
callbacks++;
if (quiet)
return;
for (i = 0;(i<len) && (i < 30);i++) for (i = 0;(i<len) && (i < 30);i++)
output[i] = ch[i]; output[i] = ch[i];
output[i] = 0; output[i] = 0;
@ -443,6 +502,9 @@ charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
static void static void
referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.reference(%s)\n", name); fprintf(stdout, "SAX.reference(%s)\n", name);
} }
@ -462,6 +524,9 @@ ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
char output[40]; char output[40];
int i; int i;
callbacks++;
if (quiet)
return;
for (i = 0;(i<len) && (i < 30);i++) for (i = 0;(i<len) && (i < 30);i++)
output[i] = ch[i]; output[i] = ch[i];
output[i] = 0; output[i] = 0;
@ -481,6 +546,9 @@ static void
processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target, processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
const xmlChar *data) const xmlChar *data)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.processingInstruction(%s, %s)\n", fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
(char *) target, (char *) data); (char *) target, (char *) data);
} }
@ -496,6 +564,9 @@ processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
static void static void
cdataBlockDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value, int len) cdataBlockDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value, int len)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.pcdata(%.20s, %d)\n", fprintf(stdout, "SAX.pcdata(%.20s, %d)\n",
(char *) value, len); (char *) value, len);
} }
@ -510,6 +581,9 @@ cdataBlockDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value, int len)
static void static void
commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value) commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
{ {
callbacks++;
if (quiet)
return;
fprintf(stdout, "SAX.comment(%s)\n", value); fprintf(stdout, "SAX.comment(%s)\n", value);
} }
@ -527,6 +601,9 @@ warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
{ {
va_list args; va_list args;
callbacks++;
if (quiet)
return;
va_start(args, msg); va_start(args, msg);
fprintf(stdout, "SAX.warning: "); fprintf(stdout, "SAX.warning: ");
vfprintf(stdout, msg, args); vfprintf(stdout, msg, args);
@ -547,6 +624,9 @@ errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
{ {
va_list args; va_list args;
callbacks++;
if (quiet)
return;
va_start(args, msg); va_start(args, msg);
fprintf(stdout, "SAX.error: "); fprintf(stdout, "SAX.error: ");
vfprintf(stdout, msg, args); vfprintf(stdout, msg, args);
@ -567,6 +647,9 @@ fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
{ {
va_list args; va_list args;
callbacks++;
if (quiet)
return;
va_start(args, msg); va_start(args, msg);
fprintf(stdout, "SAX.fatalError: "); fprintf(stdout, "SAX.fatalError: ");
vfprintf(stdout, msg, args); vfprintf(stdout, msg, args);
@ -673,18 +756,23 @@ parseAndPrintFile(char *filename) {
/* /*
* Empty callbacks for checking * Empty callbacks for checking
*/ */
if (!quiet) {
res = xmlSAXUserParseFile(emptySAXHandler, NULL, filename); res = xmlSAXUserParseFile(emptySAXHandler, NULL, filename);
if (res != 0) { if (res != 0) {
fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res); fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res);
} }
}
/* /*
* Debug callback * Debug callback
*/ */
callbacks = 0;
res = xmlSAXUserParseFile(debugSAXHandler, NULL, filename); res = xmlSAXUserParseFile(debugSAXHandler, NULL, filename);
if (res != 0) { if (res != 0) {
fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res); fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res);
} }
if (quiet)
fprintf(stdout, "%d callbacks generated\n", callbacks);
} else { } else {
/* /*
* test 100x the SAX parse * test 100x the SAX parse
@ -722,6 +810,9 @@ int main(int argc, char **argv) {
else if ((!strcmp(argv[i], "-noent")) || else if ((!strcmp(argv[i], "-noent")) ||
(!strcmp(argv[i], "--noent"))) (!strcmp(argv[i], "--noent")))
noent++; noent++;
else if ((!strcmp(argv[i], "-quiet")) ||
(!strcmp(argv[i], "--quiet")))
quiet++;
} }
if (noent != 0) xmlSubstituteEntitiesDefault(1); if (noent != 0) xmlSubstituteEntitiesDefault(1);
for (i = 1; i < argc ; i++) { for (i = 1; i < argc ; i++) {

32
tree.c
View File

@ -5159,6 +5159,8 @@ xmlBufferCreateSize(size_t size) {
void void
xmlBufferSetAllocationScheme(xmlBufferPtr buf, xmlBufferSetAllocationScheme(xmlBufferPtr buf,
xmlBufferAllocationScheme scheme) { xmlBufferAllocationScheme scheme) {
if (buf->alloc == XML_BUFFER_ALLOC_UNMUTABLE)
return;
if (buf == NULL) { if (buf == NULL) {
#ifdef DEBUG_BUFFER #ifdef DEBUG_BUFFER
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
@ -5186,7 +5188,8 @@ xmlBufferFree(xmlBufferPtr buf) {
#endif #endif
return; return;
} }
if (buf->content != NULL) { if ((buf->content != NULL) &&
(buf->alloc != XML_BUFFER_ALLOC_UNMUTABLE)) {
xmlFree(buf->content); xmlFree(buf->content);
} }
xmlFree(buf); xmlFree(buf);
@ -5200,6 +5203,9 @@ xmlBufferFree(xmlBufferPtr buf) {
*/ */
void void
xmlBufferEmpty(xmlBufferPtr buf) { xmlBufferEmpty(xmlBufferPtr buf) {
if (buf->alloc == XML_BUFFER_ALLOC_UNMUTABLE)
return;
if (buf->content == NULL) return; if (buf->content == NULL) return;
buf->use = 0; buf->use = 0;
memset(buf->content, 0, buf->size); memset(buf->content, 0, buf->size);
@ -5219,6 +5225,12 @@ xmlBufferShrink(xmlBufferPtr buf, unsigned int len) {
if (len == 0) return(0); if (len == 0) return(0);
if (len > buf->use) return(-1); if (len > buf->use) return(-1);
if (buf->alloc == XML_BUFFER_ALLOC_UNMUTABLE) {
buf->content = &buf->content[len];
buf->use -= len;
return(len);
}
buf->use -= len; buf->use -= len;
memmove(buf->content, &buf->content[len], buf->use * sizeof(xmlChar)); memmove(buf->content, &buf->content[len], buf->use * sizeof(xmlChar));
@ -5240,6 +5252,9 @@ xmlBufferGrow(xmlBufferPtr buf, unsigned int len) {
int size; int size;
xmlChar *newbuf; xmlChar *newbuf;
if (buf->alloc == XML_BUFFER_ALLOC_UNMUTABLE)
return(-1);
if (len + buf->use < buf->size) return(0); if (len + buf->use < buf->size) return(0);
size = buf->use + len + 100; size = buf->use + len + 100;
@ -5334,6 +5349,9 @@ xmlBufferResize(xmlBufferPtr buf, unsigned int size)
unsigned int newSize; unsigned int newSize;
xmlChar* rebuf = NULL; xmlChar* rebuf = NULL;
if (buf->alloc == XML_BUFFER_ALLOC_UNMUTABLE)
return 0;
/*take care of empty case*/ /*take care of empty case*/
newSize = (buf->size ? buf->size*2 : size); newSize = (buf->size ? buf->size*2 : size);
@ -5383,6 +5401,9 @@ void
xmlBufferAdd(xmlBufferPtr buf, const xmlChar *str, int len) { xmlBufferAdd(xmlBufferPtr buf, const xmlChar *str, int len) {
unsigned int needSize; unsigned int needSize;
if (buf->alloc == XML_BUFFER_ALLOC_UNMUTABLE)
return;
if (str == NULL) { if (str == NULL) {
#ifdef DEBUG_BUFFER #ifdef DEBUG_BUFFER
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
@ -5431,6 +5452,9 @@ void
xmlBufferAddHead(xmlBufferPtr buf, const xmlChar *str, int len) { xmlBufferAddHead(xmlBufferPtr buf, const xmlChar *str, int len) {
unsigned int needSize; unsigned int needSize;
if (buf->alloc == XML_BUFFER_ALLOC_UNMUTABLE)
return;
if (str == NULL) { if (str == NULL) {
#ifdef DEBUG_BUFFER #ifdef DEBUG_BUFFER
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
@ -5491,6 +5515,9 @@ void
xmlBufferCCat(xmlBufferPtr buf, const char *str) { xmlBufferCCat(xmlBufferPtr buf, const char *str) {
const char *cur; const char *cur;
if (buf->alloc == XML_BUFFER_ALLOC_UNMUTABLE)
return;
if (str == NULL) { if (str == NULL) {
#ifdef DEBUG_BUFFER #ifdef DEBUG_BUFFER
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
@ -5563,6 +5590,9 @@ xmlBufferWriteChar(xmlBufferPtr buf, const char *string) {
*/ */
void void
xmlBufferWriteQuotedString(xmlBufferPtr buf, const xmlChar *string) { xmlBufferWriteQuotedString(xmlBufferPtr buf, const xmlChar *string) {
if (buf->alloc == XML_BUFFER_ALLOC_UNMUTABLE)
return;
if (xmlStrchr(string, '"')) { if (xmlStrchr(string, '"')) {
if (xmlStrchr(string, '\'')) { if (xmlStrchr(string, '\'')) {
#ifdef DEBUG_BUFFER #ifdef DEBUG_BUFFER

156
xmlIO.c
View File

@ -33,6 +33,14 @@
#include <zlib.h> #include <zlib.h>
#endif #endif
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
/* seems needed for Solaris */
#ifndef MAP_FAILED
#define MAP_FAILED ((void *) -1)
#endif
#endif
/* Figure a portable way to know if a file is a directory. */ /* Figure a portable way to know if a file is a directory. */
#ifndef HAVE_STAT #ifndef HAVE_STAT
# ifdef HAVE__STAT # ifdef HAVE__STAT
@ -1947,6 +1955,84 @@ xmlParserInputBufferCreateMem(const char *mem, int size, xmlCharEncoding enc) {
return(ret); return(ret);
} }
#ifdef HAVE_SYS_MMAN_H
/*
 * Bookkeeping attached to an input buffer backed by a memory-mapped file.
 * It records what xmlParserInputBufferCloseMMapFile() needs in order to
 * release the mapping and the underlying descriptor.
 */
typedef struct _xmlMMapContext xmlMMapContext;
typedef xmlMMapContext *xmlMMapContextPtr;
struct _xmlMMapContext {
int fd; /* descriptor of the mapped file; closed on teardown when >= 0 */
const char *mem; /* start of the mapping; passed to munmap() on teardown */
size_t size; /* length of the mapping in bytes; passed to munmap() */
};
/**
 * xmlParserInputBufferCloseMMapFile:
 * @ctxt:  the mmap context, may be NULL
 *
 * Release everything held by an mmap context: the mapping is unmapped
 * unless it is MAP_FAILED, the descriptor is closed unless it is negative,
 * and the context structure itself is freed. A NULL @ctxt is ignored.
 */
static void
xmlParserInputBufferCloseMMapFile(xmlMMapContextPtr ctxt) {
    if (ctxt != NULL) {
        const char *mapping = ctxt->mem;
        int descriptor = ctxt->fd;

        if (mapping != (void *) MAP_FAILED)
            munmap((char *) mapping, ctxt->size);
        if (descriptor >= 0)
            close(descriptor);
        xmlFree(ctxt);
    }
}
/**
 * xmlParserInputBufferCreateMMapFile:
 * @fd:  the descriptor associated to the mmaped file
 * @mem:  the start of the mmaped area
 * @size:  the length of the mmaped area in bytes
 * @enc:  the charset encoding if known
 *
 * Create a parser input buffer whose content is the mmaped area itself,
 * without copying it. On success the returned buffer takes over @fd and
 * the mapping: its close callback unmaps @mem and closes @fd. On failure
 * neither is touched and both remain the caller's responsibility.
 *
 * Returns the new parser input or NULL
 */
static xmlParserInputBufferPtr
xmlParserInputBufferCreateMMapFile(int fd, const char *mem, size_t size,
                                   xmlCharEncoding enc) {
    xmlParserInputBufferPtr ret;
    xmlMMapContextPtr ctxt;

    if (fd < 0) return(NULL);
    /* size_t is unsigned, so an empty area is the only invalid length */
    if (size == 0) return(NULL);
    /* a failed mmap() yields MAP_FAILED rather than NULL: reject both */
    if ((mem == NULL) || (mem == (const char *) MAP_FAILED)) return(NULL);

    ctxt = (xmlMMapContextPtr) xmlMalloc(sizeof(xmlMMapContext));
    if (ctxt == NULL)
        return(NULL);
    ctxt->fd = fd;
    ctxt->mem = mem;
    ctxt->size = size;

    ret = xmlAllocParserInputBuffer(enc);
    if (ret == NULL) {
        xmlFree(ctxt);
        return(NULL);
    }

    ret->context = (void *) ctxt;
    /* the whole input is already in the buffer, there is nothing to read */
    ret->readcallback = (xmlInputReadCallback) xmlNop;
    ret->closecallback = (xmlInputCloseCallback)
                         xmlParserInputBufferCloseMMapFile;

    /* drop the storage allocated by default so the buffer can alias @mem */
    if (ret->buffer->content != NULL) {
        xmlFree(ret->buffer->content);
    }
    /* mark the content as not owned by the buffer before pointing at @mem */
    ret->buffer->alloc = XML_BUFFER_ALLOC_UNMUTABLE;
    ret->buffer->content = (xmlChar *) mem;
    ret->buffer->size = size;
    ret->buffer->use = size;

    return(ret);
}
#endif
/** /**
* xmlOutputBufferCreateFd: * xmlOutputBufferCreateFd:
* @fd: a file descriptor number * @fd: a file descriptor number
@ -2433,8 +2519,7 @@ xmlParserGetDirectory(const char *filename) {
* * * *
****************************************************************/ ****************************************************************/
#ifdef LIBXML_CATALOG_ENABLED static const char * xmlSysIDExists(const char *URL, size_t *size) {
static int xmlSysIDExists(const char *URL) {
#ifdef HAVE_STAT #ifdef HAVE_STAT
int ret; int ret;
struct stat info; struct stat info;
@ -2454,12 +2539,16 @@ static int xmlSysIDExists(const char *URL) {
} else } else
path = URL; path = URL;
ret = stat(path, &info); ret = stat(path, &info);
if (ret == 0) if (ret == 0) {
return(1); if (size)
*size = info.st_size;
return(path);
}
#endif #endif
return(0); if (size)
*size = -1;
return(NULL);
} }
#endif
/** /**
* xmlDefaultExternalEntityLoader: * xmlDefaultExternalEntityLoader:
@ -2480,7 +2569,57 @@ xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
#ifdef LIBXML_CATALOG_ENABLED #ifdef LIBXML_CATALOG_ENABLED
xmlCatalogAllow pref; xmlCatalogAllow pref;
#endif #endif
const char *exist;
size_t length;
exist = xmlSysIDExists(URL, &length);
#ifdef HAVE_SYS_MMAN_H
/*
* Shortcut, if asked for a file, the file is present, mmap it !
*/
if ((exist != NULL) && (length > 0)) {
int fd = -1;
const char *base = NULL;
xmlParserInputBufferPtr buf = NULL;
if ((fd = open(exist, O_RDONLY)) >= 0) {
/*
* Magic test: don't drop back native compressed content support
*/
char tmpbuf[2];
if (read(fd, tmpbuf, 2) != 2)
goto failed;
if ((tmpbuf[0] == 0x1F) && (tmpbuf[1] == 0x8B))
goto failed;
base = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
if (base != (void *) MAP_FAILED) {
buf = xmlParserInputBufferCreateMMapFile(fd, base, length,
XML_CHAR_ENCODING_NONE);
if (buf != NULL) {
ret = xmlNewInputStream(ctxt);
if (ret != NULL) {
ret->filename = (const char *) xmlCharStrdup(exist);
ret->directory = (const char *)
xmlParserGetDirectory(exist);
ret->buf = buf;
ret->base = ret->buf->buffer->content;
ret->cur = ret->buf->buffer->content;
ret->end = &ret->base[ret->buf->buffer->use];
return(ret);
}
}
}
}
failed:
if (buf != NULL)
xmlFreeParserInputBuffer(buf);
if (base != (void *) MAP_FAILED)
munmap((char *) base, length);
if (fd >= 0)
close(fd);
}
#endif
#ifdef DEBUG_EXTERNAL_ENTITIES #ifdef DEBUG_EXTERNAL_ENTITIES
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
"xmlDefaultExternalEntityLoader(%s, xxx)\n", URL); "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
@ -2492,7 +2631,7 @@ xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
*/ */
pref = xmlCatalogGetDefaults(); pref = xmlCatalogGetDefaults();
if ((pref != XML_CATA_ALLOW_NONE) && (!xmlSysIDExists(URL))) { if ((pref != XML_CATA_ALLOW_NONE) && (exist == NULL)) {
/* /*
* Do a local lookup * Do a local lookup
*/ */
@ -2518,7 +2657,8 @@ xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
/* /*
* TODO: do an URI lookup on the reference * TODO: do an URI lookup on the reference
*/ */
if ((resource != NULL) && (!xmlSysIDExists((const char *)resource))) { exist = xmlSysIDExists(URL, &length);
if ((resource != NULL) && (exist == NULL)) {
xmlChar *tmp = NULL; xmlChar *tmp = NULL;
if ((ctxt->catalogs != NULL) && if ((ctxt->catalogs != NULL) &&