mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
Merge branch '10.2' into bb-10.2-mdev9543
- Make Window Functions errors use the MariaDB's extra error range. - Fix a trivial bug in check_error_mesg
This commit is contained in:
463
sql/sql_load.cc
463
sql/sql_load.cc
@ -61,24 +61,128 @@ XML_TAG::XML_TAG(int l, String f, String v)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Field and line terminators must be interpreted as sequence of unsigned char.
|
||||
Otherwise, non-ascii terminators will be negative on some platforms,
|
||||
and positive on others (depending on the implementation of char).
|
||||
*/
|
||||
class Term_string
|
||||
{
|
||||
const uchar *m_ptr;
|
||||
uint m_length;
|
||||
int m_initial_byte;
|
||||
public:
|
||||
Term_string(const String &str) :
|
||||
m_ptr(static_cast<const uchar*>(static_cast<const void*>(str.ptr()))),
|
||||
m_length(str.length()),
|
||||
m_initial_byte((uchar) (str.length() ? str.ptr()[0] : INT_MAX))
|
||||
{ }
|
||||
void set(const uchar *str, uint length, int initial_byte)
|
||||
{
|
||||
m_ptr= str;
|
||||
m_length= length;
|
||||
m_initial_byte= initial_byte;
|
||||
}
|
||||
void reset() { set(NULL, 0, INT_MAX); }
|
||||
const uchar *ptr() const { return m_ptr; }
|
||||
uint length() const { return m_length; }
|
||||
int initial_byte() const { return m_initial_byte; }
|
||||
bool eq(const Term_string &other) const
|
||||
{
|
||||
return length() == other.length() && !memcmp(ptr(), other.ptr(), length());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache))
|
||||
#define PUSH(A) *(stack_pos++)=(A)
|
||||
|
||||
class READ_INFO {
|
||||
File file;
|
||||
uchar *buffer, /* Buffer for read text */
|
||||
*end_of_buff; /* Data in bufferts ends here */
|
||||
uint buff_length, /* Length of buffert */
|
||||
max_length; /* Max length of row */
|
||||
const uchar *field_term_ptr,*line_term_ptr;
|
||||
const char *line_start_ptr,*line_start_end;
|
||||
uint field_term_length,line_term_length,enclosed_length;
|
||||
int field_term_char,line_term_char,enclosed_char,escape_char;
|
||||
String data; /* Read buffer */
|
||||
uint fixed_length; /* Length of the fixed length record */
|
||||
uint max_length; /* Max length of row */
|
||||
Term_string m_field_term; /* FIELDS TERMINATED BY 'string' */
|
||||
Term_string m_line_term; /* LINES TERMINATED BY 'string' */
|
||||
Term_string m_line_start; /* LINES STARTING BY 'string' */
|
||||
int enclosed_char,escape_char;
|
||||
int *stack,*stack_pos;
|
||||
bool found_end_of_line,start_of_line,eof;
|
||||
NET *io_net;
|
||||
int level; /* for load xml */
|
||||
|
||||
bool getbyte(char *to)
|
||||
{
|
||||
int chr= GET;
|
||||
if (chr == my_b_EOF)
|
||||
return (eof= true);
|
||||
*to= chr;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
Read a tail of a multi-byte character.
|
||||
The first byte of the character is assumed to be already
|
||||
read from the file and appended to "str".
|
||||
|
||||
@returns true - if EOF happened unexpectedly
|
||||
@returns false - no EOF happened: found a good multi-byte character,
|
||||
or a bad byte sequence
|
||||
|
||||
Note:
|
||||
The return value depends only on EOF:
|
||||
- read_mbtail() returns "false" if a good character was read, but also
|
||||
- read_mbtail() returns "false" if an incomplete byte sequence was found
|
||||
and no EOF happened.
|
||||
|
||||
For example, suppose we have an ujis file with bytes 0x8FA10A, where:
|
||||
- 0x8FA1 is an incomplete prefix of a 3-byte character
|
||||
(it should be [8F][A1-FE][A1-FE] to make a full 3-byte character)
|
||||
- 0x0A is a line delimiter
|
||||
This file has some broken data, the trailing [A1-FE] is missing.
|
||||
|
||||
In this example it works as follows:
|
||||
- 0x8F is read from the file and put into "data" before the call
|
||||
for read_mbtail()
|
||||
- 0xA1 is read from the file and put into "data" by read_mbtail()
|
||||
- 0x0A is kept in the read queue, so the next read iteration after
|
||||
the current read_mbtail() call will normally find it and recognize as
|
||||
a line delimiter
|
||||
- the current call for read_mbtail() returns "false",
|
||||
because no EOF happened
|
||||
*/
|
||||
bool read_mbtail(String *str)
{
  /* Classify the byte that the caller has just appended to "str" */
  int chlen= my_charlen(read_charset, str->end() - 1, str->end());
  if (chlen == 1)
    return false;                       // Single byte character found

  /* Offset of the leading byte of the character being assembled */
  const uint32 head_offset= str->length() - 1;

  while (MY_CS_IS_TOOSMALL(chlen))
  {
    int chr= GET;
    if (chr == my_b_EOF)
    {
      DBUG_PRINT("info", ("read_mbtail: chlen=%d; unexpected EOF", chlen));
      return true;                      // EOF
    }
    str->append(chr);
    chlen= my_charlen(read_charset, str->ptr() + head_offset, str->end());
    if (chlen != MY_CS_ILSEQ)
      continue;                         // Complete, or still needs more bytes

    /*
      The sequence was incomplete but valid until this byte arrived.
      Drop the byte from "str" and give it back to the read queue, so the
      next read iteration sees it again.
    */
    str->length(str->length() - 1);
    PUSH(chr);
    DBUG_PRINT("info", ("read_mbtail: ILSEQ"));
    return false;                       // Bad byte sequence
  }

  DBUG_PRINT("info", ("read_mbtail: chlen=%d", chlen));
  return false;                         // Good multi-byte character
}
|
||||
|
||||
public:
|
||||
bool error,line_cuted,found_null,enclosed;
|
||||
uchar *row_start, /* Found row starts here */
|
||||
@ -94,7 +198,11 @@ public:
|
||||
int read_fixed_length(void);
|
||||
int next_line(void);
|
||||
char unescape(char chr);
|
||||
int terminator(const uchar *ptr, uint length);
|
||||
bool terminator(const uchar *ptr, uint length);
|
||||
bool terminator(const Term_string &str)
|
||||
{ return terminator(str.ptr(), str.length()); }
|
||||
bool terminator(int chr, const Term_string &str)
|
||||
{ return str.initial_byte() == chr && terminator(str); }
|
||||
bool find_start_of_fields();
|
||||
/* load xml */
|
||||
List<XML_TAG> taglist;
|
||||
@ -1344,63 +1452,40 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs,
|
||||
String &field_term, String &line_start, String &line_term,
|
||||
String &enclosed_par, int escape, bool get_it_from_net,
|
||||
bool is_fifo)
|
||||
:file(file_par), buffer(NULL), buff_length(tot_length), escape_char(escape),
|
||||
found_end_of_line(false), eof(false),
|
||||
:file(file_par), fixed_length(tot_length),
|
||||
m_field_term(field_term), m_line_term(line_term), m_line_start(line_start),
|
||||
escape_char(escape), found_end_of_line(false), eof(false),
|
||||
error(false), line_cuted(false), found_null(false), read_charset(cs)
|
||||
{
|
||||
data.set_thread_specific();
|
||||
/*
|
||||
Field and line terminators must be interpreted as sequence of unsigned char.
|
||||
Otherwise, non-ascii terminators will be negative on some platforms,
|
||||
and positive on others (depending on the implementation of char).
|
||||
*/
|
||||
field_term_ptr=
|
||||
static_cast<const uchar*>(static_cast<const void*>(field_term.ptr()));
|
||||
field_term_length= field_term.length();
|
||||
line_term_ptr=
|
||||
static_cast<const uchar*>(static_cast<const void*>(line_term.ptr()));
|
||||
line_term_length= line_term.length();
|
||||
|
||||
level= 0; /* for load xml */
|
||||
if (line_start.length() == 0)
|
||||
{
|
||||
line_start_ptr=0;
|
||||
start_of_line= 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
line_start_ptr= line_start.ptr();
|
||||
line_start_end=line_start_ptr+line_start.length();
|
||||
start_of_line= 1;
|
||||
}
|
||||
start_of_line= line_start.length() != 0;
|
||||
/* If field_terminator == line_terminator, don't use line_terminator */
|
||||
if (field_term_length == line_term_length &&
|
||||
!memcmp(field_term_ptr,line_term_ptr,field_term_length))
|
||||
{
|
||||
line_term_length=0;
|
||||
line_term_ptr= NULL;
|
||||
}
|
||||
enclosed_char= (enclosed_length=enclosed_par.length()) ?
|
||||
(uchar) enclosed_par[0] : INT_MAX;
|
||||
field_term_char= field_term_length ? field_term_ptr[0] : INT_MAX;
|
||||
line_term_char= line_term_length ? line_term_ptr[0] : INT_MAX;
|
||||
if (m_field_term.eq(m_line_term))
|
||||
m_line_term.reset();
|
||||
enclosed_char= enclosed_par.length() ? (uchar) enclosed_par[0] : INT_MAX;
|
||||
|
||||
/* Set of a stack for unget if long terminators */
|
||||
uint length= MY_MAX(cs->mbmaxlen, MY_MAX(field_term_length, line_term_length)) + 1;
|
||||
uint length= MY_MAX(cs->mbmaxlen, MY_MAX(m_field_term.length(),
|
||||
m_line_term.length())) + 1;
|
||||
set_if_bigger(length,line_start.length());
|
||||
stack= stack_pos= (int*) thd->alloc(sizeof(int) * length);
|
||||
|
||||
if (!(buffer=(uchar*) my_malloc(buff_length+1,MYF(MY_THREAD_SPECIFIC))))
|
||||
if (data.reserve(tot_length))
|
||||
error=1; /* purecov: inspected */
|
||||
else
|
||||
{
|
||||
end_of_buff=buffer+buff_length;
|
||||
if (init_io_cache(&cache,(get_it_from_net) ? -1 : file, 0,
|
||||
(get_it_from_net) ? READ_NET :
|
||||
(is_fifo ? READ_FIFO : READ_CACHE),0L,1,
|
||||
MYF(MY_WME | MY_THREAD_SPECIFIC)))
|
||||
{
|
||||
my_free(buffer); /* purecov: inspected */
|
||||
buffer= NULL;
|
||||
error=1;
|
||||
}
|
||||
else
|
||||
@ -1423,7 +1508,6 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs,
|
||||
READ_INFO::~READ_INFO()
|
||||
{
|
||||
::end_io_cache(&cache);
|
||||
my_free(buffer);
|
||||
List_iterator<XML_TAG> xmlit(taglist);
|
||||
XML_TAG *t;
|
||||
while ((t= xmlit++))
|
||||
@ -1431,7 +1515,7 @@ READ_INFO::~READ_INFO()
|
||||
}
|
||||
|
||||
|
||||
inline int READ_INFO::terminator(const uchar *ptr,uint length)
|
||||
inline bool READ_INFO::terminator(const uchar *ptr, uint length)
|
||||
{
|
||||
int chr=0; // Keep gcc happy
|
||||
uint i;
|
||||
@ -1443,18 +1527,17 @@ inline int READ_INFO::terminator(const uchar *ptr,uint length)
|
||||
}
|
||||
}
|
||||
if (i == length)
|
||||
return 1;
|
||||
return true;
|
||||
PUSH(chr);
|
||||
while (i-- > 1)
|
||||
PUSH(*--ptr);
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
int READ_INFO::read_field()
|
||||
{
|
||||
int chr,found_enclosed_char;
|
||||
uchar *to,*new_buffer;
|
||||
|
||||
found_null=0;
|
||||
if (found_end_of_line)
|
||||
@ -1473,11 +1556,11 @@ int READ_INFO::read_field()
|
||||
found_end_of_line=eof=1;
|
||||
return 1;
|
||||
}
|
||||
to=buffer;
|
||||
data.length(0);
|
||||
if (chr == enclosed_char)
|
||||
{
|
||||
found_enclosed_char=enclosed_char;
|
||||
*to++=(uchar) chr; // If error
|
||||
data.append(chr); // If error
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1487,7 +1570,8 @@ int READ_INFO::read_field()
|
||||
|
||||
for (;;)
|
||||
{
|
||||
while ( to < end_of_buff)
|
||||
// Make sure we have enough space for the longest multi-byte character.
|
||||
while (data.length() + read_charset->mbmaxlen <= data.alloced_length())
|
||||
{
|
||||
chr = GET;
|
||||
if (chr == my_b_EOF)
|
||||
@ -1496,7 +1580,7 @@ int READ_INFO::read_field()
|
||||
{
|
||||
if ((chr=GET) == my_b_EOF)
|
||||
{
|
||||
*to++= (uchar) escape_char;
|
||||
data.append(escape_char);
|
||||
goto found_eof;
|
||||
}
|
||||
/*
|
||||
@ -1508,24 +1592,24 @@ int READ_INFO::read_field()
|
||||
*/
|
||||
if (escape_char != enclosed_char || chr == escape_char)
|
||||
{
|
||||
*to++ = (uchar) unescape((char) chr);
|
||||
data.append(unescape((char) chr));
|
||||
continue;
|
||||
}
|
||||
PUSH(chr);
|
||||
chr= escape_char;
|
||||
}
|
||||
#ifdef ALLOW_LINESEPARATOR_IN_STRINGS
|
||||
if (chr == line_term_char)
|
||||
if (chr == m_line_term.initial_byte())
|
||||
#else
|
||||
if (chr == line_term_char && found_enclosed_char == INT_MAX)
|
||||
if (chr == m_line_term.initial_byte() && found_enclosed_char == INT_MAX)
|
||||
#endif
|
||||
{
|
||||
if (terminator(line_term_ptr,line_term_length))
|
||||
if (terminator(m_line_term))
|
||||
{ // Maybe unexpected linefeed
|
||||
enclosed=0;
|
||||
found_end_of_line=1;
|
||||
row_start=buffer;
|
||||
row_end= to;
|
||||
row_start= (uchar *) data.ptr();
|
||||
row_end= (uchar *) data.end();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -1533,27 +1617,24 @@ int READ_INFO::read_field()
|
||||
{
|
||||
if ((chr=GET) == found_enclosed_char)
|
||||
{ // Remove dupplicated
|
||||
*to++ = (uchar) chr;
|
||||
data.append(chr);
|
||||
continue;
|
||||
}
|
||||
// End of enclosed field if followed by field_term or line_term
|
||||
if (chr == my_b_EOF ||
|
||||
(chr == line_term_char && terminator(line_term_ptr,
|
||||
line_term_length)))
|
||||
if (chr == my_b_EOF || terminator(chr, m_line_term))
|
||||
{
|
||||
/* Maybe unexpected linefeed */
|
||||
enclosed=1;
|
||||
found_end_of_line=1;
|
||||
row_start=buffer+1;
|
||||
row_end= to;
|
||||
row_start= (uchar *) data.ptr() + 1;
|
||||
row_end= (uchar *) data.end();
|
||||
return 0;
|
||||
}
|
||||
if (chr == field_term_char &&
|
||||
terminator(field_term_ptr,field_term_length))
|
||||
if (terminator(chr, m_field_term))
|
||||
{
|
||||
enclosed=1;
|
||||
row_start=buffer+1;
|
||||
row_end= to;
|
||||
row_start= (uchar *) data.ptr() + 1;
|
||||
row_end= (uchar *) data.end();
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
@ -1564,68 +1645,33 @@ int READ_INFO::read_field()
|
||||
/* copy the found term character to 'to' */
|
||||
chr= found_enclosed_char;
|
||||
}
|
||||
else if (chr == field_term_char && found_enclosed_char == INT_MAX)
|
||||
else if (chr == m_field_term.initial_byte() &&
|
||||
found_enclosed_char == INT_MAX)
|
||||
{
|
||||
if (terminator(field_term_ptr,field_term_length))
|
||||
if (terminator(m_field_term))
|
||||
{
|
||||
enclosed=0;
|
||||
row_start=buffer;
|
||||
row_end= to;
|
||||
row_start= (uchar *) data.ptr();
|
||||
row_end= (uchar *) data.end();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#ifdef USE_MB
|
||||
if (my_mbcharlen(read_charset, chr) > 1 &&
|
||||
to + my_mbcharlen(read_charset, chr) <= end_of_buff)
|
||||
{
|
||||
uchar* p= to;
|
||||
int ml, i;
|
||||
*to++ = chr;
|
||||
|
||||
ml= my_mbcharlen(read_charset, chr);
|
||||
|
||||
for (i= 1; i < ml; i++)
|
||||
{
|
||||
chr= GET;
|
||||
if (chr == my_b_EOF)
|
||||
{
|
||||
/*
|
||||
Need to back up the bytes already ready from illformed
|
||||
multi-byte char
|
||||
*/
|
||||
to-= i;
|
||||
goto found_eof;
|
||||
}
|
||||
*to++ = chr;
|
||||
}
|
||||
if (my_ismbchar(read_charset,
|
||||
(const char *)p,
|
||||
(const char *)to))
|
||||
continue;
|
||||
for (i= 0; i < ml; i++)
|
||||
PUSH(*--to);
|
||||
chr= GET;
|
||||
}
|
||||
#endif
|
||||
*to++ = (uchar) chr;
|
||||
data.append(chr);
|
||||
if (use_mb(read_charset) && read_mbtail(&data))
|
||||
goto found_eof;
|
||||
}
|
||||
/*
|
||||
** We come here if buffer is too small. Enlarge it and continue
|
||||
*/
|
||||
if (!(new_buffer=(uchar*) my_realloc((char*) buffer,buff_length+1+IO_SIZE,
|
||||
MYF(MY_WME | MY_THREAD_SPECIFIC))))
|
||||
return (error=1);
|
||||
to=new_buffer + (to-buffer);
|
||||
buffer=new_buffer;
|
||||
buff_length+=IO_SIZE;
|
||||
end_of_buff=buffer+buff_length;
|
||||
if (data.reserve(IO_SIZE))
|
||||
return (error= 1);
|
||||
}
|
||||
|
||||
found_eof:
|
||||
enclosed=0;
|
||||
found_end_of_line=eof=1;
|
||||
row_start=buffer;
|
||||
row_end=to;
|
||||
row_start= (uchar *) data.ptr();
|
||||
row_end= (uchar *) data.end();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1647,7 +1693,6 @@ found_eof:
|
||||
int READ_INFO::read_fixed_length()
|
||||
{
|
||||
int chr;
|
||||
uchar *to;
|
||||
if (found_end_of_line)
|
||||
return 1; // One have to call next_line
|
||||
|
||||
@ -1658,8 +1703,7 @@ int READ_INFO::read_fixed_length()
|
||||
return 1;
|
||||
}
|
||||
|
||||
to=row_start=buffer;
|
||||
while (to < end_of_buff)
|
||||
for (data.length(0); data.length() < fixed_length ; )
|
||||
{
|
||||
if ((chr=GET) == my_b_EOF)
|
||||
goto found_eof;
|
||||
@ -1667,105 +1711,129 @@ int READ_INFO::read_fixed_length()
|
||||
{
|
||||
if ((chr=GET) == my_b_EOF)
|
||||
{
|
||||
*to++= (uchar) escape_char;
|
||||
data.append(escape_char);
|
||||
goto found_eof;
|
||||
}
|
||||
*to++ =(uchar) unescape((char) chr);
|
||||
data.append((uchar) unescape((char) chr));
|
||||
continue;
|
||||
}
|
||||
if (chr == line_term_char)
|
||||
{
|
||||
if (terminator(line_term_ptr,line_term_length))
|
||||
{ // Maybe unexpected linefeed
|
||||
found_end_of_line=1;
|
||||
row_end= to;
|
||||
return 0;
|
||||
}
|
||||
if (terminator(chr, m_line_term))
|
||||
{ // Maybe unexpected linefeed
|
||||
found_end_of_line= true;
|
||||
break;
|
||||
}
|
||||
*to++ = (uchar) chr;
|
||||
data.append(chr);
|
||||
}
|
||||
row_end=to; // Found full line
|
||||
row_start= (uchar *) data.ptr();
|
||||
row_end= (uchar *) data.end(); // Found full line
|
||||
return 0;
|
||||
|
||||
found_eof:
|
||||
found_end_of_line=eof=1;
|
||||
row_start=buffer;
|
||||
row_end=to;
|
||||
return to == buffer ? 1 : 0;
|
||||
row_start= (uchar *) data.ptr();
|
||||
row_end= (uchar *) data.end();
|
||||
return data.length() == 0 ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
int READ_INFO::next_line()
|
||||
{
|
||||
line_cuted=0;
|
||||
start_of_line= line_start_ptr != 0;
|
||||
start_of_line= m_line_start.length() != 0;
|
||||
if (found_end_of_line || eof)
|
||||
{
|
||||
found_end_of_line=0;
|
||||
return eof;
|
||||
}
|
||||
found_end_of_line=0;
|
||||
if (!line_term_length)
|
||||
if (!m_line_term.length())
|
||||
return 0; // No lines
|
||||
for (;;)
|
||||
{
|
||||
int chr = GET;
|
||||
#ifdef USE_MB
|
||||
if (my_mbcharlen(read_charset, chr) > 1)
|
||||
{
|
||||
for (uint i=1;
|
||||
chr != my_b_EOF && i<my_mbcharlen(read_charset, chr);
|
||||
i++)
|
||||
chr = GET;
|
||||
if (chr == escape_char)
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (chr == my_b_EOF)
|
||||
{
|
||||
eof=1;
|
||||
return 1;
|
||||
}
|
||||
if (chr == escape_char)
|
||||
int chlen;
|
||||
char buf[MY_CS_MBMAXLEN];
|
||||
|
||||
if (getbyte(&buf[0]))
|
||||
return 1; // EOF
|
||||
|
||||
if (use_mb(read_charset) &&
|
||||
(chlen= my_charlen(read_charset, buf, buf + 1)) != 1)
|
||||
{
|
||||
line_cuted=1;
|
||||
if (GET == my_b_EOF)
|
||||
return 1;
|
||||
uint i;
|
||||
for (i= 1; MY_CS_IS_TOOSMALL(chlen); )
|
||||
{
|
||||
DBUG_ASSERT(i < sizeof(buf));
|
||||
DBUG_ASSERT(chlen != 1);
|
||||
if (getbyte(&buf[i++]))
|
||||
return 1; // EOF
|
||||
chlen= my_charlen(read_charset, buf, buf + i);
|
||||
}
|
||||
|
||||
/*
|
||||
Either a complete multi-byte sequence,
|
||||
or a broken byte sequence was found.
|
||||
Check if the sequence is a prefix of the "LINES TERMINATED BY" string.
|
||||
*/
|
||||
if ((uchar) buf[0] == m_line_term.initial_byte() &&
|
||||
i <= m_line_term.length() &&
|
||||
!memcmp(buf, m_line_term.ptr(), i))
|
||||
{
|
||||
if (m_line_term.length() == i)
|
||||
{
|
||||
/*
|
||||
We found a "LINES TERMINATED BY" string that consists
|
||||
of a single multi-byte character.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
buf[] is a prefix of "LINES TERMINATED BY".
|
||||
Now check the suffix. Length of the suffix of line_term_ptr
|
||||
that still needs to be checked is (line_term_length - i).
|
||||
Note, READ_INFO::terminator() assumes that the leftmost byte of the
|
||||
argument is already scanned from the file and is checked to
|
||||
be a known prefix (e.g. against line_term.initial_char()).
|
||||
So we need to pass one extra byte.
|
||||
*/
|
||||
if (terminator(m_line_term.ptr() + i - 1,
|
||||
m_line_term.length() - i + 1))
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
Here we have a good multi-byte sequence or a broken byte sequence,
|
||||
and the sequence is not equal to "LINES TERMINATED BY".
|
||||
No needs to check for escape_char, because:
|
||||
- multi-byte escape characters in "FIELDS ESCAPED BY" are not
|
||||
supported and are rejected at parse time.
|
||||
- broken single-byte sequences are not recognized as escapes,
|
||||
they are considered to be a part of the data and are converted to
|
||||
question marks.
|
||||
*/
|
||||
line_cuted= true;
|
||||
continue;
|
||||
}
|
||||
if (chr == line_term_char && terminator(line_term_ptr,line_term_length))
|
||||
if (buf[0] == escape_char)
|
||||
{
|
||||
line_cuted= true;
|
||||
if (GET == my_b_EOF)
|
||||
return 1;
|
||||
continue;
|
||||
}
|
||||
if (terminator(buf[0], m_line_term))
|
||||
return 0;
|
||||
line_cuted=1;
|
||||
line_cuted= true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool READ_INFO::find_start_of_fields()
|
||||
{
|
||||
int chr;
|
||||
try_again:
|
||||
do
|
||||
for (int chr= GET ; chr != my_b_EOF ; chr= GET)
|
||||
{
|
||||
if ((chr=GET) == my_b_EOF)
|
||||
{
|
||||
found_end_of_line=eof=1;
|
||||
return 1;
|
||||
}
|
||||
} while ((char) chr != line_start_ptr[0]);
|
||||
for (const char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++)
|
||||
{
|
||||
chr=GET; // Eof will be checked later
|
||||
if ((char) chr != *ptr)
|
||||
{ // Can't be line_start
|
||||
PUSH(chr);
|
||||
while (--ptr != line_start_ptr)
|
||||
{ // Restart with next char
|
||||
PUSH( *ptr);
|
||||
}
|
||||
goto try_again;
|
||||
}
|
||||
if (terminator(chr, m_line_start))
|
||||
return false;
|
||||
}
|
||||
return 0;
|
||||
return (found_end_of_line= eof= true);
|
||||
}
|
||||
|
||||
|
||||
@ -1846,26 +1914,8 @@ int READ_INFO::read_value(int delim, String *val)
|
||||
int chr;
|
||||
String tmp;
|
||||
|
||||
for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF;)
|
||||
for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF; chr= GET)
|
||||
{
|
||||
#ifdef USE_MB
|
||||
if (my_mbcharlen(read_charset, chr) > 1)
|
||||
{
|
||||
DBUG_PRINT("read_xml",("multi byte"));
|
||||
int i, ml= my_mbcharlen(read_charset, chr);
|
||||
for (i= 1; i < ml; i++)
|
||||
{
|
||||
val->append(chr);
|
||||
/*
|
||||
Don't use my_tospace() in the middle of a multi-byte character
|
||||
TODO: check that the multi-byte sequence is valid.
|
||||
*/
|
||||
chr= GET;
|
||||
if (chr == my_b_EOF)
|
||||
return chr;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if(chr == '&')
|
||||
{
|
||||
tmp.length(0);
|
||||
@ -1885,8 +1935,11 @@ int READ_INFO::read_value(int delim, String *val)
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
val->append(chr);
|
||||
chr= GET;
|
||||
if (use_mb(read_charset) && read_mbtail(val))
|
||||
return my_b_EOF;
|
||||
}
|
||||
}
|
||||
return my_tospace(chr);
|
||||
}
|
||||
@ -1955,11 +2008,11 @@ int READ_INFO::read_xml(THD *thd)
|
||||
}
|
||||
|
||||
// row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term
|
||||
if((tag.length() == line_term_length -2) &&
|
||||
(memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0))
|
||||
if((tag.length() == m_line_term.length() - 2) &&
|
||||
(memcmp(tag.ptr(), m_line_term.ptr() + 1, tag.length()) == 0))
|
||||
{
|
||||
DBUG_PRINT("read_xml", ("start-of-row: %i %s %s",
|
||||
level,tag.c_ptr_safe(), line_term_ptr));
|
||||
level,tag.c_ptr_safe(), m_line_term.ptr()));
|
||||
}
|
||||
|
||||
if(chr == ' ' || chr == '>')
|
||||
@ -2026,8 +2079,8 @@ int READ_INFO::read_xml(THD *thd)
|
||||
chr= my_tospace(GET);
|
||||
}
|
||||
|
||||
if((tag.length() == line_term_length -2) &&
|
||||
(memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0))
|
||||
if((tag.length() == m_line_term.length() - 2) &&
|
||||
(memcmp(tag.ptr(), m_line_term.ptr() + 1, tag.length()) == 0))
|
||||
{
|
||||
DBUG_PRINT("read_xml", ("found end-of-row %i %s",
|
||||
level, tag.c_ptr_safe()));
|
||||
|
Reference in New Issue
Block a user