mirror of
https://github.com/MariaDB/server.git
synced 2026-01-06 05:22:24 +03:00
Merging mysql-next-mr-merge to mysql-next-mr.
This commit is contained in:
527
sql/sql_load.cc
527
sql/sql_load.cc
@@ -15,6 +15,8 @@
|
||||
|
||||
|
||||
/* Copy data from a textfile to table */
|
||||
/* 2006-12 Erik Wetterberg : LOAD XML added */
|
||||
|
||||
#include "mysql_priv.h"
|
||||
#include <my_dir.h>
|
||||
#include <m_ctype.h>
|
||||
@@ -23,6 +25,23 @@
|
||||
#include "sp_head.h"
|
||||
#include "sql_trigger.h"
|
||||
|
||||
class XML_TAG {
|
||||
public:
|
||||
int level;
|
||||
String field;
|
||||
String value;
|
||||
XML_TAG(int l, String f, String v);
|
||||
};
|
||||
|
||||
|
||||
XML_TAG::XML_TAG(int l, String f, String v)
|
||||
{
|
||||
level= l;
|
||||
field.append(f);
|
||||
value.append(v);
|
||||
}
|
||||
|
||||
|
||||
class READ_INFO {
|
||||
File file;
|
||||
uchar *buffer, /* Buffer for read text */
|
||||
@@ -37,6 +56,7 @@ class READ_INFO {
|
||||
bool need_end_io_cache;
|
||||
IO_CACHE cache;
|
||||
NET *io_net;
|
||||
int level; /* for load xml */
|
||||
|
||||
public:
|
||||
bool error,line_cuted,found_null,enclosed;
|
||||
@@ -54,6 +74,12 @@ public:
|
||||
char unescape(char chr);
|
||||
int terminator(char *ptr,uint length);
|
||||
bool find_start_of_fields();
|
||||
/* load xml */
|
||||
List<XML_TAG> taglist;
|
||||
int read_value(int delim, String *val);
|
||||
int read_xml();
|
||||
int clear_level(int level);
|
||||
|
||||
/*
|
||||
We need to force cache close before destructor is invoked to log
|
||||
the last read block
|
||||
@@ -82,6 +108,13 @@ static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
|
||||
List<Item> &set_values, READ_INFO &read_info,
|
||||
String &enclosed, ulong skip_lines,
|
||||
bool ignore_check_option_errors);
|
||||
|
||||
static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
|
||||
List<Item> &fields_vars, List<Item> &set_fields,
|
||||
List<Item> &set_values, READ_INFO &read_info,
|
||||
String &enclosed, ulong skip_lines,
|
||||
bool ignore_check_option_errors);
|
||||
|
||||
#ifndef EMBEDDED_LIBRARY
|
||||
static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
|
||||
const char* db_arg,
|
||||
@@ -393,7 +426,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
|
||||
thd->count_cuted_fields= CHECK_FIELD_WARN; /* calc cuted fields */
|
||||
thd->cuted_fields=0L;
|
||||
/* Skip lines if there is a line terminator */
|
||||
if (ex->line_term->length())
|
||||
if (ex->line_term->length() && ex->filetype != FILETYPE_XML)
|
||||
{
|
||||
/* ex->skip_lines needs to be preserved for logging */
|
||||
while (skip_lines > 0)
|
||||
@@ -424,7 +457,11 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
|
||||
(MODE_STRICT_TRANS_TABLES |
|
||||
MODE_STRICT_ALL_TABLES)));
|
||||
|
||||
if (!field_term->length() && !enclosed->length())
|
||||
if (ex->filetype == FILETYPE_XML) /* load xml */
|
||||
error= read_xml_field(thd, info, table_list, fields_vars,
|
||||
set_fields, set_values, read_info,
|
||||
*(ex->line_term), skip_lines, ignore);
|
||||
else if (!field_term->length() && !enclosed->length())
|
||||
error= read_fixed_length(thd, info, table_list, fields_vars,
|
||||
set_fields, set_values, read_info,
|
||||
skip_lines, ignore);
|
||||
@@ -1005,6 +1042,171 @@ continue_loop:;
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
** Read rows in xml format
|
||||
****************************************************************************/
|
||||
static int
|
||||
read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
|
||||
List<Item> &fields_vars, List<Item> &set_fields,
|
||||
List<Item> &set_values, READ_INFO &read_info,
|
||||
String &row_tag, ulong skip_lines,
|
||||
bool ignore_check_option_errors)
|
||||
{
|
||||
List_iterator_fast<Item> it(fields_vars);
|
||||
Item *item;
|
||||
TABLE *table= table_list->table;
|
||||
bool no_trans_update_stmt;
|
||||
CHARSET_INFO *cs= read_info.read_charset;
|
||||
DBUG_ENTER("read_xml_field");
|
||||
|
||||
no_trans_update_stmt= !table->file->has_transactions();
|
||||
|
||||
for ( ; ; it.rewind())
|
||||
{
|
||||
if (thd->killed)
|
||||
{
|
||||
thd->send_kill_message();
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
|
||||
// read row tag and save values into tag list
|
||||
if (read_info.read_xml())
|
||||
break;
|
||||
|
||||
List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
|
||||
xmlit.rewind();
|
||||
XML_TAG *tag= NULL;
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int) skip_lines));
|
||||
while ((tag= xmlit++))
|
||||
{
|
||||
DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'",
|
||||
tag->level, tag->field.c_ptr(),
|
||||
tag->value.c_ptr()));
|
||||
}
|
||||
#endif
|
||||
|
||||
restore_record(table, s->default_values);
|
||||
|
||||
while ((item= it++))
|
||||
{
|
||||
/* If this line is to be skipped we don't want to fill field or var */
|
||||
if (skip_lines)
|
||||
continue;
|
||||
|
||||
/* find field in tag list */
|
||||
xmlit.rewind();
|
||||
tag= xmlit++;
|
||||
|
||||
while(tag && strcmp(tag->field.c_ptr(), item->name) != 0)
|
||||
tag= xmlit++;
|
||||
|
||||
if (!tag) // found null
|
||||
{
|
||||
if (item->type() == Item::FIELD_ITEM)
|
||||
{
|
||||
Field *field= ((Item_field *) item)->field;
|
||||
field->reset();
|
||||
field->set_null();
|
||||
if (field == table->next_number_field)
|
||||
table->auto_increment_field_not_null= TRUE;
|
||||
if (!field->maybe_null())
|
||||
{
|
||||
if (field->type() == FIELD_TYPE_TIMESTAMP)
|
||||
((Field_timestamp *) field)->set_time();
|
||||
else if (field != table->next_number_field)
|
||||
field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
|
||||
ER_WARN_NULL_TO_NOTNULL, 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
((Item_user_var_as_out_param *) item)->set_null_value(cs);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (item->type() == Item::FIELD_ITEM)
|
||||
{
|
||||
|
||||
Field *field= ((Item_field *)item)->field;
|
||||
field->set_notnull();
|
||||
if (field == table->next_number_field)
|
||||
table->auto_increment_field_not_null= TRUE;
|
||||
field->store((char *) tag->value.ptr(), tag->value.length(), cs);
|
||||
}
|
||||
else
|
||||
((Item_user_var_as_out_param *) item)->set_value(
|
||||
(char *) tag->value.ptr(),
|
||||
tag->value.length(), cs);
|
||||
}
|
||||
|
||||
if (read_info.error)
|
||||
break;
|
||||
|
||||
if (skip_lines)
|
||||
{
|
||||
skip_lines--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (item)
|
||||
{
|
||||
/* Have not read any field, thus input file is simply ended */
|
||||
if (item == fields_vars.head())
|
||||
break;
|
||||
|
||||
for ( ; item; item= it++)
|
||||
{
|
||||
if (item->type() == Item::FIELD_ITEM)
|
||||
{
|
||||
/*
|
||||
QQ: We probably should not throw warning for each field.
|
||||
But how about intention to always have the same number
|
||||
of warnings in THD::cuted_fields (and get rid of cuted_fields
|
||||
in the end ?)
|
||||
*/
|
||||
thd->cuted_fields++;
|
||||
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
|
||||
ER_WARN_TOO_FEW_RECORDS,
|
||||
ER(ER_WARN_TOO_FEW_RECORDS),
|
||||
thd->warning_info->current_row_for_warning());
|
||||
}
|
||||
else
|
||||
((Item_user_var_as_out_param *)item)->set_null_value(cs);
|
||||
}
|
||||
}
|
||||
|
||||
if (thd->killed ||
|
||||
fill_record_n_invoke_before_triggers(thd, set_fields, set_values,
|
||||
ignore_check_option_errors,
|
||||
table->triggers,
|
||||
TRG_EVENT_INSERT))
|
||||
DBUG_RETURN(1);
|
||||
|
||||
switch (table_list->view_check_option(thd,
|
||||
ignore_check_option_errors)) {
|
||||
case VIEW_CHECK_SKIP:
|
||||
read_info.next_line();
|
||||
goto continue_loop;
|
||||
case VIEW_CHECK_ERROR:
|
||||
DBUG_RETURN(-1);
|
||||
}
|
||||
|
||||
if (write_record(thd, table, &info))
|
||||
DBUG_RETURN(1);
|
||||
|
||||
/*
|
||||
We don't need to reset auto-increment field since we are restoring
|
||||
its default value at the beginning of each loop iteration.
|
||||
*/
|
||||
thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt;
|
||||
thd->warning_info->inc_current_row_for_warning();
|
||||
continue_loop:;
|
||||
}
|
||||
DBUG_RETURN(test(read_info.error) || thd->is_error());
|
||||
} /* load xml end */
|
||||
|
||||
|
||||
/* Unescape all escape characters, mark \N as null */
|
||||
|
||||
char
|
||||
@@ -1043,6 +1245,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
|
||||
field_term_length= field_term.length();
|
||||
line_term_ptr=(char*) line_term.ptr();
|
||||
line_term_length= line_term.length();
|
||||
level= 0; /* for load xml */
|
||||
if (line_start.length() == 0)
|
||||
{
|
||||
line_start_ptr=0;
|
||||
@@ -1118,6 +1321,10 @@ READ_INFO::~READ_INFO()
|
||||
my_free((uchar*) buffer,MYF(0));
|
||||
error=1;
|
||||
}
|
||||
List_iterator<XML_TAG> xmlit(taglist);
|
||||
XML_TAG *t;
|
||||
while ((t= xmlit++))
|
||||
delete(t);
|
||||
}
|
||||
|
||||
|
||||
@@ -1450,3 +1657,319 @@ bool READ_INFO::find_start_of_fields()
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Clear taglist from tags with a specified level
|
||||
*/
|
||||
int READ_INFO::clear_level(int level)
|
||||
{
|
||||
DBUG_ENTER("READ_INFO::read_xml clear_level");
|
||||
List_iterator<XML_TAG> xmlit(taglist);
|
||||
xmlit.rewind();
|
||||
XML_TAG *tag;
|
||||
|
||||
while ((tag= xmlit++))
|
||||
{
|
||||
if(tag->level >= level)
|
||||
{
|
||||
xmlit.remove();
|
||||
delete tag;
|
||||
}
|
||||
}
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Convert an XML entity to Unicode value.
|
||||
Return -1 on error;
|
||||
*/
|
||||
static int
|
||||
my_xml_entity_to_char(const char *name, uint length)
|
||||
{
|
||||
if (length == 2)
|
||||
{
|
||||
if (!memcmp(name, "gt", length))
|
||||
return '>';
|
||||
if (!memcmp(name, "lt", length))
|
||||
return '<';
|
||||
}
|
||||
else if (length == 3)
|
||||
{
|
||||
if (!memcmp(name, "amp", length))
|
||||
return '&';
|
||||
}
|
||||
else if (length == 4)
|
||||
{
|
||||
if (!memcmp(name, "quot", length))
|
||||
return '"';
|
||||
if (!memcmp(name, "apos", length))
|
||||
return '\'';
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@brief Convert newline, linefeed, tab to space
|
||||
|
||||
@param chr character
|
||||
|
||||
@details According to the "XML 1.0" standard,
|
||||
only space (#x20) characters, carriage returns,
|
||||
line feeds or tabs are considered as spaces.
|
||||
Convert all of them to space (#x20) for parsing simplicity.
|
||||
*/
|
||||
static int
|
||||
my_tospace(int chr)
|
||||
{
|
||||
return (chr == '\t' || chr == '\r' || chr == '\n') ? ' ' : chr;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Read an xml value: handle multibyte and xml escape
|
||||
*/
|
||||
int READ_INFO::read_value(int delim, String *val)
|
||||
{
|
||||
int chr;
|
||||
String tmp;
|
||||
|
||||
for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF;)
|
||||
{
|
||||
#ifdef USE_MB
|
||||
if (my_mbcharlen(read_charset, chr) > 1)
|
||||
{
|
||||
DBUG_PRINT("read_xml",("multi byte"));
|
||||
int i, ml= my_mbcharlen(read_charset, chr);
|
||||
for (i= 1; i < ml; i++)
|
||||
{
|
||||
val->append(chr);
|
||||
/*
|
||||
Don't use my_tospace() in the middle of a multi-byte character
|
||||
TODO: check that the multi-byte sequence is valid.
|
||||
*/
|
||||
chr= GET;
|
||||
if (chr == my_b_EOF)
|
||||
return chr;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if(chr == '&')
|
||||
{
|
||||
tmp.length(0);
|
||||
for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
|
||||
{
|
||||
if (chr == my_b_EOF)
|
||||
return chr;
|
||||
tmp.append(chr);
|
||||
}
|
||||
if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
|
||||
val->append(chr);
|
||||
else
|
||||
{
|
||||
val->append('&');
|
||||
val->append(tmp);
|
||||
val->append(';');
|
||||
}
|
||||
}
|
||||
else
|
||||
val->append(chr);
|
||||
chr= GET;
|
||||
}
|
||||
return my_tospace(chr);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Read a record in xml format
|
||||
tags and attributes are stored in taglist
|
||||
when tag set in ROWS IDENTIFIED BY is closed, we are ready and return
|
||||
*/
|
||||
int READ_INFO::read_xml()
|
||||
{
|
||||
DBUG_ENTER("READ_INFO::read_xml");
|
||||
int chr, chr2, chr3;
|
||||
int delim= 0;
|
||||
String tag, attribute, value;
|
||||
bool in_tag= false;
|
||||
|
||||
tag.length(0);
|
||||
attribute.length(0);
|
||||
value.length(0);
|
||||
|
||||
for (chr= my_tospace(GET); chr != my_b_EOF ; )
|
||||
{
|
||||
switch(chr){
|
||||
case '<': /* read tag */
|
||||
/* TODO: check if this is a comment <!-- comment --> */
|
||||
chr= my_tospace(GET);
|
||||
if(chr == '!')
|
||||
{
|
||||
chr2= GET;
|
||||
chr3= GET;
|
||||
|
||||
if(chr2 == '-' && chr3 == '-')
|
||||
{
|
||||
chr2= 0;
|
||||
chr3= 0;
|
||||
chr= my_tospace(GET);
|
||||
|
||||
while(chr != '>' || chr2 != '-' || chr3 != '-')
|
||||
{
|
||||
if(chr == '-')
|
||||
{
|
||||
chr3= chr2;
|
||||
chr2= chr;
|
||||
}
|
||||
else if (chr2 == '-')
|
||||
{
|
||||
chr2= 0;
|
||||
chr3= 0;
|
||||
}
|
||||
chr= my_tospace(GET);
|
||||
if (chr == my_b_EOF)
|
||||
goto found_eof;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
tag.length(0);
|
||||
while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
|
||||
{
|
||||
if(chr != delim) /* fix for the '<field name =' format */
|
||||
tag.append(chr);
|
||||
chr= my_tospace(GET);
|
||||
}
|
||||
|
||||
// row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term
|
||||
if((tag.length() == line_term_length -2) &&
|
||||
(strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
|
||||
{
|
||||
DBUG_PRINT("read_xml", ("start-of-row: %i %s %s",
|
||||
level,tag.c_ptr_safe(), line_term_ptr));
|
||||
}
|
||||
|
||||
if(chr == ' ' || chr == '>')
|
||||
{
|
||||
level++;
|
||||
clear_level(level + 1);
|
||||
}
|
||||
|
||||
if (chr == ' ')
|
||||
in_tag= true;
|
||||
else
|
||||
in_tag= false;
|
||||
break;
|
||||
|
||||
case ' ': /* read attribute */
|
||||
while(chr == ' ') /* skip blanks */
|
||||
chr= my_tospace(GET);
|
||||
|
||||
if(!in_tag)
|
||||
break;
|
||||
|
||||
while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
|
||||
{
|
||||
attribute.append(chr);
|
||||
chr= my_tospace(GET);
|
||||
}
|
||||
break;
|
||||
|
||||
case '>': /* end tag - read tag value */
|
||||
in_tag= false;
|
||||
chr= read_value('<', &value);
|
||||
if(chr == my_b_EOF)
|
||||
goto found_eof;
|
||||
|
||||
/* save value to list */
|
||||
if(tag.length() > 0 && value.length() > 0)
|
||||
{
|
||||
DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s",
|
||||
level,tag.c_ptr_safe(), value.c_ptr_safe()));
|
||||
taglist.push_front( new XML_TAG(level, tag, value));
|
||||
}
|
||||
tag.length(0);
|
||||
value.length(0);
|
||||
attribute.length(0);
|
||||
break;
|
||||
|
||||
case '/': /* close tag */
|
||||
level--;
|
||||
chr= my_tospace(GET);
|
||||
if(chr != '>') /* if this is an empty tag <tag /> */
|
||||
tag.length(0); /* we should keep tag value */
|
||||
while(chr != '>' && chr != my_b_EOF)
|
||||
{
|
||||
tag.append(chr);
|
||||
chr= my_tospace(GET);
|
||||
}
|
||||
|
||||
if((tag.length() == line_term_length -2) &&
|
||||
(strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
|
||||
{
|
||||
DBUG_PRINT("read_xml", ("found end-of-row %i %s",
|
||||
level, tag.c_ptr_safe()));
|
||||
DBUG_RETURN(0); //normal return
|
||||
}
|
||||
chr= my_tospace(GET);
|
||||
break;
|
||||
|
||||
case '=': /* attribute name end - read the value */
|
||||
//check for tag field and attribute name
|
||||
if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
|
||||
!memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
|
||||
{
|
||||
/*
|
||||
this is format <field name="xx">xx</field>
|
||||
where actual fieldname is in attribute
|
||||
*/
|
||||
delim= my_tospace(GET);
|
||||
tag.length(0);
|
||||
attribute.length(0);
|
||||
chr= '<'; /* we pretend that it is a tag */
|
||||
level--;
|
||||
break;
|
||||
}
|
||||
|
||||
//check for " or '
|
||||
chr= GET;
|
||||
if (chr == my_b_EOF)
|
||||
goto found_eof;
|
||||
if(chr == '"' || chr == '\'')
|
||||
{
|
||||
delim= chr;
|
||||
}
|
||||
else
|
||||
{
|
||||
delim= ' '; /* no delimiter, use space */
|
||||
PUSH(chr);
|
||||
}
|
||||
|
||||
chr= read_value(delim, &value);
|
||||
if(attribute.length() > 0 && value.length() > 0)
|
||||
{
|
||||
DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s\n",
|
||||
level + 1,
|
||||
attribute.c_ptr_safe(),
|
||||
value.c_ptr_safe()));
|
||||
taglist.push_front(new XML_TAG(level + 1, attribute, value));
|
||||
}
|
||||
attribute.length(0);
|
||||
value.length(0);
|
||||
if (chr != ' ')
|
||||
chr= my_tospace(GET);
|
||||
break;
|
||||
|
||||
default:
|
||||
chr= my_tospace(GET);
|
||||
} /* end switch */
|
||||
} /* end while */
|
||||
|
||||
found_eof:
|
||||
DBUG_PRINT("read_xml",("Found eof"));
|
||||
eof= 1;
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user