1
0
mirror of https://github.com/MariaDB/server.git synced 2026-01-06 05:22:24 +03:00

Merging mysql-next-mr-merge to mysql-next-mr.

This commit is contained in:
Alexander Barkov
2009-10-21 15:48:22 +05:00
84 changed files with 8980 additions and 646 deletions

View File

@@ -15,6 +15,8 @@
/* Copy data from a textfile to table */
/* 2006-12 Erik Wetterberg : LOAD XML added */
#include "mysql_priv.h"
#include <my_dir.h>
#include <m_ctype.h>
@@ -23,6 +25,23 @@
#include "sp_head.h"
#include "sql_trigger.h"
class XML_TAG {
public:
int level;
String field;
String value;
XML_TAG(int l, String f, String v);
};
XML_TAG::XML_TAG(int l, String f, String v)
{
level= l;
field.append(f);
value.append(v);
}
class READ_INFO {
File file;
uchar *buffer, /* Buffer for read text */
@@ -37,6 +56,7 @@ class READ_INFO {
bool need_end_io_cache;
IO_CACHE cache;
NET *io_net;
int level; /* for load xml */
public:
bool error,line_cuted,found_null,enclosed;
@@ -54,6 +74,12 @@ public:
char unescape(char chr);
int terminator(char *ptr,uint length);
bool find_start_of_fields();
/* load xml */
List<XML_TAG> taglist;
int read_value(int delim, String *val);
int read_xml();
int clear_level(int level);
/*
We need to force cache close before destructor is invoked to log
the last read block
@@ -82,6 +108,13 @@ static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
List<Item> &set_values, READ_INFO &read_info,
String &enclosed, ulong skip_lines,
bool ignore_check_option_errors);
static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
List<Item> &fields_vars, List<Item> &set_fields,
List<Item> &set_values, READ_INFO &read_info,
String &enclosed, ulong skip_lines,
bool ignore_check_option_errors);
#ifndef EMBEDDED_LIBRARY
static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex,
const char* db_arg,
@@ -393,7 +426,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
thd->count_cuted_fields= CHECK_FIELD_WARN; /* calc cuted fields */
thd->cuted_fields=0L;
/* Skip lines if there is a line terminator */
if (ex->line_term->length())
if (ex->line_term->length() && ex->filetype != FILETYPE_XML)
{
/* ex->skip_lines needs to be preserved for logging */
while (skip_lines > 0)
@@ -424,7 +457,11 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
(MODE_STRICT_TRANS_TABLES |
MODE_STRICT_ALL_TABLES)));
if (!field_term->length() && !enclosed->length())
if (ex->filetype == FILETYPE_XML) /* load xml */
error= read_xml_field(thd, info, table_list, fields_vars,
set_fields, set_values, read_info,
*(ex->line_term), skip_lines, ignore);
else if (!field_term->length() && !enclosed->length())
error= read_fixed_length(thd, info, table_list, fields_vars,
set_fields, set_values, read_info,
skip_lines, ignore);
@@ -1005,6 +1042,171 @@ continue_loop:;
}
/****************************************************************************
** Read rows in xml format
****************************************************************************/
static int
read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
List<Item> &fields_vars, List<Item> &set_fields,
List<Item> &set_values, READ_INFO &read_info,
String &row_tag, ulong skip_lines,
bool ignore_check_option_errors)
{
List_iterator_fast<Item> it(fields_vars);
Item *item;
TABLE *table= table_list->table;
bool no_trans_update_stmt;
CHARSET_INFO *cs= read_info.read_charset;
DBUG_ENTER("read_xml_field");
no_trans_update_stmt= !table->file->has_transactions();
for ( ; ; it.rewind())
{
if (thd->killed)
{
thd->send_kill_message();
DBUG_RETURN(1);
}
// read row tag and save values into tag list
if (read_info.read_xml())
break;
List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
xmlit.rewind();
XML_TAG *tag= NULL;
#ifndef DBUG_OFF
DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int) skip_lines));
while ((tag= xmlit++))
{
DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'",
tag->level, tag->field.c_ptr(),
tag->value.c_ptr()));
}
#endif
restore_record(table, s->default_values);
while ((item= it++))
{
/* If this line is to be skipped we don't want to fill field or var */
if (skip_lines)
continue;
/* find field in tag list */
xmlit.rewind();
tag= xmlit++;
while(tag && strcmp(tag->field.c_ptr(), item->name) != 0)
tag= xmlit++;
if (!tag) // found null
{
if (item->type() == Item::FIELD_ITEM)
{
Field *field= ((Item_field *) item)->field;
field->reset();
field->set_null();
if (field == table->next_number_field)
table->auto_increment_field_not_null= TRUE;
if (!field->maybe_null())
{
if (field->type() == FIELD_TYPE_TIMESTAMP)
((Field_timestamp *) field)->set_time();
else if (field != table->next_number_field)
field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
ER_WARN_NULL_TO_NOTNULL, 1);
}
}
else
((Item_user_var_as_out_param *) item)->set_null_value(cs);
continue;
}
if (item->type() == Item::FIELD_ITEM)
{
Field *field= ((Item_field *)item)->field;
field->set_notnull();
if (field == table->next_number_field)
table->auto_increment_field_not_null= TRUE;
field->store((char *) tag->value.ptr(), tag->value.length(), cs);
}
else
((Item_user_var_as_out_param *) item)->set_value(
(char *) tag->value.ptr(),
tag->value.length(), cs);
}
if (read_info.error)
break;
if (skip_lines)
{
skip_lines--;
continue;
}
if (item)
{
/* Have not read any field, thus input file is simply ended */
if (item == fields_vars.head())
break;
for ( ; item; item= it++)
{
if (item->type() == Item::FIELD_ITEM)
{
/*
QQ: We probably should not throw warning for each field.
But how about intention to always have the same number
of warnings in THD::cuted_fields (and get rid of cuted_fields
in the end ?)
*/
thd->cuted_fields++;
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_WARN_TOO_FEW_RECORDS,
ER(ER_WARN_TOO_FEW_RECORDS),
thd->warning_info->current_row_for_warning());
}
else
((Item_user_var_as_out_param *)item)->set_null_value(cs);
}
}
if (thd->killed ||
fill_record_n_invoke_before_triggers(thd, set_fields, set_values,
ignore_check_option_errors,
table->triggers,
TRG_EVENT_INSERT))
DBUG_RETURN(1);
switch (table_list->view_check_option(thd,
ignore_check_option_errors)) {
case VIEW_CHECK_SKIP:
read_info.next_line();
goto continue_loop;
case VIEW_CHECK_ERROR:
DBUG_RETURN(-1);
}
if (write_record(thd, table, &info))
DBUG_RETURN(1);
/*
We don't need to reset auto-increment field since we are restoring
its default value at the beginning of each loop iteration.
*/
thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt;
thd->warning_info->inc_current_row_for_warning();
continue_loop:;
}
DBUG_RETURN(test(read_info.error) || thd->is_error());
} /* load xml end */
/* Unescape all escape characters, mark \N as null */
char
@@ -1043,6 +1245,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
field_term_length= field_term.length();
line_term_ptr=(char*) line_term.ptr();
line_term_length= line_term.length();
level= 0; /* for load xml */
if (line_start.length() == 0)
{
line_start_ptr=0;
@@ -1118,6 +1321,10 @@ READ_INFO::~READ_INFO()
my_free((uchar*) buffer,MYF(0));
error=1;
}
List_iterator<XML_TAG> xmlit(taglist);
XML_TAG *t;
while ((t= xmlit++))
delete(t);
}
@@ -1450,3 +1657,319 @@ bool READ_INFO::find_start_of_fields()
}
return 0;
}
/*
Clear taglist from tags with a specified level
*/
int READ_INFO::clear_level(int level)
{
DBUG_ENTER("READ_INFO::read_xml clear_level");
List_iterator<XML_TAG> xmlit(taglist);
xmlit.rewind();
XML_TAG *tag;
while ((tag= xmlit++))
{
if(tag->level >= level)
{
xmlit.remove();
delete tag;
}
}
DBUG_RETURN(0);
}
/*
Convert an XML entity to Unicode value.
Return -1 on error;
*/
static int
my_xml_entity_to_char(const char *name, uint length)
{
if (length == 2)
{
if (!memcmp(name, "gt", length))
return '>';
if (!memcmp(name, "lt", length))
return '<';
}
else if (length == 3)
{
if (!memcmp(name, "amp", length))
return '&';
}
else if (length == 4)
{
if (!memcmp(name, "quot", length))
return '"';
if (!memcmp(name, "apos", length))
return '\'';
}
return -1;
}
/**
@brief Convert newline, linefeed, tab to space
@param chr character
@details According to the "XML 1.0" standard,
only space (#x20) characters, carriage returns,
line feeds or tabs are considered as spaces.
Convert all of them to space (#x20) for parsing simplicity.
*/
static int
my_tospace(int chr)
{
return (chr == '\t' || chr == '\r' || chr == '\n') ? ' ' : chr;
}
/*
Read an xml value: handle multibyte and xml escape
*/
int READ_INFO::read_value(int delim, String *val)
{
int chr;
String tmp;
for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF;)
{
#ifdef USE_MB
if (my_mbcharlen(read_charset, chr) > 1)
{
DBUG_PRINT("read_xml",("multi byte"));
int i, ml= my_mbcharlen(read_charset, chr);
for (i= 1; i < ml; i++)
{
val->append(chr);
/*
Don't use my_tospace() in the middle of a multi-byte character
TODO: check that the multi-byte sequence is valid.
*/
chr= GET;
if (chr == my_b_EOF)
return chr;
}
}
#endif
if(chr == '&')
{
tmp.length(0);
for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
{
if (chr == my_b_EOF)
return chr;
tmp.append(chr);
}
if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
val->append(chr);
else
{
val->append('&');
val->append(tmp);
val->append(';');
}
}
else
val->append(chr);
chr= GET;
}
return my_tospace(chr);
}
/*
Read a record in xml format
tags and attributes are stored in taglist
when tag set in ROWS IDENTIFIED BY is closed, we are ready and return
*/
int READ_INFO::read_xml()
{
DBUG_ENTER("READ_INFO::read_xml");
int chr, chr2, chr3;
int delim= 0;
String tag, attribute, value;
bool in_tag= false;
tag.length(0);
attribute.length(0);
value.length(0);
for (chr= my_tospace(GET); chr != my_b_EOF ; )
{
switch(chr){
case '<': /* read tag */
/* TODO: check if this is a comment <!-- comment --> */
chr= my_tospace(GET);
if(chr == '!')
{
chr2= GET;
chr3= GET;
if(chr2 == '-' && chr3 == '-')
{
chr2= 0;
chr3= 0;
chr= my_tospace(GET);
while(chr != '>' || chr2 != '-' || chr3 != '-')
{
if(chr == '-')
{
chr3= chr2;
chr2= chr;
}
else if (chr2 == '-')
{
chr2= 0;
chr3= 0;
}
chr= my_tospace(GET);
if (chr == my_b_EOF)
goto found_eof;
}
break;
}
}
tag.length(0);
while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
{
if(chr != delim) /* fix for the '<field name =' format */
tag.append(chr);
chr= my_tospace(GET);
}
// row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term
if((tag.length() == line_term_length -2) &&
(strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
{
DBUG_PRINT("read_xml", ("start-of-row: %i %s %s",
level,tag.c_ptr_safe(), line_term_ptr));
}
if(chr == ' ' || chr == '>')
{
level++;
clear_level(level + 1);
}
if (chr == ' ')
in_tag= true;
else
in_tag= false;
break;
case ' ': /* read attribute */
while(chr == ' ') /* skip blanks */
chr= my_tospace(GET);
if(!in_tag)
break;
while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
{
attribute.append(chr);
chr= my_tospace(GET);
}
break;
case '>': /* end tag - read tag value */
in_tag= false;
chr= read_value('<', &value);
if(chr == my_b_EOF)
goto found_eof;
/* save value to list */
if(tag.length() > 0 && value.length() > 0)
{
DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s",
level,tag.c_ptr_safe(), value.c_ptr_safe()));
taglist.push_front( new XML_TAG(level, tag, value));
}
tag.length(0);
value.length(0);
attribute.length(0);
break;
case '/': /* close tag */
level--;
chr= my_tospace(GET);
if(chr != '>') /* if this is an empty tag <tag /> */
tag.length(0); /* we should keep tag value */
while(chr != '>' && chr != my_b_EOF)
{
tag.append(chr);
chr= my_tospace(GET);
}
if((tag.length() == line_term_length -2) &&
(strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
{
DBUG_PRINT("read_xml", ("found end-of-row %i %s",
level, tag.c_ptr_safe()));
DBUG_RETURN(0); //normal return
}
chr= my_tospace(GET);
break;
case '=': /* attribute name end - read the value */
//check for tag field and attribute name
if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
!memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
{
/*
this is format <field name="xx">xx</field>
where actual fieldname is in attribute
*/
delim= my_tospace(GET);
tag.length(0);
attribute.length(0);
chr= '<'; /* we pretend that it is a tag */
level--;
break;
}
//check for " or '
chr= GET;
if (chr == my_b_EOF)
goto found_eof;
if(chr == '"' || chr == '\'')
{
delim= chr;
}
else
{
delim= ' '; /* no delimiter, use space */
PUSH(chr);
}
chr= read_value(delim, &value);
if(attribute.length() > 0 && value.length() > 0)
{
DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s\n",
level + 1,
attribute.c_ptr_safe(),
value.c_ptr_safe()));
taglist.push_front(new XML_TAG(level + 1, attribute, value));
}
attribute.length(0);
value.length(0);
if (chr != ' ')
chr= my_tospace(GET);
break;
default:
chr= my_tospace(GET);
} /* end switch */
} /* end while */
found_eof:
DBUG_PRINT("read_xml",("Found eof"));
eof= 1;
DBUG_RETURN(1);
}