From 1bcc8a3052d9dd53a14b457121216d86e3db41c1 Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Sat, 28 Apr 2018 14:48:13 -0700 Subject: [PATCH 1/6] MCOL-1229 - IS.columnstore_columns crashes when DDL is simultaneously executing. The crash was due to an attempt to iterate over the columns of a recently dropped table. Such a table will now be ignored. --- dbcon/mysql/is_columnstore_columns.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/dbcon/mysql/is_columnstore_columns.cpp b/dbcon/mysql/is_columnstore_columns.cpp index 21c9e748e..13f9b9485 100644 --- a/dbcon/mysql/is_columnstore_columns.cpp +++ b/dbcon/mysql/is_columnstore_columns.cpp @@ -27,6 +27,8 @@ #include #include "calpontsystemcatalog.h" #include "dataconvert.h" +#include "exceptclasses.h" +using namespace logging; // Required declaration as it isn't in a MairaDB include @@ -70,7 +72,22 @@ static int is_columnstore_columns_fill(THD *thd, TABLE_LIST *tables, COND *cond) for (std::vector >::const_iterator it = catalog_tables.begin(); it != catalog_tables.end(); ++it) { - execplan::CalpontSystemCatalog::RIDList column_rid_list = systemCatalogPtr->columnRIDs((*it).second, true); + execplan::CalpontSystemCatalog::RIDList column_rid_list; + // Note a table may get dropped as you iterate over the list of tables. + // So simply ignore the dropped table. + try { + column_rid_list = systemCatalogPtr->columnRIDs((*it).second, true); + } + catch (IDBExcept& ex) + { + if (ex.errorCode() == ERR_TABLE_NOT_IN_CATALOG) { + continue; + } + else { + throw; + } + } + for (size_t col_num = 0; col_num < column_rid_list.size(); col_num++) { execplan::CalpontSystemCatalog::TableColName tcn = systemCatalogPtr->colName(column_rid_list[col_num].objnum); From fda6b35243649673a7a3d9f9d082b796280e9016 Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Mon, 30 Apr 2018 10:39:13 -0700 Subject: [PATCH 2/6] More change for MCOL-1229. Do not throw an exception but return an error code. 
--- dbcon/mysql/is_columnstore_columns.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbcon/mysql/is_columnstore_columns.cpp b/dbcon/mysql/is_columnstore_columns.cpp index 13f9b9485..b446eed4e 100644 --- a/dbcon/mysql/is_columnstore_columns.cpp +++ b/dbcon/mysql/is_columnstore_columns.cpp @@ -84,7 +84,7 @@ static int is_columnstore_columns_fill(THD *thd, TABLE_LIST *tables, COND *cond) continue; } else { - throw; + return 1; } } From 0b32f95dac0a69dd740ac5c68e02782bd6d9c5f9 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Wed, 2 May 2018 15:26:01 +0100 Subject: [PATCH 3/6] MCOL-1349 Fix outer joins in views Outer join handling inside views was broken due to the joins being processed twice. This patch brings back the code so that outer joins in views are only processed once. --- dbcon/mysql/ha_calpont_execplan.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 0f54d8bdb..9cc3a99c8 100755 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -801,8 +801,8 @@ uint32_t buildOuterJoin(gp_walk_info& gwi, SELECT_LEX& select_lex) // View is already processed in view::transform // @bug5319. view is sometimes treated as derived table and // fromSub::transform does not build outer join filters. - //if (!table_ptr->derived && table_ptr->view) - // continue; + if (!table_ptr->derived && table_ptr->view) + continue; CalpontSystemCatalog:: TableAliasName tan = make_aliasview( (table_ptr->db ? 
table_ptr->db : ""), From 543f6cb8dd9987bf064737e2f8dd4401fb033834 Mon Sep 17 00:00:00 2001 From: david hill Date: Thu, 3 May 2018 09:06:45 -0500 Subject: [PATCH 4/6] MCOL-1377 - enhance the user/group setting for syslog --- oam/install_scripts/syslogSetup.sh | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/oam/install_scripts/syslogSetup.sh b/oam/install_scripts/syslogSetup.sh index 1f4235a30..78b292dac 100755 --- a/oam/install_scripts/syslogSetup.sh +++ b/oam/install_scripts/syslogSetup.sh @@ -13,6 +13,7 @@ syslog_conf=nofile rsyslog7=0 user=`whoami 2>/dev/null` +group=user SUDO=" " if [ "$user" != "root" ]; then @@ -167,9 +168,24 @@ if [ ! -z "$syslog_conf" ] ; then # remove older version incase it was installed by previous build $SUDO rm -rf /etc/rsyslog.d/columnstore.conf + # determine username/groupname + + if [ -f /var/log/messages ]; then + user=`stat -c "%U %G" /var/log/messages | awk '{print $1}'` + group=`stat -c "%U %G" /var/log/messages | awk '{print $2}'` + fi + + if [ -f /var/log/syslog ]; then + user=`stat -c "%U %G" /var/log/syslog | awk '{print $1}'` + group=`stat -c "%U %G" /var/log/syslog | awk '{print $2}'` + fi + + #set permissions + $SUDO chown $user:$group -R /var/log/mariadb > /dev/null 2>&1 + if [ $rsyslog7 == 1 ]; then - sed -i -e s/groupname/adm/g ${columnstoreSyslogFile7} - sed -i -e s/username/syslog/g ${columnstoreSyslogFile7} + sed -i -e s/groupname/$group/g ${columnstoreSyslogFile7} + sed -i -e s/username/$user/g ${columnstoreSyslogFile7} $SUDO rm -f /etc/rsyslog.d/49-columnstore.conf $SUDO cp ${columnstoreSyslogFile7} ${syslog_conf} From b9f2b554847f81e29e2ba6f0b9616d2baf96613f Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Wed, 24 Jan 2018 09:40:44 +0300 Subject: [PATCH 5/6] MCOL-1384 Parser accepts quotes with qualified dbobject identifiers. 
--- dbcon/ddlpackage/CMakeLists.txt | 4 ++ dbcon/ddlpackage/ddl.l | 100 +++++++++++++++++++++++++++++--- dbcon/ddlpackage/ddl.y | 17 +++++- dbcon/mysql/ha_calpont_ddl.cpp | 74 +++++++++++------------ 4 files changed, 149 insertions(+), 46 deletions(-) diff --git a/dbcon/ddlpackage/CMakeLists.txt b/dbcon/ddlpackage/CMakeLists.txt index 27d2a3015..ae2f82fa9 100644 --- a/dbcon/ddlpackage/CMakeLists.txt +++ b/dbcon/ddlpackage/CMakeLists.txt @@ -9,6 +9,10 @@ ADD_CUSTOM_COMMAND( DEPENDS ddl.y ddl.l ) +# Parser puts extra info to stderr. +INCLUDE(../../check_compiler_flag.cmake) +MY_CHECK_AND_SET_COMPILER_FLAG("-DYYDEBUG" DEBUG) + ########### next target ############### set(ddlpackage_LIB_SRCS diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index ac51fe020..f65ef161d 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -18,6 +18,7 @@ /* $Id: ddl.l 9341 2013-03-27 14:10:35Z chao $ */ %{ +#include #include #include #include @@ -31,10 +32,11 @@ #endif using namespace ddlpackage; +typedef enum { NOOP, STRIP_QUOTES, STRIP_QUOTES_FQ } copy_action_t; int lineno = 1; void ddlerror(struct pass_to_bison* x, char const *s); -static char* scanner_copy(char *str, yyscan_t yyscanner); +static char* scanner_copy(char *str, yyscan_t yyscanner, copy_action_t action = NOOP ); %} @@ -54,6 +56,10 @@ horiz_space [ \t\f] newline [\n\r] non_newline [^\n\r] +quote ' +double_quote \" +grave_accent ` + comment ("--"{non_newline}*) self [,()\[\].;\:\+\-\*\/\%\^\<\>\=] whitespace ({space}+|{comment}) @@ -62,6 +68,12 @@ digit [0-9] ident_start [A-Za-z\200-\377_] ident_cont [A-Za-z\200-\377_0-9\$] identifier {ident_start}{ident_cont}* +/* fully qualified names regexes */ +fq_identifier {identifier}\.{identifier} +identifier_quoted {grave_accent}{identifier}{grave_accent} +identifier_double_quoted {double_quote}{identifier}{double_quote} +fq_quoted ({identifier_quoted}|{identifier})\.({identifier_quoted}|{identifier}) +fq_double_quoted 
({identifier_double_quoted}|{identifier})\.({identifier_double_quoted}|{identifier}) integer [-+]?{digit}+ decimal ([-+]?({digit}*\.{digit}+)|({digit}+\.{digit}*)) @@ -69,11 +81,16 @@ real ({integer}|{decimal})[Ee][-+]?{digit}+ realfail1 ({integer}|{decimal})[Ee] realfail2 ({integer}|{decimal})[Ee][-+] -quote ' -grave_accent ` %% + +{identifier_quoted} { ddlget_lval(yyscanner)->str = scanner_copy( ddlget_text(yyscanner), yyscanner, STRIP_QUOTES ); return IDENT; } +{identifier_double_quoted} { ddlget_lval(yyscanner)->str = scanner_copy( ddlget_text(yyscanner), yyscanner, STRIP_QUOTES ); return IDENT; } +{fq_identifier} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner); return FQ_IDENT; } +{fq_quoted} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES_FQ); return FQ_IDENT; } +{fq_double_quoted} { ddlget_lval(yyscanner)->str = scanner_copy(ddlget_text(yyscanner), yyscanner, STRIP_QUOTES_FQ); return FQ_IDENT; } + ACTION {return ACTION;} ADD {return ADD;} ALTER {return ALTER;} @@ -198,6 +215,11 @@ using namespace ddlpackage; */ void scanner_init(const char* str, yyscan_t yyscanner) { +#ifdef YYDEBUG + extern int ddldebug; + ddldebug = 1; +#endif + size_t slen = strlen(str); scan_data* pScanData = (scan_data*)ddlget_extra(yyscanner); @@ -246,10 +268,72 @@ void scanner_finish(yyscan_t yyscanner) pScanData->valbuf.clear(); } -char* scanner_copy (char *str, yyscan_t yyscanner) +char* scanner_copy (char *str, yyscan_t yyscanner, copy_action_t action) { - char* nv = strdup(str); - if(nv) - ((scan_data*)ddlget_extra(yyscanner))->valbuf.push_back(nv); - return nv; + char* result; + char* nv = strdup(str); + result = nv; + + // free strduped memory later to prevent possible memory leak + if(nv) + ((scan_data*)ddlget_extra(yyscanner))->valbuf.push_back(nv); + + if(action == STRIP_QUOTES) + { + nv[strlen(str) - 1] = '\0'; + result = nv + 1; + } + else if (action == STRIP_QUOTES_FQ) + { + bool move_left = 
false; + bool move_right = false; + char* left = nv; + char* tmp_first = nv; + // MCOL-1384 Loop through all comas in this quoted fq id + // looking for $quote_sign.$quote_sign sequence. + char* fq_delimiter; + int tmp_pos = 0; + while((fq_delimiter = strchr(tmp_first, '.')) != NULL) + { + if( (*(fq_delimiter -1) == '`' && *(fq_delimiter + 1) == '`') || + (*(fq_delimiter -1) == '"' && *(fq_delimiter + 1) == '"') ) + { + tmp_pos += fq_delimiter - tmp_first; + break; + } + tmp_first = fq_delimiter; + } + + char* fq_delimiter_orig = str + tmp_pos; + char* right = fq_delimiter + 1; + char* right_orig = fq_delimiter_orig + 1; + // MCOL-1384 Strip quotes from the left part. + if(*left == '"' || *left == '`') + { + result = left + 1; + *(fq_delimiter - 1) = '.'; + move_left = true; + } + else + { + fq_delimiter += 1; + } + + int right_length = strlen(right); + // MCOL-1384 Strip quotes from the right part. + if(*right == '`' || *right == '"') + { + right += 1; right_orig += 1; + right_length -= 2; + move_right = true; + *(fq_delimiter + right_length) = '\0'; + } + + if(move_left || move_right) + { + strncpy(fq_delimiter, right_orig, right_length); + } + } + + return result; } diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 8d36b2c2b..982167287 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -48,6 +48,7 @@ */ %{ +#include "string.h" #include "sqlparser.h" #ifdef _MSC_VER @@ -121,7 +122,7 @@ REFERENCES RENAME RESTRICT SET SMALLINT TABLE TEXT TIME TINYBLOB TINYTEXT TINYINT TO UNIQUE UNSIGNED UPDATE USER SESSION_USER SYSTEM_USER VARCHAR VARBINARY VARYING WITH ZONE DOUBLE IDB_FLOAT REAL CHARSET IDB_IF EXISTS CHANGE TRUNCATE -%token IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE +%token FQ_IDENT IDENT FCONST SCONST CP_SEARCH_CONDITION_TEXT ICONST DATE /* Notes: * 1. "ata" stands for alter_table_action @@ -611,7 +612,19 @@ table_name: ; qualified_name: - IDENT '.' 
IDENT {$$ = new QualifiedName($1, $3);} + FQ_IDENT { + char* delimeterPosition = strchr(const_cast($1), '.'); + if( delimeterPosition ) + { + *delimeterPosition = '\0'; + char* schemaName = const_cast($1); + char* tableName = delimeterPosition + 1; + $$ = new QualifiedName(schemaName, tableName); + *delimeterPosition = '.'; + } + else + $$ = new QualifiedName($1); + } | IDENT { if (x->fDBSchema.size()) $$ = new QualifiedName((char*)x->fDBSchema.c_str(), $1); diff --git a/dbcon/mysql/ha_calpont_ddl.cpp b/dbcon/mysql/ha_calpont_ddl.cpp index 611f1da3b..74b413667 100755 --- a/dbcon/mysql/ha_calpont_ddl.cpp +++ b/dbcon/mysql/ha_calpont_ddl.cpp @@ -2039,51 +2039,53 @@ int ha_calpont_impl_delete_table_(const char *db, const char *name, cal_connecti int ha_calpont_impl_rename_table_(const char* from, const char* to, cal_connection_info& ci) { - THD *thd = current_thd; - string emsg; + THD* thd = current_thd; + string emsg; - ostringstream stmt1; - pair fromPair; - pair toPair; - string stmt; + pair fromPair; + pair toPair; + string stmt; - //this is replcated DDL, treat it just like SSO - if (thd->slave_thread) - return 0; + //this is replcated DDL, treat it just like SSO + if (thd->slave_thread) + return 0; - //@bug 5660. Error out REAL DDL/DML on slave node. - // When the statement gets here, it's NOT SSO or RESTRICT - if (ci.isSlaveNode) - { - string emsg = logging::IDBErrorInfo::instance()->errorMsg(ERR_DML_DDL_SLAVE); - setError(current_thd, ER_CHECK_NOT_IMPLEMENTED, emsg); - return 1; - } + //@bug 5660. Error out REAL DDL/DML on slave node. 
+ // When the statement gets here, it's NOT SSO or RESTRICT + if (ci.isSlaveNode) + { + string emsg = logging::IDBErrorInfo::instance()->errorMsg(ERR_DML_DDL_SLAVE); + setError(current_thd, ER_CHECK_NOT_IMPLEMENTED, emsg); + return 1; + } - fromPair = parseTableName(from); - toPair = parseTableName(to); + fromPair = parseTableName(from); + toPair = parseTableName(to); - if (fromPair.first != toPair.first) - { - thd->get_stmt_da()->set_overwrite_status(true); - thd->raise_error_printf(ER_CHECK_NOT_IMPLEMENTED, "Both tables must be in the same database to use RENAME TABLE"); - return -1; - } + if (fromPair.first != toPair.first) + { + thd->get_stmt_da()->set_overwrite_status(true); + thd->raise_error_printf(ER_CHECK_NOT_IMPLEMENTED, "Both tables must be in the same database to use RENAME TABLE"); + return -1; + } - stmt1 << "alter table " << fromPair.second << " rename to " << toPair.second << ";"; + // This explicitely shields both db objects with quotes that the lexer strips down later. 
+ stmt = "alter table `" + fromPair.second + "` rename to `" + toPair.second + "`;"; + string db; - stmt = stmt1.str(); - string db; - if ( fromPair.first.length() !=0 ) - db = fromPair.first; - else if ( thd->db ) - db = thd->db; + if ( thd->db ) + db = thd->db; + else if ( fromPair.first.length() != 0 ) + db = fromPair.first; + else + db = toPair.first; - int rc = ProcessDDLStatement(stmt, db, "", tid2sid(thd->thread_id), emsg); - if (rc != 0) - push_warning(thd, Sql_condition::WARN_LEVEL_WARN, 9999, emsg.c_str()); + int rc = ProcessDDLStatement(stmt, db, "", tid2sid(thd->thread_id), emsg); - return rc; + if (rc != 0) + push_warning(thd, Sql_condition::WARN_LEVEL_ERROR, 9999, emsg.c_str()); + + return rc; } From ac3e702a3e038765772f662da029e4e7c61333c5 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 8 May 2018 19:38:06 +0100 Subject: [PATCH 6/6] MCOL-1396 Allow StringStore to hold more than 2GB StringStore originally worked by returning a 32bit pointer to a memory location and storing the length with that pointer. This allowed 4GB to be stored in 64KB blocks. With 1.1 we used the high bit to signify a TEXT/BLOB string of > 64KB reducing the max capacity to 2GB but without any bounds checking. So, if you went over the 2GB mark the getter would think you are trying to get a long string instead of a short one and come up empty. It would then return NULL. This patch uses 64-bit memory pointers while still retaining the high bit to signify long strings. It also now stores the length with the string rather than with the pointer to allow the full 64 bits for pointers. It also adds a bounds check for small strings. 
--- utils/rowgroup/rowgroup.cpp | 64 ++++++++-------- utils/rowgroup/rowgroup.h | 147 ++++++++++++++++++++++-------------- 2 files changed, 123 insertions(+), 88 deletions(-) diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp index 48bdd7031..ba64e3596 100755 --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ -79,10 +79,10 @@ StringStore::~StringStore() #endif } -uint32_t StringStore::storeString(const uint8_t *data, uint32_t len) +uint64_t StringStore::storeString(const uint8_t *data, uint32_t len) { MemChunk *lastMC = NULL; - uint32_t ret = 0; + uint64_t ret = 0; empty = false; // At least a NULL is being stored. @@ -92,7 +92,7 @@ uint32_t StringStore::storeString(const uint8_t *data, uint32_t len) if ((len == 8 || len == 9) && *((uint64_t *) data) == *((uint64_t *) joblist::CPNULLSTRMARK.c_str())) - return numeric_limits::max(); + return numeric_limits::max(); //@bug6065, make StringStore::storeString() thread safe boost::mutex::scoped_lock lk(fMutex, defer_lock); @@ -102,20 +102,21 @@ uint32_t StringStore::storeString(const uint8_t *data, uint32_t len) if (mem.size() > 0) lastMC = (MemChunk *) mem.back().get(); - if (len >= CHUNK_SIZE) + if ((len+4) >= CHUNK_SIZE) { - shared_array newOne(new uint8_t[len + sizeof(MemChunk)]); + shared_array newOne(new uint8_t[len + sizeof(MemChunk) + 4]); longStrings.push_back(newOne); lastMC = (MemChunk*) longStrings.back().get(); - lastMC->capacity = lastMC->currentSize = len; - memcpy(lastMC->data, data, len); + lastMC->capacity = lastMC->currentSize = len + 4; + memcpy(lastMC->data, &len, 4); + memcpy(lastMC->data + 4, data, len); // High bit to mark a long string - ret = 0x80000000; + ret = 0x8000000000000000; ret += longStrings.size() - 1; } else { - if ((lastMC == NULL) || (lastMC->capacity - lastMC->currentSize < len)) + if ((lastMC == NULL) || (lastMC->capacity - lastMC->currentSize < (len + 4))) { // mem usage debugging //if (lastMC) @@ -130,7 +131,11 @@ uint32_t 
StringStore::storeString(const uint8_t *data, uint32_t len) ret = ((mem.size()-1) * CHUNK_SIZE) + lastMC->currentSize; - memcpy(&(lastMC->data[lastMC->currentSize]), data, len); + // If this ever happens then we have big problems + if (ret & 0x8000000000000000) + throw logic_error("StringStore memory exceeded."); + memcpy(&(lastMC->data[lastMC->currentSize]), &len, 4); + memcpy(&(lastMC->data[lastMC->currentSize]) + 4, data, len); /* cout << "stored: '" << hex; for (uint32_t i = 0; i < len ; i++) { @@ -138,7 +143,7 @@ uint32_t StringStore::storeString(const uint8_t *data, uint32_t len) } cout << "' at position " << lastMC->currentSize << " len " << len << dec << endl; */ - lastMC->currentSize += len; + lastMC->currentSize += len + 4; } return ret; @@ -146,31 +151,31 @@ uint32_t StringStore::storeString(const uint8_t *data, uint32_t len) void StringStore::serialize(ByteStream &bs) const { - uint32_t i; + uint64_t i; MemChunk *mc; - bs << (uint32_t) mem.size(); + bs << (uint64_t) mem.size(); bs << (uint8_t) empty; for (i = 0; i < mem.size(); i++) { mc = (MemChunk *) mem[i].get(); - bs << (uint32_t) mc->currentSize; + bs << (uint64_t) mc->currentSize; //cout << "serialized " << mc->currentSize << " bytes\n"; bs.append(mc->data, mc->currentSize); } - bs << (uint32_t) longStrings.size(); + bs << (uint64_t) longStrings.size(); for (i = 0; i < longStrings.size(); i++) { mc = (MemChunk *) longStrings[i].get(); - bs << (uint32_t) mc->currentSize; + bs << (uint64_t) mc->currentSize; bs.append(mc->data, mc->currentSize); } } void StringStore::deserialize(ByteStream &bs) { - uint32_t i; - uint32_t count; - uint32_t size; + uint64_t i; + uint64_t count; + uint64_t size; uint8_t *buf; MemChunk *mc; uint8_t tmp8; @@ -718,10 +723,9 @@ bool Row::isNullValue(uint32_t colIndex) const case CalpontSystemCatalog::STRINT: { uint32_t len = getColumnWidth(colIndex); if (inStringTable(colIndex)) { - uint32_t offset, length; - offset = *((uint32_t *) &data[offsets[colIndex]]); - length = 
*((uint32_t *) &data[offsets[colIndex] + 4]); - return strings->isNullValue(offset, length); + uint64_t offset; + offset = *((uint64_t *) &data[offsets[colIndex]]); + return strings->isNullValue(offset); } if (data[offsets[colIndex]] == 0) // empty string return true; @@ -757,10 +761,9 @@ bool Row::isNullValue(uint32_t colIndex) const case CalpontSystemCatalog::VARBINARY: { uint32_t pos = offsets[colIndex]; if (inStringTable(colIndex)) { - uint32_t offset, length; - offset = *((uint32_t *) &data[pos]); - length = *((uint32_t *) &data[pos+4]); - return strings->isNullValue(offset, length); + uint64_t offset; + offset = *((uint64_t *) &data[pos]); + return strings->isNullValue(offset); } if (*((uint16_t*) &data[pos]) == 0) return true; @@ -1416,8 +1419,8 @@ RGData RowGroup::duplicate() void Row::setStringField(const std::string &val, uint32_t colIndex) { - uint32_t length; - uint32_t offset; + uint64_t offset; + uint64_t length; //length = strlen(val.c_str()) + 1; length = val.length(); @@ -1426,8 +1429,7 @@ void Row::setStringField(const std::string &val, uint32_t colIndex) if (inStringTable(colIndex)) { offset = strings->storeString((const uint8_t *) val.data(), length); - *((uint32_t *) &data[offsets[colIndex]]) = offset; - *((uint32_t *) &data[offsets[colIndex] + 4]) = length; + *((uint64_t *) &data[offsets[colIndex]]) = offset; // cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]]) // << " length " << *((uint32_t *) &data[offsets[colIndex] + 4]) // << endl; diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 8b5ea75d7..7aca0c93f 100755 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -92,13 +92,14 @@ public: StringStore(); virtual ~StringStore(); - inline std::string getString(uint32_t offset, uint32_t length) const; - uint32_t storeString(const uint8_t *data, uint32_t length); //returns the offset - inline const uint8_t * getPointer(uint32_t offset) const; + inline std::string getString(uint64_t 
offset) const; + uint64_t storeString(const uint8_t *data, uint32_t length); //returns the offset + inline const uint8_t * getPointer(uint64_t offset) const; + inline uint32_t getStringLength(uint64_t offset); inline bool isEmpty() const; inline uint64_t getSize() const; - inline bool isNullValue(uint32_t offset, uint32_t length) const; - inline bool equals(const std::string &str, uint32_t offset, uint32_t length) const; + inline bool isNullValue(uint64_t offset) const; + inline bool equals(const std::string &str, uint64_t offset) const; void clear(); @@ -541,9 +542,8 @@ inline bool Row::equals(uint64_t val, uint32_t colIndex) const inline bool Row::equals(const std::string &val, uint32_t colIndex) const { if (inStringTable(colIndex)) { - uint32_t offset = *((uint32_t *) &data[offsets[colIndex]]); - uint32_t length = *((uint32_t *) &data[offsets[colIndex] + 4]); - return strings->equals(val, offset, length); + uint64_t offset = *((uint64_t *) &data[offsets[colIndex]]); + return strings->equals(val, offset); } else return (strncmp(val.c_str(), (char *) &data[offsets[colIndex]], getColumnWidth(colIndex)) == 0); @@ -609,28 +609,27 @@ inline int64_t Row::getIntField(uint32_t colIndex) const inline const uint8_t * Row::getStringPointer(uint32_t colIndex) const { if (inStringTable(colIndex)) - return strings->getPointer(*((uint32_t *) &data[offsets[colIndex]])); + return strings->getPointer(*((uint64_t *) &data[offsets[colIndex]])); return &data[offsets[colIndex]]; } inline uint32_t Row::getStringLength(uint32_t colIndex) const { if (inStringTable(colIndex)) - return *((uint32_t *) &data[offsets[colIndex] + 4]); + return strings->getStringLength(*((uint64_t *) &data[offsets[colIndex]])); return strnlen((char *) &data[offsets[colIndex]], getColumnWidth(colIndex)); } inline void Row::setStringField(const uint8_t *strdata, uint32_t length, uint32_t colIndex) { - uint32_t offset; + uint64_t offset; if (length > getColumnWidth(colIndex)) length = getColumnWidth(colIndex); if 
(inStringTable(colIndex)) { offset = strings->storeString(strdata, length); - *((uint32_t *) &data[offsets[colIndex]]) = offset; - *((uint32_t *) &data[offsets[colIndex] + 4]) = length; + *((uint64_t *) &data[offsets[colIndex]]) = offset; // cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]]) // << " length " << *((uint32_t *) &data[offsets[colIndex] + 4]) // << endl; @@ -645,8 +644,7 @@ inline void Row::setStringField(const uint8_t *strdata, uint32_t length, uint32_ inline std::string Row::getStringField(uint32_t colIndex) const { if (inStringTable(colIndex)) - return strings->getString(*((uint32_t *) &data[offsets[colIndex]]), - *((uint32_t *) &data[offsets[colIndex] + 4])); + return strings->getString(*((uint64_t *) &data[offsets[colIndex]])); // Not all CHAR/VARCHAR are NUL terminated so use length return std::string((char *) &data[offsets[colIndex]], strnlen((char *) &data[offsets[colIndex]], getColumnWidth(colIndex))); @@ -662,21 +660,21 @@ inline std::string Row::getVarBinaryStringField(uint32_t colIndex) const inline uint32_t Row::getVarBinaryLength(uint32_t colIndex) const { if (inStringTable(colIndex)) - return *((uint32_t *) &data[offsets[colIndex] + 4]); + return strings->getStringLength(*((uint64_t *) &data[offsets[colIndex]]));; return *((uint16_t*) &data[offsets[colIndex]]); } inline const uint8_t* Row::getVarBinaryField(uint32_t colIndex) const { if (inStringTable(colIndex)) - return strings->getPointer(*((uint32_t *) &data[offsets[colIndex]])); + return strings->getPointer(*((uint64_t *) &data[offsets[colIndex]])); return &data[offsets[colIndex] + 2]; } inline const uint8_t* Row::getVarBinaryField(uint32_t& len, uint32_t colIndex) const { if (inStringTable(colIndex)) { - len = *((uint32_t *) &data[offsets[colIndex] + 4]); + len = strings->getStringLength(*((uint64_t *) &data[offsets[colIndex]])); return getVarBinaryField(colIndex); } else { @@ -854,9 +852,8 @@ inline void Row::setVarBinaryField(const uint8_t *val, uint32_t len, 
uint32_t co if (len > getColumnWidth(colIndex)) len = getColumnWidth(colIndex); if (inStringTable(colIndex)) { - uint32_t offset = strings->storeString(val, len); - *((uint32_t *) &data[offsets[colIndex]]) = offset; - *((uint32_t *) &data[offsets[colIndex] + 4]) = len; + uint64_t offset = strings->storeString(val, len); + *((uint64_t *) &data[offsets[colIndex]]) = offset; } else { *((uint16_t*) &data[offsets[colIndex]]) = len; @@ -1535,49 +1532,53 @@ inline void copyRow(const Row &in, Row *out) copyRow(in, out, std::min(in.getColumnCount(), out->getColumnCount())); } -inline std::string StringStore::getString(uint32_t off, uint32_t len) const +inline std::string StringStore::getString(uint64_t off) const { - if (off == std::numeric_limits::max()) + uint32_t length; + if (off == std::numeric_limits::max()) return joblist::CPNULLSTRMARK; MemChunk *mc; - if (off & 0x80000000) + if (off & 0x8000000000000000) { - off = off - 0x80000000; + off = off - 0x8000000000000000; if (longStrings.size() <= off) return joblist::CPNULLSTRMARK; mc = (MemChunk*) longStrings[off].get(); - return std::string((char *) mc->data, len); + memcpy(&length, mc->data, 4); + return std::string((char *) mc->data+4, length); } - uint32_t chunk = off / CHUNK_SIZE; - uint32_t offset = off % CHUNK_SIZE; + uint64_t chunk = off / CHUNK_SIZE; + uint64_t offset = off % CHUNK_SIZE; // this has to handle uninitialized data as well. If it's uninitialized it doesn't matter // what gets returned, it just can't go out of bounds. 
if (mem.size() <= chunk) return joblist::CPNULLSTRMARK; mc = (MemChunk *) mem[chunk].get(); - if ((offset + len) > mc->currentSize) + + memcpy(&length, &mc->data[offset], 4); + if ((offset + length) > mc->currentSize) return joblist::CPNULLSTRMARK; - - return std::string((char *) &(mc->data[offset]), len); + + return std::string((char *) &(mc->data[offset])+4, length); } -inline const uint8_t * StringStore::getPointer(uint32_t off) const +inline const uint8_t * StringStore::getPointer(uint64_t off) const { - if (off == std::numeric_limits::max()) + if (off == std::numeric_limits::max()) return (const uint8_t *) joblist::CPNULLSTRMARK.c_str(); - uint32_t chunk = off / CHUNK_SIZE; - uint32_t offset = off % CHUNK_SIZE; + uint64_t chunk = off / CHUNK_SIZE; + uint64_t offset = off % CHUNK_SIZE; MemChunk *mc; - if (off & 0x80000000) + if (off & 0x8000000000000000) { - off = off - 0x80000000; + off = off - 0x8000000000000000; if (longStrings.size() <= off) return (const uint8_t *) joblist::CPNULLSTRMARK.c_str(); mc = (MemChunk*) longStrings[off].get(); - return mc->data; + return mc->data+4; } // this has to handle uninitialized data as well. If it's uninitialized it doesn't matter // what gets returned, it just can't go out of bounds. 
@@ -1587,19 +1588,17 @@ inline const uint8_t * StringStore::getPointer(uint32_t off) const if (offset > mc->currentSize) return (const uint8_t *) joblist::CPNULLSTRMARK.c_str(); - return &(mc->data[offset]); + return &(mc->data[offset]) + 4; } -inline bool StringStore::isNullValue(uint32_t off, uint32_t len) const +inline bool StringStore::isNullValue(uint64_t off) const { - if (off == std::numeric_limits::max() || len == 0) + uint32_t length; + if (off == std::numeric_limits::max()) return true; - if (len < 8) - return false; - // Long strings won't be NULL - if (off & 0x80000000) + if (off & 0x8000000000000000) return false; uint32_t chunk = off / CHUNK_SIZE; @@ -1609,31 +1608,38 @@ inline bool StringStore::isNullValue(uint32_t off, uint32_t len) const return true; mc = (MemChunk *) mem[chunk].get(); - if ((offset + len) > mc->currentSize) + memcpy(&length, &mc->data[offset], 4); + if (length == 0) return true; - if (mc->data[offset] == 0) // "" = NULL string for some reason... + if (length < 8) + return false; + if ((offset + length) > mc->currentSize) return true; - return (*((uint64_t *) &mc->data[offset]) == *((uint64_t *) joblist::CPNULLSTRMARK.c_str())); + if (mc->data[offset+4] == 0) // "" = NULL string for some reason... 
+ return true; + return (*((uint64_t *) &mc->data[offset+4]) == *((uint64_t *) joblist::CPNULLSTRMARK.c_str())); } -inline bool StringStore::equals(const std::string &str, uint32_t off, uint32_t len) const +inline bool StringStore::equals(const std::string &str, uint64_t off) const { - if (off == std::numeric_limits::max() || len == 0) + uint32_t length; + if (off == std::numeric_limits::max()) return str == joblist::CPNULLSTRMARK; MemChunk *mc; - if (off & 0x80000000) + if (off & 0x8000000000000000) { - if (longStrings.size() <= (off - 0x80000000)) + if (longStrings.size() <= (off - 0x8000000000000000)) return false; - mc = (MemChunk *) longStrings[off - 0x80000000].get(); + mc = (MemChunk *) longStrings[off - 0x8000000000000000].get(); + memcpy(&length, mc->data, 4); // Not sure if this check it needed, but adds safety - if (len > mc->currentSize) + if (length > mc->currentSize) return false; - return (strncmp(str.c_str(), (const char*) mc->data, len) == 0); + return (strncmp(str.c_str(), (const char*) mc->data+4, length) == 0); } uint32_t chunk = off / CHUNK_SIZE; uint32_t offset = off % CHUNK_SIZE; @@ -1641,10 +1647,37 @@ inline bool StringStore::equals(const std::string &str, uint32_t off, uint32_t l return false; mc = (MemChunk *) mem[chunk].get(); - if ((offset + len) > mc->currentSize) + memcpy(&length, &mc->data[offset], 4); + if ((offset + length) > mc->currentSize) return false; - return (strncmp(str.c_str(), (const char *) &mc->data[offset], len) == 0); + return (strncmp(str.c_str(), (const char *) &mc->data[offset]+4, length) == 0); +} +inline uint32_t StringStore::getStringLength(uint64_t off) +{ + uint32_t length; + MemChunk *mc; + if (off == std::numeric_limits::max()) + return 0; + if (off & 0x8000000000000000) + { + off = off - 0x8000000000000000; + if (longStrings.size() <= off) + return 0; + mc = (MemChunk*) longStrings[off].get(); + memcpy(&length, mc->data, 4); + } + else + { + uint64_t chunk = off / CHUNK_SIZE; + uint64_t offset = off % 
CHUNK_SIZE; + if (mem.size() <= chunk) + return 0; + mc = (MemChunk *) mem[chunk].get(); + memcpy(&length, &mc->data[offset], 4); + } + + return length; } inline bool StringStore::isEmpty() const