diff --git a/.drone.jsonnet b/.drone.jsonnet index 1e13ddfa6..f4431b567 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -6,13 +6,13 @@ local servers = { }; local platforms = { - develop: ['centos:7', 'rockylinux:8', 'rockylinux:9', 'debian:11', 'debian:12', 'ubuntu:20.04', 'ubuntu:22.04'], - 'stable-23.10': ['centos:7', 'rockylinux:8', 'rockylinux:9', 'debian:11', 'debian:12', 'ubuntu:20.04', 'ubuntu:22.04'], + develop: ['rockylinux:8', 'rockylinux:9', 'debian:11', 'debian:12', 'ubuntu:20.04', 'ubuntu:22.04', 'ubuntu:24.04'], + 'stable-23.10': ['rockylinux:8', 'rockylinux:9', 'debian:11', 'debian:12', 'ubuntu:20.04', 'ubuntu:22.04','ubuntu:24.04'], }; local platforms_arm = { - develop: ['rockylinux:8', 'rockylinux:9', 'debian:11', 'debian:12', 'ubuntu:20.04', 'ubuntu:22.04'], - 'stable-23.10': ['rockylinux:8', 'rockylinux:9', 'debian:11', 'debian:12', 'ubuntu:20.04', 'ubuntu:22.04'], + develop: ['rockylinux:8', 'rockylinux:9', 'debian:11', 'debian:12', 'ubuntu:20.04', 'ubuntu:22.04', 'ubuntu:24.04'], + 'stable-23.10': ['rockylinux:8', 'rockylinux:9', 'debian:11', 'debian:12', 'ubuntu:20.04', 'ubuntu:22.04', 'ubuntu:24.04'], }; local any_branch = '**'; @@ -40,11 +40,7 @@ local clang_update_alternatives = 'update-alternatives --install /usr/bin/clang local rpm_build_deps = 'install -y lz4 systemd-devel git make libaio-devel openssl-devel boost-devel bison ' + 'snappy-devel flex libcurl-devel libxml2-devel ncurses-devel automake libtool ' + 'policycoreutils-devel rpm-build lsof iproute pam-devel perl-DBI cracklib-devel ' + - 'expect createrepo '; - -local centos7_build_deps = 'yum install -y epel-release centos-release-scl ' + - '&& yum install -y pcre2-devel devtoolset-' + gcc_version + ' devtoolset-' + gcc_version + '-gcc cmake3 lz4-devel ' + - '&& ln -s /usr/bin/cmake3 /usr/bin/cmake && . 
/opt/rh/devtoolset-' + gcc_version + '/enable '; + 'expect createrepo python3 '; local rockylinux8_build_deps = "dnf install -y 'dnf-command(config-manager)' " + '&& dnf config-manager --set-enabled powertools ' + @@ -67,9 +63,6 @@ local mtr_suite_list = 'basic,bugfixes'; local mtr_full_set = 'basic,bugfixes,devregression,autopilot,extended,multinode,oracle,1pmonly'; local upgrade_test_lists = { - "centos7": { - "amd64": ["10.6.4-1", "10.6.5-2", "10.6.7-3", "10.6.8-4", "10.6.9-5", "10.6.11-6", "10.6.12-7", "10.6.14-9", "10.6.15-10"] - }, "rockylinux8": { "arm64": ["10.6.4-1", "10.6.9-5", "10.6.11-6", "10.6.12-7", "10.6.15-10"], "amd64": ["10.6.4-1", "10.6.5-2", "10.6.7-3", "10.6.8-4", "10.6.9-5", "10.6.11-6", "10.6.12-7", "10.6.14-9", "10.6.15-10"] @@ -94,17 +87,22 @@ local upgrade_test_lists = { "arm64": ["10.6.9-5", "10.6.11-6", "10.6.12-7", "10.6.14-9", "10.6.15-10"], "amd64": ["10.6.9-5", "10.6.11-6", "10.6.12-7", "10.6.14-9", "10.6.15-10"] }, + "ubuntu24.04": + { + "arm64": [], + "amd64": [] + }, }; local platformMap(platform, arch) = local platform_map = { - 'centos:7': centos7_build_deps + ' && yum ' + rpm_build_deps + ' && cmake ' + cmakeflags + ' -DRPM=centos7 && sleep $${BUILD_DELAY_SECONDS:-1s} && make -j$(nproc) package', 'rockylinux:8': rockylinux8_build_deps + ' && dnf ' + rpm_build_deps + ' && cmake ' + cmakeflags + ' -DRPM=rockylinux8 && sleep $${BUILD_DELAY_SECONDS:-1s} && make -j$(nproc) package', 'rockylinux:9': rockylinux9_build_deps + ' && dnf ' + rpm_build_deps + ' && cmake ' + cmakeflags + ' -DRPM=rockylinux9 && sleep $${BUILD_DELAY_SECONDS:-1s} && make -j$(nproc) package', 'debian:11': bootstrap_deps + ' && ' + deb_build_deps + ' && ' + debian11_deps + ' && ' + turnon_clang + " && sleep $${BUILD_DELAY_SECONDS:-1s} && CMAKEFLAGS='" + cmakeflags + " -DDEB=bullseye' debian/autobake-deb.sh", 'debian:12': bootstrap_deps + ' && ' + deb_build_deps + " && sleep $${BUILD_DELAY_SECONDS:-1s} && CMAKEFLAGS='" + cmakeflags + " -DDEB=bookworm' 
debian/autobake-deb.sh", 'ubuntu:20.04': bootstrap_deps + ' && ' + deb_build_deps + ' && ' + ubuntu20_04_deps + ' && ' + turnon_clang + " && sleep $${BUILD_DELAY_SECONDS:-1s} && CMAKEFLAGS='" + cmakeflags + " -DDEB=focal' debian/autobake-deb.sh", 'ubuntu:22.04': bootstrap_deps + ' && ' + deb_build_deps + " && sleep $${BUILD_DELAY_SECONDS:-1s} && CMAKEFLAGS='" + cmakeflags + " -DDEB=jammy' debian/autobake-deb.sh", + 'ubuntu:24.04': bootstrap_deps + ' && ' + deb_build_deps + " && sleep $${BUILD_DELAY_SECONDS:-1s} && CMAKEFLAGS='" + cmakeflags + " -DDEB=jammy' debian/autobake-deb.sh", }; local result = std.strReplace(std.strReplace(platform, ':', ''), '/', '-'); 'export CLICOLOR_FORCE=1; ' + platform_map[platform] + ' | storage/columnstore/columnstore/build/ansi2txt.sh ' + result + '/build.log'; @@ -112,38 +110,40 @@ local platformMap(platform, arch) = local testRun(platform) = local platform_map = { - 'centos:7': 'ctest3 -R columnstore: -j $(nproc) --output-on-failure', 'rockylinux:8': 'ctest3 -R columnstore: -j $(nproc) --output-on-failure', 'rockylinux:9': 'ctest3 -R columnstore: -j $(nproc) --output-on-failure', 'debian:11': 'cd builddir; ctest -R columnstore: -j $(nproc) --output-on-failure', 'debian:12': 'cd builddir; ctest -R columnstore: -j $(nproc) --output-on-failure', 'ubuntu:20.04': 'cd builddir; ctest -R columnstore: -j $(nproc) --output-on-failure', 'ubuntu:22.04': 'cd builddir; ctest -R columnstore: -j $(nproc) --output-on-failure', + 'ubuntu:24.04': 'cd builddir; ctest -R columnstore: -j $(nproc) --output-on-failure', + }; platform_map[platform]; local testPreparation(platform) = local platform_map = { - 'centos:7': 'yum -y install epel-release && yum install -y git cppunit-devel cmake3 boost-devel snappy-devel', - 'rockylinux:8': rockylinux8_build_deps + ' && dnf install -y git lz4 cppunit-devel cmake3 boost-devel snappy-devel', - 'rockylinux:9': rockylinux9_build_deps + ' && dnf install -y git lz4 cppunit-devel cmake3 boost-devel snappy-devel', - 
'debian:11': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake', - 'debian:12': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake', - 'ubuntu:20.04': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake', - 'ubuntu:22.04': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake', + 'rockylinux:8': rockylinux8_build_deps + ' && dnf install -y git lz4 cppunit-devel cmake3 boost-devel snappy-devel pcre2-devel', + 'rockylinux:9': rockylinux9_build_deps + ' && dnf install -y git lz4 cppunit-devel cmake3 boost-devel snappy-devel pcre2-devel', + 'debian:11': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake libpcre2-dev', + 'debian:12': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake libpcre2-dev', + 'ubuntu:20.04': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake libpcre2-dev', + 'ubuntu:22.04': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake libpcre2-dev', + 'ubuntu:24.04': 'apt update && apt install --yes git libboost-all-dev libcppunit-dev libsnappy-dev cmake libpcre2-dev', + }; platform_map[platform]; local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') = { - local pkg_format = if (std.split(platform, ':')[0] == 'centos' || std.split(platform, ':')[0] == 'rockylinux') then 'rpm' else 'deb', + local pkg_format = if (std.split(platform, ':')[0] == 'rockylinux') then 'rpm' else 'deb', local init = if (pkg_format == 'rpm') then '/usr/lib/systemd/systemd' else 'systemd', local mtr_path = if (pkg_format == 'rpm') then '/usr/share/mysql-test' else '/usr/share/mysql/mysql-test', local cmapi_path = '/usr/share/columnstore/cmapi', local etc_path = '/etc/columnstore', local socket_path = if (pkg_format == 'rpm') then '/var/lib/mysql/mysql.sock' else 
'/run/mysqld/mysqld.sock', local config_path_prefix = if (pkg_format == 'rpm') then '/etc/my.cnf.d/' else '/etc/mysql/mariadb.conf.d/50-', - local img = if (platform == 'centos:7' || platform == 'rockylinux:8') then platform else 'romcheck/' + std.strReplace(platform, '/', '-'), + local img = if (platform == 'rockylinux:8') then platform else 'detravi/' + std.strReplace(platform, '/', '-'), local regression_ref = if (branch == any_branch) then 'develop' else branch, // local regression_tests = if (std.startsWith(platform, 'debian') || std.startsWith(platform, 'ubuntu:20')) then 'test000.sh' else 'test000.sh,test001.sh', @@ -228,6 +228,9 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') local execInnerDocker(command, dockerImage, flags = '') = 'docker exec ' + flags + ' -t ' + dockerImage + ' ' + command, + local execInnerDockerNoTTY(command, dockerImage, flags = '') = + 'docker exec ' + flags + ' ' + dockerImage + ' ' + command, + local installRpmDeb(pkg_format, rpmpackages, debpackages) = if (pkg_format == 'rpm') then ' bash -c "yum install -y ' + rpmpackages + '"' @@ -248,6 +251,18 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') if (pkg_format == 'deb') then execInnerDocker('sed -i "s/exit 101/exit 0/g" /usr/sbin/policy-rc.d', dockerImage), + 'echo "Docker CGroups opts here"', + 'ls -al /sys/fs/cgroup/cgroup.controllers || true ', + 'ls -al /sys/fs/cgroup/ || true ', + 'ls -al /sys/fs/cgroup/memory || true', + "docker ps --filter=name=" + dockerImage, + + execInnerDocker('echo "Inner Docker CGroups opts here"', dockerImage), + execInnerDocker('ls -al /sys/fs/cgroup/cgroup.controllers || true', dockerImage), + execInnerDocker('ls -al /sys/fs/cgroup/ || true', dockerImage), + execInnerDocker('ls -al /sys/fs/cgroup/memory || true', dockerImage), + + execInnerDocker('mkdir core', dockerImage), execInnerDocker('chmod 777 core', dockerImage), 'docker cp core_dumps/. 
' + dockerImage + ':/', @@ -288,7 +303,7 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') execInnerDocker("mkdir -p reg-logs", dockerImage("regression"), "--workdir /mariadb-columnstore-regression-test/mysql/queries/nightly/alltest"), execInnerDocker("bash -c 'sleep 4800 && bash /save_stack.sh /mariadb-columnstore-regression-test/mysql/queries/nightly/alltest/reg-logs/' & ", dockerImage("regresion")), - execInnerDocker('bash -c "timeout -k 1m -s SIGKILL --preserve-status $${REGRESSION_TIMEOUT} ./go.sh --sm_unit_test_dir=/storage-manager --tests=' + name + ' || ./regression_logs.sh ' + name + '"', + execInnerDockerNoTTY('bash -c "timeout -k 1m -s SIGKILL --preserve-status $${REGRESSION_TIMEOUT} ./go.sh --sm_unit_test_dir=/storage-manager --tests=' + name + ' || ./regression_logs.sh ' + name + '"', dockerImage("regression"), "--env PRESERVE_LOGS=true --workdir /mariadb-columnstore-regression-test/mysql/queries/nightly/alltest"), ], @@ -309,13 +324,14 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') image: 'docker', volumes: [pipeline._volumes.docker], commands: [ - 'docker run --env OS=' + result + ' --env PACKAGES_URL=' + packages_url + ' --env DEBIAN_FRONTEND=noninteractive --env MCS_USE_S3_STORAGE=0 --name smoke$${DRONE_BUILD_NUMBER} --ulimit core=-1 --privileged --detach ' + img + ' ' + init + ' --unit=basic.target'] + 'docker run --memory 3g --env OS=' + result + ' --env PACKAGES_URL=' + packages_url + ' --env DEBIAN_FRONTEND=noninteractive --env MCS_USE_S3_STORAGE=0 --name smoke$${DRONE_BUILD_NUMBER} --ulimit core=-1 --privileged --detach ' + img + ' ' + init + ' --unit=basic.target'] + prepareTestStage(dockerImage("smoke"), pkg_format, result, true) + [ installEngine(dockerImage("smoke"), pkg_format), 'sleep $${SMOKE_DELAY_SECONDS:-1s}', // start mariadb and mariadb-columnstore services and run simple query execInnerDocker('systemctl start mariadb', dockerImage("smoke")), - 
execInnerDocker('systemctl start mariadb-columnstore', dockerImage("smoke")), + execInnerDocker("/usr/bin/mcsSetConfig SystemConfig CGroup just_no_group_use_local", dockerImage("smoke")), + execInnerDocker('systemctl restart mariadb-columnstore', dockerImage("smoke")), execInnerDocker('mariadb -e "create database if not exists test; create table test.t1 (a int) engine=Columnstore; insert into test.t1 values (1); select * from test.t1"', dockerImage("smoke")), @@ -337,6 +353,7 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') }, }, commands: [ + // why do we mount cgroups here, but miss it on other steps? 'docker run --volume /sys/fs/cgroup:/sys/fs/cgroup:ro --env OS=' + result + ' --env PACKAGES_URL=' + packages_url + ' --env DEBIAN_FRONTEND=noninteractive --env MCS_USE_S3_STORAGE=0 --env UCF_FORCE_CONFNEW=1 --name upgrade$${DRONE_BUILD_NUMBER}' + version + ' --ulimit core=-1 --privileged --detach ' + img + ' ' + init + ' --unit=basic.target'] + prepareTestStage(dockerImage('upgrade') + version, pkg_format, result, false) + [ if (pkg_format == 'deb') @@ -345,9 +362,6 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') if (std.split(platform, ':')[0] == 'rockylinux') then execInnerDocker('bash -c "./upgrade_setup_rpm.sh '+ version + ' ' + result + ' ' + arch + ' ' + repo_pkg_url_no_res + ' $${UPGRADE_TOKEN}"', dockerImage('upgrade') + version), - if (std.split(platform, ':')[0] == 'centos') - then execInnerDocker('bash -c "./upgrade_setup_rpm.sh '+ version + ' ' + result + ' ' + arch + ' ' + repo_pkg_url_no_res + ' $${UPGRADE_TOKEN}"', - dockerImage('upgrade') + version), ], }, upgradelog:: { @@ -372,7 +386,7 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') MTR_FULL_SUITE: '${MTR_FULL_SUITE:-false}', }, commands: [ - 'docker run --shm-size=500m --env MYSQL_TEST_DIR=' + mtr_path + ' --env OS=' + result + ' --env PACKAGES_URL=' + packages_url + ' --env 
DEBIAN_FRONTEND=noninteractive --env MCS_USE_S3_STORAGE=0 --name mtr$${DRONE_BUILD_NUMBER} --ulimit core=-1 --privileged --detach ' + img + ' ' + init + ' --unit=basic.target'] + 'docker run --shm-size=500m --memory 8g --env MYSQL_TEST_DIR=' + mtr_path + ' --env OS=' + result + ' --env PACKAGES_URL=' + packages_url + ' --env DEBIAN_FRONTEND=noninteractive --env MCS_USE_S3_STORAGE=0 --name mtr$${DRONE_BUILD_NUMBER} --ulimit core=-1 --privileged --detach ' + img + ' ' + init + ' --unit=basic.target'] + prepareTestStage('mtr$${DRONE_BUILD_NUMBER}', pkg_format, result, true) + [ installEngine(dockerImage("mtr"), pkg_format), 'docker cp mysql-test/columnstore mtr$${DRONE_BUILD_NUMBER}:' + mtr_path + '/suite/', @@ -381,14 +395,11 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') execInnerDocker("bash -c 'sed -i /ProtectSystem/d $(systemctl show --property FragmentPath mariadb | sed s/FragmentPath=//)'", dockerImage('mtr')), execInnerDocker('systemctl daemon-reload', dockerImage("mtr")), execInnerDocker('systemctl start mariadb', dockerImage("mtr")), + // Set RAM consumption limits to avoid RAM contention b/w mtr and regression steps. + execInnerDocker("/usr/bin/mcsSetConfig SystemConfig CGroup just_no_group_use_local", dockerImage("mtr")), execInnerDocker('mariadb -e "create database if not exists test;"', dockerImage("mtr")), execInnerDocker('systemctl restart mariadb-columnstore', dockerImage("mtr")), - // Set RAM consumption limits to avoid RAM contention b/w mtr and regression steps. 
- //'docker exec -t mtr$${DRONE_BUILD_NUMBER} bash -c "/usr/bin/mcsSetConfig HashJoin TotalUmMemory 4G"', - //'docker exec -t mtr$${DRONE_BUILD_NUMBER} bash -c "/usr/bin/mcsSetConfig DBBC NumBlocksPct 1G"', - //'docker exec -t mtr$${DRONE_BUILD_NUMBER} bash -c "/usr/bin/mcsSetConfig SystemConfig CGroup $(docker ps --filter=name=mtr$${DRONE_BUILD_NUMBER} --quiet --no-trunc)"', - // delay mtr for manual debugging on live instance 'sleep $${MTR_DELAY_SECONDS:-1s}', 'MTR_SUITE_LIST=$([ "$MTR_FULL_SUITE" == true ] && echo "' + mtr_full_set + '" || echo "$MTR_SUITE_LIST")', @@ -459,12 +470,9 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') 'cd mariadb-columnstore-regression-test', 'git rev-parse --abbrev-ref HEAD && git rev-parse HEAD', 'cd ..', - 'docker run --shm-size=500m --env OS=' + result + ' --env PACKAGES_URL=' + packages_url + ' --env DEBIAN_FRONTEND=noninteractive --env MCS_USE_S3_STORAGE=0 --name regression$${DRONE_BUILD_NUMBER} --ulimit core=-1 --privileged --detach ' + img + ' ' + init + ' --unit=basic.target'] + 'docker run --shm-size=500m --memory 10g --env OS=' + result + ' --env PACKAGES_URL=' + packages_url + ' --env DEBIAN_FRONTEND=noninteractive --env MCS_USE_S3_STORAGE=0 --name regression$${DRONE_BUILD_NUMBER} --ulimit core=-1 --privileged --detach ' + img + ' ' + init + ' --unit=basic.target'] + prepareTestStage(dockerImage('regression'), pkg_format, result, true) + [ - if (platform == 'centos:7') then - execInnerDocker('bash -c "yum install -y sysvinit-tools"', dockerImage('regression')), - 'docker cp mariadb-columnstore-regression-test regression$${DRONE_BUILD_NUMBER}:/', // list storage manager binary 'ls -la /mdb/' + builddir + '/storage/columnstore/columnstore/storage-manager', @@ -479,11 +487,10 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') execInnerDocker('sed -i "/^.mariadb.$/a lower_case_table_names=1" ' + config_path_prefix + 'server.cnf', dockerImage('regression')), // 
set default client character set to utf-8 execInnerDocker('sed -i "/^.client.$/a default-character-set=utf8" ' + config_path_prefix + 'client.cnf',dockerImage('regression')), + // Set RAM consumption limits to avoid RAM contention b/w mtr and regression steps. - //'docker exec -t regression$${DRONE_BUILD_NUMBER} bash -c "/usr/bin/mcsSetConfig HashJoin TotalUmMemory 5G"', - //'docker exec -t regressin$${DRONE_BUILD_NUMBER} bash -c "/usr/bin/mcsSetConfig DBBC NumBlocksPct 2G"', - //'docker exec -t regression$${DRONE_BUILD_NUMBER} bash -c "/usr/bin/mcsSetConfig SystemConfig CGroup $(docker ps --filter=name=regression$${DRONE_BUILD_NUMBER} --quiet --no-trunc)"', - // start mariadb and mariadb-columnstore services + execInnerDocker("/usr/bin/mcsSetConfig SystemConfig CGroup just_no_group_use_local", dockerImage("regression")), + execInnerDocker('systemctl start mariadb',dockerImage('regression')), execInnerDocker('systemctl restart mariadb-columnstore',dockerImage('regression')), // delay regression for manual debugging on live instance @@ -628,7 +635,6 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') 'cd cmapi', if (platform == 'rockylinux:9') then 'dnf install -y yum-utils && dnf config-manager --set-enabled devel && dnf update -y', if (pkg_format == 'rpm') then 'yum install -y cmake make rpm-build libarchive createrepo findutils redhat-lsb-core' else 'apt update && apt install --no-install-recommends -y cmake make dpkg-dev lsb-release', - if (platform == 'centos:7') then 'yum install -y epel-release && yum install -y cmake3 && ln -sf /usr/bin/cmake3 /usr/bin/cmake', './cleanup.sh', 'cmake -D' + std.asciiUpper(pkg_format) + '=1 -DSERVER_DIR=/mdb/' + builddir + ' . 
&& make package', 'mkdir ./' + result, @@ -732,8 +738,6 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') 'git config cmake.update-submodules no', 'rm -rf storage/columnstore/columnstore', 'cp -r /drone/src /mdb/' + builddir + '/storage/columnstore/columnstore', - if (std.split(platform, ':')[0] == 'centos') then 'wget -P /mdb/ https://cspkg.s3.amazonaws.com/MariaDB-Compat/MariaDB-shared-10.1.kvm-rpm-centos74-amd64.rpm', - if (std.split(platform, ':')[0] == 'centos') then 'wget -P /mdb/ https://cspkg.s3.amazonaws.com/MariaDB-Compat/MariaDB-shared-5.3.amd64.rpm', ], }, { @@ -769,7 +773,7 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') // Disable dh_missing strict check for missing files 'sed -i s/--fail-missing/--list-missing/ debian/rules', // Tweak debian packaging stuff - 'for i in mariadb-backup mariadb-plugin libmariadbd; do sed -i "/Package: $i.*/,/^$/d" debian/control; done', + 'for i in mariadb-plugin libmariadbd; do sed -i "/Package: $i.*/,/^$/d" debian/control; done', "sed -i 's/Depends: galera.*/Depends:/' debian/control", 'for i in galera wsrep ha_sphinx embedded; do sed -i /$i/d debian/*.install; done', // Install build dependencies for deb @@ -780,7 +784,7 @@ local Pipeline(branch, platform, event, arch='amd64', server='10.6-enterprise') get_sccache, testPreparation(platform), // disable LTO for 22.04 for now - if (platform == 'ubuntu:22.04') then 'apt install -y lto-disabled-list && for i in mariadb-plugin-columnstore mariadb-server mariadb-server-core mariadb mariadb-10.6; do echo "$i any" >> /usr/share/lto-disabled-list/lto-disabled-list; done && grep mariadb /usr/share/lto-disabled-list/lto-disabled-list', + if (platform == 'ubuntu:22.04' || platform == 'ubuntu:24.04') then 'apt install -y lto-disabled-list && for i in mariadb-plugin-columnstore mariadb-server mariadb-server-core mariadb mariadb-10.6; do echo "$i any" >> /usr/share/lto-disabled-list/lto-disabled-list; done && grep 
mariadb /usr/share/lto-disabled-list/lto-disabled-list', platformMap(platform, arch), 'sccache --show-stats', // move engine and cmapi packages to one dir to make a repo diff --git a/CMakeLists.txt b/CMakeLists.txt index e56a4a0f2..081b8467c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,12 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.13) PROJECT(Columnstore) INCLUDE (CheckCXXSourceCompiles) +# MariaDB server adds this flags by default for Debug builds, and this makes our external projects +# ABI incompatible with debug STL containers +STRING(REPLACE -D_GLIBCXX_DEBUG "" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) +STRING(REPLACE -D_GLIBCXX_ASSERTIONS "" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + + FOREACH(p CMP0135) IF(POLICY ${p}) CMAKE_POLICY(SET ${p} NEW) diff --git a/README.md b/README.md index fa7b4f880..7839380bf 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# MariaDB ColumnStore Storage engine 23.02.X +# MariaDB ColumnStore Storage engine 23.10.X It is built by porting InfiniDB 4.6.7 on MariaDB and adding entirely new features not found anywhere else. 
diff --git a/build/bootstrap_mcs.sh b/build/bootstrap_mcs.sh index a10cb6d52..7ba080db3 100755 --- a/build/bootstrap_mcs.sh +++ b/build/bootstrap_mcs.sh @@ -48,6 +48,7 @@ optparse.define short=j long=parallel desc="Number of paralles for build" variab optparse.define short=F long=show-build-flags desc="Print CMake flags, while build" variable=PRINT_CMAKE_FLAGS default=false value=true optparse.define short=c long=cloud desc="Enable cloud storage" variable=CLOUD_STORAGE_ENABLED default=false value=true optparse.define short=f long=do-not-freeze-revision desc="Disable revision freezing, or do not set 'update none' for columnstore submodule in MDB repository" variable=DO_NOT_FREEZE_REVISION default=false value=true +optparse.define short=a long=build-path variable=MARIA_BUILD_PATH default=$MDB_SOURCE_PATH/../MariaDBBuild source $( optparse.build ) @@ -64,10 +65,12 @@ INSTALL_PREFIX="/usr/" DATA_DIR="/var/lib/mysql/data" CMAKE_BIN_NAME=cmake CTEST_BIN_NAME=ctest -CONFIG_DIR="/etc/my.cnf.d" +RPM_CONFIG_DIR="/etc/my.cnf.d" +DEB_CONFIG_DIR="/etc/mysql/mariadb.conf.d" +CONFIG_DIR=$RPM_CONFIG_DIR if [[ $OS = 'Ubuntu' || $OS = 'Debian' ]]; then - CONFIG_DIR="/etc/mysql/mariadb.conf.d" + CONFIG_DIR=$DEB_CONFIG_DIR fi export CLICOLOR_FORCE=1 @@ -99,7 +102,7 @@ select_branch() fi cd - > /dev/null - message "Columnstore will be built from $color_yellow$CURRENT_BRANCH$color_normal branch" + message "Columnstore will be built from $color_yellow$CURRENT_BRANCH$color_cyan branch" } install_deps() @@ -196,18 +199,24 @@ clean_old_installation() rm -rf /var/lib/columnstore/local/ rm -rf /var/lib/columnstore/storagemanager/* rm -rf /var/log/mariadb/columnstore/* + rm -rf /etc/mysql/mariadb.conf.d/columnstore.cnf /etc/my.cnf.d/columnstore.cnf rm -rf /tmp/* rm -rf $REPORT_PATH rm -rf /var/lib/mysql rm -rf /var/run/mysqld rm -rf $DATA_DIR rm -rf /etc/mysql + rm -rf /etc/my.cnf.d/columnstore.cnf + rm -rf /etc/mysql/mariadb.conf.d/columnstore.cnf } build() { + MARIA_BUILD_PATH=$(realpath 
$MARIA_BUILD_PATH) message_split - message "Building sources in $color_yellow$MCS_BUILD_TYPE$color_normal mode" + message "Building sources in $color_yellow$MCS_BUILD_TYPE$color_cyan mode" + message "Compiled artifacts will be written to $color_yellow$MARIA_BUILD_PATH$color_cyan" + mkdir -p $MARIA_BUILD_PATH local MDB_CMAKE_FLAGS="-DWITH_SYSTEMD=yes -DPLUGIN_COLUMNSTORE=YES @@ -324,12 +333,12 @@ build() fi message "Configuring cmake silently" - ${CMAKE_BIN_NAME} -DCMAKE_BUILD_TYPE=$MCS_BUILD_TYPE $MDB_CMAKE_FLAGS . | spinner + ${CMAKE_BIN_NAME} -DCMAKE_BUILD_TYPE=$MCS_BUILD_TYPE $MDB_CMAKE_FLAGS -S$MDB_SOURCE_PATH -B$MARIA_BUILD_PATH | spinner message_split - ${CMAKE_BIN_NAME} --build . -j $CPUS | onelinearizator && \ + ${CMAKE_BIN_NAME} --build $MARIA_BUILD_PATH -j $CPUS | onelinearizator && \ message "Installing silently" && - ${CMAKE_BIN_NAME} --install . | spinner 30 + ${CMAKE_BIN_NAME} --install $MARIA_BUILD_PATH | spinner 30 if [ $? -ne 0 ]; then message_split @@ -342,15 +351,16 @@ build() check_user_and_group() { - if [ -z "$(grep mysql /etc/passwd)" ]; then - message "Adding user mysql into /etc/passwd" - useradd -r -U mysql -d /var/lib/mysql + user=$1 + if [ -z "$(grep $user /etc/passwd)" ]; then + message "Adding user $user into /etc/passwd" + useradd -r -U $user -d /var/lib/mysql fi - if [ -z "$(grep mysql /etc/group)" ]; then + if [ -z "$(grep $user /etc/group)" ]; then GroupID = `awk -F: '{uid[$3]=1}END{for(x=100; x<=999; x++) {if(uid[x] != ""){}else{print x; exit;}}}' /etc/group` - message "Adding group mysql with id $GroupID" - groupadd -g GroupID mysql + message "Adding group $user with id $GroupID" + groupadd -g $GroupID $user fi } @@ -361,7 +371,7 @@ run_unit_tests() warn "Skipping unittests" else message "Running unittests" - cd $MDB_SOURCE_PATH + cd $MARIA_BUILD_PATH ${CTEST_BIN_NAME} . 
-R columnstore: -j $(nproc) --progress cd - > /dev/null fi @@ -384,14 +394,12 @@ disable_plugins_for_bootstrap() { find /etc -type f -exec sed -i 's/plugin-load-add=auth_gssapi.so//g' {} + find /etc -type f -exec sed -i 's/plugin-load-add=ha_columnstore.so//g' {} + - find /etc -type f -exec sed -i 's/columnstore_use_import_for_batchinsert = ON//g' {} + } enable_columnstore_back() { echo plugin-load-add=ha_columnstore.so >> $CONFIG_DIR/columnstore.cnf sed -i '/\[mysqld\]/a\plugin-load-add=ha_columnstore.so' $CONFIG_DIR/columnstore.cnf - sed -i '/plugin-load-add=ha_columnstore.so/a\columnstore_use_import_for_batchinsert = ON' $CONFIG_DIR/columnstore.cnf } fix_config_files() @@ -442,25 +450,32 @@ fix_config_files() systemctl daemon-reload } +make_dir() +{ + mkdir -p $1 + chown mysql:mysql $1 +} + install() { message_split message "Installing MariaDB" disable_plugins_for_bootstrap - mkdir -p $REPORT_PATH + make_dir $REPORT_PATH chmod 777 $REPORT_PATH - check_user_and_group + check_user_and_group mysql + check_user_and_group syslog - mkdir -p /etc/my.cnf.d - bash -c 'echo "[client-server] -socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf' + make_dir $CONFIG_DIR + + echo "[client-server] +socket=/run/mysqld/mysqld.sock" > $CONFIG_DIR/socket.cnf mv $INSTALL_PREFIX/lib/mysql/plugin/ha_columnstore.so /tmp/ha_columnstore_1.so || mv $INSTALL_PREFIX/lib64/mysql/plugin/ha_columnstore.so /tmp/ha_columnstore_2.so - mkdir -p /var/lib/mysql - chown mysql:mysql /var/lib/mysql + make_dir /var/lib/mysql message "Running mysql_install_db" sudo -u mysql mysql_install_db --rpm --user=mysql > /dev/null @@ -468,7 +483,7 @@ socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf' enable_columnstore_back - mkdir -p /etc/columnstore + make_dir /etc/columnstore cp $MDB_SOURCE_PATH/storage/columnstore/columnstore/oam/etc/Columnstore.xml /etc/columnstore/Columnstore.xml cp $MDB_SOURCE_PATH/storage/columnstore/columnstore/storage-manager/storagemanager.cnf 
/etc/columnstore/storagemanager.cnf @@ -477,8 +492,8 @@ socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf' cp $MDB_SOURCE_PATH/storage/columnstore/columnstore/oam/install_scripts/*.service /lib/systemd/system/ if [[ "$OS" = 'Ubuntu' || "$OS" = 'Debian' ]]; then - mkdir -p /usr/share/mysql - mkdir -p /etc/mysql/ + make_dir /usr/share/mysql + make_dir /etc/mysql/ cp $MDB_SOURCE_PATH/debian/additions/debian-start.inc.sh /usr/share/mysql/debian-start.inc.sh cp $MDB_SOURCE_PATH/debian/additions/debian-start /etc/mysql/debian-start > /etc/mysql/debian.cnf @@ -486,6 +501,7 @@ socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf' fix_config_files + make_dir /etc/my.cnf.d if [ -d "/etc/mysql/mariadb.conf.d/" ]; then message "Copying configs from /etc/mysql/mariadb.conf.d/ to /etc/my.cnf.d" cp -rp /etc/mysql/mariadb.conf.d/* /etc/my.cnf.d @@ -496,15 +512,11 @@ socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf' cp -rp /etc/mysql/conf.d/* /etc/my.cnf.d fi - mkdir -p /var/lib/columnstore/data1 - mkdir -p /var/lib/columnstore/data1/systemFiles - mkdir -p /var/lib/columnstore/data1/systemFiles/dbrm - mkdir -p /run/mysqld/ - - mkdir -p $DATA_DIR - chown -R mysql:mysql $DATA_DIR - chown -R mysql:mysql /var/lib/columnstore/ - chown -R mysql:mysql /run/mysqld/ + make_dir /var/lib/columnstore/data1 + make_dir /var/lib/columnstore/data1/systemFiles + make_dir /var/lib/columnstore/data1/systemFiles/dbrm + make_dir /run/mysqld/ + make_dir $DATA_DIR chmod +x $INSTALL_PREFIX/bin/mariadb* @@ -513,7 +525,7 @@ socket=/run/mysqld/mysqld.sock" > /etc/my.cnf.d/socket.cnf' start_storage_manager_if_needed message "Running columnstore-post-install" - mkdir -p /var/lib/columnstore/local + make_dir /var/lib/columnstore/local columnstore-post-install --rpmmode=install message "Running install_mcs_mysql" install_mcs_mysql.sh diff --git a/cmapi/CMakeLists.txt b/cmapi/CMakeLists.txt index 6d4e6fe03..b546560ed 100644 --- a/cmapi/CMakeLists.txt +++ b/cmapi/CMakeLists.txt @@ -84,6 
+84,9 @@ INSTALL(FILES mcs_aws INSTALL(FILES mcs_gsutil PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ DESTINATION ${BIN_DIR}) +INSTALL(FILES scripts/mcs_backup_manager.sh scripts/cs_package_manager.sh + PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ + DESTINATION ${BIN_DIR}) OPTION(RPM "Build an RPM" OFF) IF(RPM) @@ -135,8 +138,11 @@ IF(RPM) STRING(REGEX MATCH "^." OS_VERSION_MAJOR "${LSB_RELEASE_VERSION_SHORT}") MESSAGE(STATUS ${OS_VERSION_MAJOR}) - IF (LSB_RELEASE_ID_SHORT MATCHES "centos|rocky") + IF (LSB_RELEASE_ID_SHORT MATCHES "centos|rocky|rhel|alma") SET(OS_NAME_SHORT "el") + IF (OS_VERSION_MAJOR MATCHES "9") + SET(CPACK_RPM_PACKAGE_REQUIRES "libxcrypt-compat") + ENDIF() ELSE() SET(OS_NAME_SHORT "unknown") ENDIF() diff --git a/cmapi/cmapi_server/controllers/dispatcher.py b/cmapi/cmapi_server/controllers/dispatcher.py index 76da33f2b..3d1aaa0d9 100644 --- a/cmapi/cmapi_server/controllers/dispatcher.py +++ b/cmapi/cmapi_server/controllers/dispatcher.py @@ -6,7 +6,7 @@ from cmapi_server.controllers.endpoints import ( StatusController, ConfigController, BeginController, CommitController, RollbackController, StartController, ShutdownController, ExtentMapController, ClusterController, ApiKeyController, - LoggingConfigController, AppController + LoggingConfigController, AppController, NodeProcessController ) from cmapi_server.controllers.s3dataload import S3DataLoadController @@ -241,6 +241,26 @@ dispatcher.connect( ) +# /_version/node/stop_dmlproc/ (PUT) +dispatcher.connect( + name = 'stop_dmlproc', + route = f'/cmapi/{_version}/node/stop_dmlproc', + action = 'put_stop_dmlproc', + controller = NodeProcessController(), + conditions = {'method': ['PUT']} +) + + +# /_version/node/is_process_running/ (PUT) +dispatcher.connect( + name = 'is_process_running', + route = f'/cmapi/{_version}/node/is_process_running', + action = 'get_process_running', + controller = NodeProcessController(), + conditions = {'method': ['GET']} +) + + def jsonify_error(status, message, 
traceback, version): \ # pylint: disable=unused-argument """JSONify all CherryPy error responses (created by raising the diff --git a/cmapi/cmapi_server/controllers/endpoints.py b/cmapi/cmapi_server/controllers/endpoints.py index 0f190ba9f..bf3b009c1 100644 --- a/cmapi/cmapi_server/controllers/endpoints.py +++ b/cmapi/cmapi_server/controllers/endpoints.py @@ -63,6 +63,23 @@ def raise_422_error( raise APIError(422, err_msg) +# TODO: Move somewhere else, e.g. to helpers +def get_use_sudo(app_config: dict) -> bool: + """Get value about using superuser or not from app config. + + :param app_config: CherryPy application config + :type app_config: dict + :return: use_sudo config value + :rtype: bool + """ + privileges_section = app_config.get('Privileges', None) + if privileges_section is not None: + use_sudo = privileges_section.get('use_sudo', False) + else: + use_sudo = False + return use_sudo + + @cherrypy.tools.register('before_handler', priority=80) def validate_api_key(): """Validate API key. 
@@ -513,6 +530,7 @@ IP address.") module_logger.debug(f'{func_name} returns {str(begin_response)}') return begin_response + class CommitController: @cherrypy.tools.timeit() @cherrypy.tools.json_in() @@ -601,15 +619,6 @@ class RollbackController: return rollback_response -def get_use_sudo(app_config): - privileges_section = app_config.get('Privileges', None) - if privileges_section is not None: - use_sudo = privileges_section.get('use_sudo', False) - else: - use_sudo = False - return use_sudo - - class StartController: @cherrypy.tools.timeit() @cherrypy.tools.json_out() @@ -1137,3 +1146,59 @@ class AppController(): return {'started': True} else: raise APIError(503, 'CMAPI not ready to handle requests.') + + +class NodeProcessController(): + + @cherrypy.tools.timeit() + @cherrypy.tools.json_in() + @cherrypy.tools.json_out() + @cherrypy.tools.validate_api_key() # pylint: disable=no-member + def put_stop_dmlproc(self): + """Handler for /node/stop_dmlproc (PUT) endpoint.""" + # TODO: make it works only from cli tool like set_api_key made + func_name = 'put_stop_dmlproc' + log_begin(module_logger, func_name) + + request = cherrypy.request + request_body = request.json + timeout = request_body.get('timeout', 10) + force = request_body.get('force', False) + + if force: + module_logger.debug( + f'Calling DMLproc to force stop after timeout={timeout}.' 
+ ) + MCSProcessManager.stop( + name='DMLProc', is_primary=True, use_sudo=True, timeout=timeout + ) + else: + module_logger.debug('Callling stop DMLproc gracefully.') + try: + MCSProcessManager.gracefully_stop_dmlproc() + except (ConnectionRefusedError, RuntimeError): + raise_422_error( + logger=module_logger, func_name=func_name, + err_msg='Couldn\'t stop DMlproc gracefully' + ) + response = {'timestamp': str(datetime.now())} + module_logger.debug(f'{func_name} returns {str(response)}') + return response + + @cherrypy.tools.timeit() + @cherrypy.tools.json_out() + @cherrypy.tools.validate_api_key() # pylint: disable=no-member + def get_process_running(self, process_name): + """Handler for /node/is_process_running (GET) endpoint.""" + func_name = 'get_process_running' + log_begin(module_logger, func_name) + + process_running = MCSProcessManager.is_service_running(process_name) + + response = { + 'timestamp': str(datetime.now()), + 'process_name': process_name, + 'running': process_running + } + module_logger.debug(f'{func_name} returns {str(response)}') + return response diff --git a/cmapi/cmapi_server/handlers/cluster.py b/cmapi/cmapi_server/handlers/cluster.py index f8988ad9a..628c3da6b 100644 --- a/cmapi/cmapi_server/handlers/cluster.py +++ b/cmapi/cmapi_server/handlers/cluster.py @@ -1,6 +1,7 @@ """Module contains Cluster business logic functions.""" import logging from datetime import datetime +from typing import Optional import requests @@ -13,6 +14,7 @@ from cmapi_server.helpers import ( get_config_parser, get_current_key, get_id, get_version, start_transaction, rollback_transaction, update_revision_and_manager, ) +from cmapi_server.managers.transaction import TransactionManager from cmapi_server.node_manipulation import ( add_node, add_dbroot, remove_node, switch_node_maintenance, ) @@ -96,7 +98,7 @@ class ClusterHandler(): try: suceeded, transaction_id, successes = start_transaction( - cs_config_filename=config, id=transaction_id + 
cs_config_filename=config, txn_id=transaction_id ) except Exception as err: rollback_transaction(transaction_id, cs_config_filename=config) @@ -141,7 +143,9 @@ class ClusterHandler(): @staticmethod def shutdown( config: str = DEFAULT_MCS_CONF_PATH, - logger: logging.Logger = logging.getLogger('cmapi_server') + logger: logging.Logger = logging.getLogger('cmapi_server'), + in_transaction: bool = False, + timeout: int = 15 ) -> dict: """Method to stop the MCS Cluster. @@ -150,6 +154,11 @@ class ClusterHandler(): :type config: str, optional :param logger: logger, defaults to logging.getLogger('cmapi_server') :type logger: logging.Logger, optional + :param in_transaction: is function called in existing transaction or no + :type in_transaction: bool + :param timeout: timeout in seconds to gracefully stop DMLProc + TODO: for next releases + :type timeout: int :raises CMAPIBasicError: if no nodes in the cluster :return: start timestamp :rtype: dict @@ -158,49 +167,28 @@ class ClusterHandler(): 'Cluster shutdown command called. Shutting down the cluster.' ) + def process_shutdown(): + """Raw node shutdown processing.""" + switch_node_maintenance(True) + update_revision_and_manager() + + # TODO: move this from multiple places to one, eg to helpers + try: + broadcast_successful = broadcast_new_config(config) + except Exception as err: + raise CMAPIBasicError( + 'Error while distributing config file.' + ) from err + + if not broadcast_successful: + raise CMAPIBasicError('Config distribution isn\'t successful.') + start_time = str(datetime.now()) - transaction_id = get_id() - - try: - suceeded, transaction_id, successes = start_transaction( - cs_config_filename=config, id=transaction_id - ) - except Exception as err: - rollback_transaction(transaction_id, cs_config_filename=config) - raise CMAPIBasicError( - 'Error while starting the transaction.' 
- ) from err - if not suceeded: - rollback_transaction(transaction_id, cs_config_filename=config) - raise CMAPIBasicError('Starting transaction isn\'t successful.') - - if suceeded and len(successes) == 0: - rollback_transaction(transaction_id, cs_config_filename=config) - raise CMAPIBasicError('There are no nodes in the cluster.') - - switch_node_maintenance(True) - update_revision_and_manager() - - # TODO: move this from multiple places to one, eg to helpers - try: - broadcast_successful = broadcast_new_config(config) - except Exception as err: - rollback_transaction(transaction_id, cs_config_filename=config) - raise CMAPIBasicError( - 'Error while distributing config file.' - ) from err - - if not broadcast_successful: - rollback_transaction(transaction_id, cs_config_filename=config) - raise CMAPIBasicError('Config distribution isn\'t successful.') - - try: - commit_transaction(transaction_id, cs_config_filename=config) - except Exception as err: - rollback_transaction(transaction_id, cs_config_filename=config) - raise CMAPIBasicError( - 'Error while committing transaction.' 
- ) from err + if not in_transaction: + with TransactionManager(): + process_shutdown() + else: + process_shutdown() logger.debug('Successfully finished shutting down the cluster.') return {'timestamp': start_time} @@ -236,7 +224,7 @@ class ClusterHandler(): try: suceeded, transaction_id, successes = start_transaction( cs_config_filename=config, extra_nodes=[node], - id=transaction_id + txn_id=transaction_id ) except Exception as err: rollback_transaction(transaction_id, cs_config_filename=config) @@ -321,7 +309,7 @@ class ClusterHandler(): try: suceeded, transaction_id, txn_nodes = start_transaction( cs_config_filename=config, remove_nodes=[node], - id=transaction_id + txn_id=transaction_id ) except Exception as err: rollback_transaction(transaction_id, cs_config_filename=config) @@ -425,7 +413,7 @@ class ClusterHandler(): try: suceeded, transaction_id, successes = start_transaction( - cs_config_filename=config, id=transaction_id + cs_config_filename=config, txn_id=transaction_id ) except Exception as err: rollback_transaction(transaction_id, cs_config_filename=config) diff --git a/cmapi/cmapi_server/helpers.py b/cmapi/cmapi_server/helpers.py index 91045ca73..ea2b61726 100644 --- a/cmapi/cmapi_server/helpers.py +++ b/cmapi/cmapi_server/helpers.py @@ -11,10 +11,12 @@ import logging import os import socket import time +from collections import namedtuple from functools import partial from random import random from shutil import copyfile from typing import Tuple, Optional +from urllib.parse import urlencode, urlunparse import lxml.objectify import requests @@ -32,17 +34,25 @@ from cmapi_server.managers.process import MCSProcessManager from mcs_node_control.models.node_config import NodeConfig -def get_id(): +def get_id() -> int: + """Generate pseudo random id for transaction. + + :return: id for internal transaction + :rtype: int + + ..TODO: need to change transaction id format and generation method? 
+ """ return int(random() * 1000000) def start_transaction( - config_filename=CMAPI_CONF_PATH, - cs_config_filename=DEFAULT_MCS_CONF_PATH, - extra_nodes=None, - remove_nodes=None, - optional_nodes=None, - id=get_id() + config_filename: str = CMAPI_CONF_PATH, + cs_config_filename: str = DEFAULT_MCS_CONF_PATH, + extra_nodes: Optional[list] = None, + remove_nodes: Optional[list] = None, + optional_nodes: Optional[list] = None, + txn_id: Optional[int] = None, + timeout: float = 300.0 ): """Start internal CMAPI transaction. @@ -53,19 +63,26 @@ def start_transaction( :param config_filename: cmapi config filepath, defaults to CMAPI_CONF_PATH - :type config_filename: str + :type config_filename: str, optional :param cs_config_filename: columnstore xml config filepath, defaults to DEFAULT_MCS_CONF_PATH :type cs_config_filename: str, optional :param extra_nodes: extra nodes, defaults to None - :type extra_nodes: list, optional + :type extra_nodes: Optional[list], optional :param remove_nodes: remove nodes, defaults to None - :type remove_nodes: list, optional + :type remove_nodes: Optional[list], optional :param optional_nodes: optional nodes, defaults to None - :type optional_nodes: list, optional - :return: (success, txnid, nodes) - :rtype: tuple + :type optional_nodes: Optional[list], optional + :param txn_id: id for transaction to start, defaults to None + :type txn_id: Optional[int], optional + :param timeout: time in seconds for cmapi transaction lock before it ends + automatically, defaults to 300 + :type timeout: float, optional + :return: (success, txn_id, nodes) + :rtype: tuple[bool, int, list[str]] """ + if txn_id is None: + txn_id = get_id() # TODO: Somehow change that logic for eg using several input types # (str\list\set) and detect which one we got. 
extra_nodes = extra_nodes or [] @@ -78,8 +95,8 @@ def start_transaction( version = get_version() headers = {'x-api-key': api_key} - body = {'id' : id} - final_time = datetime.datetime.now() + datetime.timedelta(seconds=300) + body = {'id' : txn_id} + final_time = datetime.datetime.now() + datetime.timedelta(seconds=timeout) success = False while datetime.datetime.now() < final_time and not success: @@ -180,7 +197,7 @@ def start_transaction( time.sleep(1) if not node_success and node not in optional_nodes: - rollback_txn_attempt(api_key, version, id, successes) + rollback_txn_attempt(api_key, version, txn_id, successes) # wait up to 5 secs and try the whole thing again time.sleep(random() * 5) break @@ -192,7 +209,7 @@ def start_transaction( # are up (> 50%). success = (len(successes) == len(real_active_nodes)) - return (success, id, successes) + return (success, txn_id, successes) def rollback_txn_attempt(key, version, txnid, nodes): headers = {'x-api-key': key} @@ -273,6 +290,7 @@ def broadcast_new_config( sm_config_filename: str = DEFAULT_SM_CONF_PATH, test_mode: bool = False, nodes: Optional[list] = None, + timeout: int = 10 ) -> bool: """Send new config to nodes. Now in async way. @@ -289,8 +307,11 @@ def broadcast_new_config( :type test_mode: bool, optional :param nodes: nodes list for config put, defaults to None :type nodes: Optional[list], optional + :param timeout: timeout passing to gracefully stop DMLProc TODO: for next + releases. Could affect all logic of broadcacting new config + :type timeout: int :return: success state - :rtype: _type_ + :rtype: bool """ cfg_parser = get_config_parser(cmapi_config_filename) @@ -326,6 +347,11 @@ def broadcast_new_config( async def update_config(node, success_nodes, failed_nodes, headers, body): url = f'https://{node}:8640/cmapi/{version}/node/config' + # TODO: investigate about hardcoded 120 seconds timeout + # Check e1242eed47b61276ebc86136f124f6d974655515 in cmapi old + # repo to get more info. 
Patric made it because: + # "Made the timeout for a CS process restart 120s, since + # the container dispatcher waits up to 60s for SM to stop" request_put = partial( requests.put, url, verify=False, headers=headers, json=body, timeout=120 @@ -845,3 +871,44 @@ def get_dispatcher_name_and_path( config_parser.get('Dispatcher', 'path', fallback='') ) return dispatcher_name, dispatcher_path + + +def build_url( + base_url: str, query_params: dict, scheme: str = 'https', + path: str = '', params: str = '', fragment: str = '', + port: Optional[int] = None +) -> str: + """Build url with query params. + + :param base_url: base url address + :type base_url: str + :param query_params: query params + :type query_params: dict + :param scheme: url scheme, defaults to 'https' + :type scheme: str, optional + :param path: url path, defaults to '' + :type path: str, optional + :param params: params, defaults to '' + :type params: str, optional + :param fragment: fragment, defaults to '' + :type fragment: str, optional + :param port: port for base url, defaults to None + :type port: Optional[int], optional + :return: url with query params + :rtype: str + """ + # namedtuple to match the internal signature of urlunparse + Components = namedtuple( + typename='Components', + field_names=['scheme', 'netloc', 'path', 'params', 'query', 'fragment'] + ) + return urlunparse( + Components( + scheme=scheme, + netloc=f'{base_url}:{port}' if port else base_url, + path=path, + params=params, + query=urlencode(query_params), + fragment=fragment + ) + ) diff --git a/cmapi/cmapi_server/managers/process.py b/cmapi/cmapi_server/managers/process.py index ef01f57aa..1adb62e8d 100644 --- a/cmapi/cmapi_server/managers/process.py +++ b/cmapi/cmapi_server/managers/process.py @@ -170,7 +170,7 @@ class MCSProcessManager: workernodes[name]['Port'] ) ) - except socket.timeout: + except (ConnectionRefusedError, socket.timeout): logging.debug( f'"{name}" {workernodes[name]["IPAddr"]}:' 
f'{workernodes[name]["Port"]} not started yet.' @@ -262,6 +262,41 @@ class MCSProcessManager: """No operation. TODO: looks like useless.""" cls.process_dispatcher.noop() + @classmethod + def gracefully_stop_dmlproc(cls) -> None: + """Gracefully stop DMLProc using DBRM commands.""" + logging.info( + 'Trying to gracefully stop DMLProc using DBRM commands.' + ) + try: + with DBRM() as dbrm: + dbrm.set_system_state( + ['SS_ROLLBACK', 'SS_SHUTDOWN_PENDING'] + ) + except (ConnectionRefusedError, RuntimeError): + logging.error( + 'Cannot set SS_ROLLBACK and SS_SHUTDOWN_PENDING via DBRM, ' + 'graceful auto stop of DMLProc failed. ' + 'Try a regular stop method.' + ) + raise + + @classmethod + def is_service_running(cls, name: str, use_sudo: bool = True) -> bool: + """Check if MCS process is running. + + :param name: mcs process name + :type name: str + :param use_sudo: use sudo or not, defaults to True + :type use_sudo: bool, optional + :return: True if mcs process is running, otherwise False + :rtype: bool + """ + return cls.process_dispatcher.is_service_running( + cls._get_prog_name(name), use_sudo + ) + + @classmethod def start(cls, name: str, is_primary: bool, use_sudo: bool) -> bool: """Start mcs process. @@ -299,20 +334,9 @@ class MCSProcessManager: # TODO: do we need here force stop DMLProc as a method argument? if is_primary and name == 'DMLProc': - logging.info( - 'Trying to gracefully stop DMLProc using DBRM commands.' - ) try: - with DBRM() as dbrm: - dbrm.set_system_state( - ['SS_ROLLBACK', 'SS_SHUTDOWN_PENDING'] - ) + cls.gracefully_stop_dmlproc() except (ConnectionRefusedError, RuntimeError): - logging.error( - 'Cannot set SS_ROLLBACK and SS_SHUTDOWN_PENDING ' - 'using DBRM while trying to gracefully auto stop DMLProc.' - 'Continue with a regular stop method.' 
- ) # stop DMLProc using regular signals or systemd return cls.process_dispatcher.stop( cls._get_prog_name(name), is_primary, use_sudo diff --git a/cmapi/cmapi_server/managers/transaction.py b/cmapi/cmapi_server/managers/transaction.py new file mode 100644 index 000000000..10db998df --- /dev/null +++ b/cmapi/cmapi_server/managers/transaction.py @@ -0,0 +1,136 @@ +"""Module related to CMAPI transaction management logic.""" +import logging +from contextlib import ContextDecorator +from signal import ( + SIGINT, SIGTERM, SIGHUP, SIG_DFL, signal, default_int_handler +) +from typing import Optional, Type + +from cmapi_server.constants import DEFAULT_MCS_CONF_PATH +from cmapi_server.exceptions import CMAPIBasicError +from cmapi_server.helpers import ( + get_id, commit_transaction, rollback_transaction, start_transaction +) + + +class TransactionManager(ContextDecorator): + """Context manager and decorator to put any code inside CMAPI transaction. + + :param timeout: time in sec after transaction will be autocommitted, + defaults to 300.0 + + :param timeout: _description_, defaults to 300 + :type timeout: float, optional + :param txn_id: custom transaction id, defaults to None + :type txn_id: Optional[int], optional + :param handle_signals: handle specific signals or not, defaults to False + :type handle_signals: bool, optional + """ + + def __init__( + self, timeout: float = 300, txn_id: Optional[int] = None, + handle_signals: bool = False + ): + self.timeout = timeout + self.txn_id = txn_id or get_id() + self.handle_signals = handle_signals + self.active_transaction = False + + def _handle_exception( + self, exc: Optional[Type[Exception]] = None, + signum: Optional[int] = None + ) -> None: + """Handle raised exceptions. + + We need to rollback transaction in some cases and return back default + signal handlers. 
+ + :param exc: exception passed, defaults to None + :type exc: Optional[Type[Exception]], optional + :param signum: signal if it cause exception, defaults to None + :type signum: Optional[int], optional + :raises exc: raises passed exception + """ + # message = 'Got exception in transaction manager' + if (exc or signum) and self.active_transaction: + self.rollback_transaction() + self.set_default_signals() + raise exc + + def _handle_signal(self, signum, frame) -> None: + """Handler for signals. + + :param signum: signal number + :type signum: int + """ + logging.error(f'Caught signal "{signum}" in transaction manager.') + self._handle_exception(signum=signum) + + def set_custom_signals(self) -> None: + """Set handlers for several signals.""" + # register handler for signals for proper handling them + for sig in SIGINT, SIGTERM, SIGHUP: + signal(sig, self._handle_signal) + + def set_default_signals(self) -> None: + """Return defalt handlers for specific signals.""" + if self.handle_signals: + signal(SIGINT, default_int_handler) + signal(SIGTERM, SIG_DFL) + signal(SIGHUP, SIG_DFL) + + def rollback_transaction(self) -> None: + """Rollback transaction.""" + try: + rollback_transaction(self.txn_id) + self.active_transaction = False + logging.debug(f'Success rollback of transaction "{self.txn_id}".') + except Exception: + logging.error( + f'Error while rollback transaction "{self.txn_id}"', + exc_info=True + ) + + def commit_transaction(self): + """Commit transaction.""" + try: + commit_transaction( + self.txn_id, cs_config_filename=DEFAULT_MCS_CONF_PATH + ) + except Exception: + logging.error(f'Error while committing transaction {self.txn_id}') + self.rollback_transaction() + self.set_default_signals() + raise + + def __enter__(self): + if self.handle_signals: + self.set_custom_signals() + try: + suceeded, _transaction_id, successes = start_transaction( + cs_config_filename=DEFAULT_MCS_CONF_PATH, + txn_id=self.txn_id, timeout=self.timeout + ) + except Exception as 
exc: + logging.error('Error while starting the transaction.') + self._handle_exception(exc=exc) + if not suceeded: + self._handle_exception( + exc=CMAPIBasicError('Starting transaction isn\'t succesful.') + ) + if suceeded and len(successes) == 0: + self._handle_exception( + exc=CMAPIBasicError('There are no nodes in the cluster.') + ) + self.active_transaction = True + return self + + def __exit__(self, *exc): + if exc[0] and self.active_transaction: + self.rollback_transaction() + self.set_default_signals() + return False + if self.active_transaction: + self.commit_transaction() + self.set_default_signals() + return True diff --git a/cmapi/cmapi_server/process_dispatchers/container.py b/cmapi/cmapi_server/process_dispatchers/container.py index 0aeccae7a..7db927b32 100644 --- a/cmapi/cmapi_server/process_dispatchers/container.py +++ b/cmapi/cmapi_server/process_dispatchers/container.py @@ -107,6 +107,9 @@ class ContainerDispatcher(BaseDispatcher): :type use_sudo: bool, optional :return: True if service is running, otherwise False :rtype: bool + + ..Note: + Not working with multiple services at a time. """ try: cls._get_proc_object(service) diff --git a/cmapi/cmapi_server/process_dispatchers/systemd.py b/cmapi/cmapi_server/process_dispatchers/systemd.py index 7d3e7e305..8b7b2714d 100644 --- a/cmapi/cmapi_server/process_dispatchers/systemd.py +++ b/cmapi/cmapi_server/process_dispatchers/systemd.py @@ -55,7 +55,7 @@ class SystemdDispatcher(BaseDispatcher): """Check if systemd service is running. 
:param service: service name - :type service: str, optional + :type service: str :param use_sudo: use sudo or not, defaults to True :type use_sudo: bool, optional :return: True if service is running, otherwise False diff --git a/cmapi/mcs_cluster_tool/__main__.py b/cmapi/mcs_cluster_tool/__main__.py index 3b433ecfc..a246bc304 100644 --- a/cmapi/mcs_cluster_tool/__main__.py +++ b/cmapi/mcs_cluster_tool/__main__.py @@ -4,7 +4,9 @@ import sys import typer from cmapi_server.logging_management import dict_config, add_logging_level -from mcs_cluster_tool import cluster_app, cmapi_app +from mcs_cluster_tool import ( + cluster_app, cmapi_app, backup_commands, restore_commands +) from mcs_cluster_tool.constants import MCS_CLI_LOG_CONF_PATH @@ -16,11 +18,15 @@ app = typer.Typer( 'MCS services' ), ) -app.add_typer(cluster_app.app, name="cluster") -app.add_typer(cmapi_app.app, name="cmapi") +app.add_typer(cluster_app.app, name='cluster') +app.add_typer(cmapi_app.app, name='cmapi') +app.command()(backup_commands.backup) +app.command('backup-dbrm')(backup_commands.dbrm_backup) +app.command()(restore_commands.restore) +app.command('restore-dbrm')(restore_commands.dbrm_restore) -if __name__ == "__main__": +if __name__ == '__main__': add_logging_level('TRACE', 5) #TODO: remove when stadalone mode added. 
dict_config(MCS_CLI_LOG_CONF_PATH) logger = logging.getLogger('mcs_cli') diff --git a/cmapi/mcs_cluster_tool/backup_commands.py b/cmapi/mcs_cluster_tool/backup_commands.py new file mode 100644 index 000000000..c0679ded5 --- /dev/null +++ b/cmapi/mcs_cluster_tool/backup_commands.py @@ -0,0 +1,353 @@ +"""Typer application for backup Columnstore data.""" +import logging +import sys +from datetime import datetime +from typing_extensions import Annotated + +import typer + +from cmapi_server.process_dispatchers.base import BaseDispatcher +from mcs_cluster_tool.constants import MCS_BACKUP_MANAGER_SH +from mcs_cluster_tool.decorators import handle_output +from mcs_cluster_tool.helpers import cook_sh_arg + + +logger = logging.getLogger('mcs_cli') +# pylint: disable=unused-argument, too-many-arguments, too-many-locals +# pylint: disable=invalid-name, line-too-long + + +@handle_output +def backup( + bl: Annotated[ + str, + typer.Option( + '-bl', '--backup-location', + help=( + 'What directory to store the backups on this machine or the target machine.\n' + 'Consider write permissions of the scp user and the user running this script.\n' + 'Mariadb-backup will use this location as a tmp dir for S3 and remote backups temporarily.\n' + 'Example: /mnt/backups/' + ) + ) + ] = '/tmp/backups/', + bd: Annotated[ + str, + typer.Option( + '-bd', '--backup-destination', + help=( + 'Are the backups going to be stored on the same machine this ' + 'script is running on or another server - if Remote you need ' + 'to setup scp=' + 'Options: "Local" or "Remote"' + ) + ) + ] = 'Local', + scp: Annotated[ + str, + typer.Option( + '-scp', + help=( + 'Used only if --backup-destination="Remote".\n' + 'The user/credentials that will be used to scp the backup ' + 'files\n' + 'Example: "centos@10.14.51.62"' + ) + ) + ] = '', + bb: Annotated[ + str, + typer.Option( + '-bb', '--backup-bucket', + help=( + 'Only used if --storage=S3\n' + 'Name of the bucket to store the columnstore backups.\n' + 'Example: 
"s3://my-cs-backups"' + ) + ) + ] = '', + url: Annotated[ + str, + typer.Option( + '-url', '--endpoint-url', + help=( + 'Used by on premise S3 vendors.\n' + 'Example: "http://127.0.0.1:8000"' + ) + ) + ] = '', + nv_ssl: Annotated[ + bool, + typer.Option( + '-nv-ssl/-v-ssl','--no-verify-ssl/--verify-ssl', + help='Skips verifying ssl certs, useful for onpremise s3 storage.' + ) + ] = False, + s: Annotated[ + str, + typer.Option( + '-s', '--storage', + help=( + 'What storage topogoly is being used by Columnstore - found ' + 'in /etc/columnstore/storagemanager.cnf.\n' + 'Options: "LocalStorage" or "S3"' + ) + ) + ] = 'LocalStorage', + i: Annotated[ + str, + typer.Option( + '-i', '--incremental', + help=( + 'Adds columnstore deltas to an existing full backup. ' + 'Backup folder to apply increment could be a value or ' + '"auto_most_recent" - the incremental backup applies to ' + 'last full backup.' + ), + show_default=False + ) + ] = '', + ha: Annotated[ + bool, + typer.Option( + '-ha/-no-ha', '--highavilability/--no-highavilability', + help=( + 'Hint wether shared storage is attached @ below on all nodes ' + 'to see all data\n' + ' HA LocalStorage ( /var/lib/columnstore/dataX/ )\n' + ' HA S3 ( /var/lib/columnstore/storagemanager/ )' + ) + ) + ] = False, + f: Annotated[ + str, + typer.Option( + '-f', '--config-file', + help='Path to backup configuration file to load variables from.', + show_default=False + ) + ] = '', + sbrm: Annotated[ + bool, + typer.Option( + '-sbrm/-no-sbrm', '--skip-save-brm/--no-skip-save-brm', + help=( + 'Skip saving brm prior to running a backup - ' + 'ideal for dirty backups.' + ) + ) + ] = False, + spoll: Annotated[ + bool, + typer.Option( + '-spoll/-no-spoll', '--skip-polls/--no-skip-polls', + help='Skip sql checks confirming no write/cpimports running.' + ) + ] = False, + slock: Annotated[ + bool, + typer.Option( + '-slock/-no-slock', '--skip-locks/--no-skip-locks', + help='Skip issuing write locks - ideal for dirty backups.' 
+ ) + ] = False, + smdb: Annotated[ + bool, + typer.Option( + '-smdb/-no-smdb', '--skip-mariadb-backup/--no-skip-mariadb-backup', + help=( + 'Skip running a mariadb-backup for innodb data - ideal for ' + 'incremental dirty backups.' + ) + ) + ] = False, + sb: Annotated[ + bool, + typer.Option( + '-sb/-no-sb', '--skip-bucket-data/--no-skip-bucket-data', + help='Skip taking a copy of the columnstore data in the bucket.' + ) + ] = False, + pi: Annotated[ + int, + typer.Option( + '-pi', '--poll-interval', + help=( + 'Number of seconds between poll checks for active writes & ' + 'cpimports.' + ) + ) + ] = 5, + pmw: Annotated[ + int, + typer.Option( + '-pmw', '--poll-max-wait', + help=( + 'Max number of minutes for polling checks for writes to wait ' + 'before exiting as a failed backup attempt.' + ) + ) + ] = 60, + q: Annotated[ + bool, + typer.Option( + '-q/-no-q', '--quiet/--no-quiet', + help='Silence verbose copy command outputs.' + ) + ] = False, + c: Annotated[ + str, + typer.Option( + '-c', '--compress', + help='Compress backup in X format - Options: [ pigz ].', + show_default=False + ) + ] = '', + P: Annotated[ + int, + typer.Option( + '-P', '--parallel', + help=( + 'Determines if columnstore data directories will have ' + 'multiple rsync running at the same time for different ' + 'subfolders to parallelize writes. ' + 'Ignored if "-c/--compress" argument not set.' + ) + ) + ] = 4, + nb: Annotated[ + str, + typer.Option( + '-nb', '--name-backup', + help='Define the name of the backup - default: $(date +%m-%d-%Y)' + ) + ] = datetime.now().strftime('%m-%d-%Y'), + m: Annotated[ + str, + typer.Option( + '-m', '--mode', + help=( + 'Modes ["direct","indirect"] - direct backups run on the ' + 'columnstore nodes themselves. 
indirect run on another ' + 'machine that has read-only mounts associated with ' + 'columnstore/mariadb\n' + ), + hidden=True + ) + ] = 'direct', + r: Annotated[ + int, + typer.Option( + '-r', '--retention-days', + help=( + 'Retain backups created within the last X days, ' + 'default 0 == keep all backups.' + ) + ) + ] = 0, +): + """Backup Columnstore and/or MariDB data.""" + + # Local Storage Examples: + # ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage + # ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage -P 8 + # ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage --incremental 02-18-2022 + # ./$0 backup -bl /tmp/backups/ -bd Remote -scp root@172.31.6.163 -s LocalStorage + + # S3 Examples: + # ./$0 backup -bb s3://my-cs-backups -s S3 + # ./$0 backup -bb s3://my-cs-backups -c pigz --quiet -sb + # ./$0 backup -bb gs://my-cs-backups -s S3 --incremental 02-18-2022 + # ./$0 backup -bb s3://my-onpremise-bucket -s S3 -url http://127.0.0.1:8000 + + # Cron Example: + # */60 */24 * * * root bash /root/$0 -bb s3://my-cs-backups -s S3 >> /root/csBackup.log 2>&1 + + arguments = [] + for arg_name, value in locals().items(): + sh_arg = cook_sh_arg(arg_name, value) + if sh_arg is None: + continue + arguments.append(sh_arg) + cmd = f'{MCS_BACKUP_MANAGER_SH} backup {" ".join(arguments)}' + success, _ = BaseDispatcher.exec_command(cmd, stdout=sys.stdout) + return {'success': success} + + +@handle_output +def dbrm_backup( + m: Annotated[ + str, + typer.Option( + '-m', '--mode', + help=( + '"loop" or "once" ; Determines if this script runs in a ' + 'forever loop sleeping -i minutes or just once.' + ), + ) + ] = 'once', + i: Annotated[ + int, + typer.Option( + '-i', '--interval', + help='Number of minutes to sleep when --mode=loop.' 
+ ) + ] = 90, + r: Annotated[ + int, + typer.Option( + '-r', '--retention-days', + help=( + 'Retain dbrm backups created within the last X days, ' + 'the rest are deleted' + ) + ) + ] = 7, + p: Annotated[ + str, + typer.Option( + '-p', '--path', + help='Path of where to save the dbrm backups on disk.' + ) + ] = '/tmp/dbrm_backups', + nb: Annotated[ + str, + typer.Option( + '-nb', '--name-backup', + help='Custom name to prefex dbrm backups with.' + ) + ] = 'dbrm_backup', + q: Annotated[ + bool, + typer.Option( + '-q/-no-q', '--quiet/--no-quiet', + help='Silence verbose copy command outputs.' + ) + ] = False, + ssm: Annotated[ + bool, + typer.Option( + '-ssm/-no-ssm', '--skip-storage-manager/--no-skip-storage-manager', + help='Skip backing up storagemanager directory.' + ) + ] = False, +): + """Columnstore DBRM Backup.""" + + # Default: ./$0 dbrm_backup -m once --retention-days 7 --path /tmp/dbrm_backups + + # Examples: + # ./$0 dbrm_backup --mode loop --interval 90 --retention-days 7 --path /mnt/dbrm_backups + # ./$0 dbrm_backup --mode once --retention-days 7 --path /mnt/dbrm_backups -nb my-one-off-backup + + # Cron Example: + # */60 */3 * * * root bash /root/$0 dbrm_backup -m once --retention-days 7 --path /tmp/dbrm_backups >> /tmp/dbrm_backups/cs_backup.log 2>&1 + arguments = [] + for arg_name, value in locals().items(): + sh_arg = cook_sh_arg(arg_name, value) + if sh_arg is None: + continue + arguments.append(sh_arg) + cmd = f'{MCS_BACKUP_MANAGER_SH} dbrm_backup {" ".join(arguments)}' + success, _ = BaseDispatcher.exec_command(cmd, stdout=sys.stdout) + return {'success': success} diff --git a/cmapi/mcs_cluster_tool/cluster_app.py b/cmapi/mcs_cluster_tool/cluster_app.py index 27963adf8..07934f31e 100644 --- a/cmapi/mcs_cluster_tool/cluster_app.py +++ b/cmapi/mcs_cluster_tool/cluster_app.py @@ -3,14 +3,26 @@ Formally this module contains all subcommands for "mcs cluster" cli command. 
""" import logging +import time +from datetime import datetime, timedelta from typing import List, Optional import pyotp +import requests import typer +from typing_extensions import Annotated -from cmapi_server.constants import SECRET_KEY +from cmapi_server.constants import ( + CMAPI_CONF_PATH, DEFAULT_MCS_CONF_PATH, SECRET_KEY +) +from cmapi_server.exceptions import CMAPIBasicError from cmapi_server.handlers.cluster import ClusterHandler +from cmapi_server.helpers import ( + get_config_parser, get_current_key, get_version, build_url +) +from cmapi_server.managers.transaction import TransactionManager from mcs_cluster_tool.decorators import handle_output +from mcs_node_control.models.node_config import NodeConfig logger = logging.getLogger('mcs_cli') @@ -32,9 +44,121 @@ def status(): @app.command() @handle_output -def stop(): +@TransactionManager( + timeout=timedelta(days=1).total_seconds(), handle_signals=True +) +def stop( + interactive: Annotated[ + bool, + typer.Option( + '--interactive/--no-interactive', '-i/-no-i', + help=( + 'Use this option on active cluster as interactive stop ' + 'waits for current writes to complete in DMLProc before ' + 'shutting down. Ensuring consistency, preventing data loss ' + 'of active writes.' + ), + ) + ] = False, + timeout: Annotated[ + int, + typer.Option( + '-t', '--timeout', + help=( + 'Time in seconds to wait for DMLproc to gracefully stop.' + 'Warning: Low wait timeout values could result in data loss ' + 'if the cluster is very active.' + 'In interactive mode means delay time between promts.' + ) + ) + ] = 15, + force: Annotated[ + bool, + typer.Option( + '--force/--no-force', '-f/-no-f', + help=( + 'Force stops Columnstore.' + 'Warning: This could cause data corruption and/or data loss.' 
+ ), + #TODO: hide from help till not investigated in decreased timeout + # affect + hidden=True + ) + ] = False +): """Stop the Columnstore cluster.""" - return ClusterHandler.shutdown(logger=logger) + + start_time = str(datetime.now()) + if interactive: + # TODO: for standalone cli tool need to change primary detection + # method. Partially move logic below to ClusterController + nc = NodeConfig() + root = nc.get_current_config_root( + config_filename=DEFAULT_MCS_CONF_PATH + ) + primary_node = root.find("./PrimaryNode").text + cfg_parser = get_config_parser(CMAPI_CONF_PATH) + api_key = get_current_key(cfg_parser) + version = get_version() + + headers = {'x-api-key': api_key} + body = {'force': False, 'timeout': timeout} + url = f'https://{primary_node}:8640/cmapi/{version}/node/stop_dmlproc' + try: + resp = requests.put( + url, verify=False, headers=headers, json=body, + timeout=timeout+1 + ) + resp.raise_for_status() + except Exception as err: + raise CMAPIBasicError( + f'Error while stopping DMLProc on primary node.' + ) from err + + force = True + while True: + time.sleep(timeout) + url = build_url( + base_url=primary_node, port=8640, + query_params={'process_name': 'DMLProc'}, + path=f'cmapi/{version}/node/is_process_running', + ) + try: + resp = requests.get( + url, verify=False, headers=headers, timeout=timeout + ) + resp.raise_for_status() + except Exception as err: + raise CMAPIBasicError( + f'Error while getting mcs DMLProc status.' + ) from err + + # check DMLPRoc state + # if ended, show message and break + dmlproc_running = resp.json()['running'] + if not dmlproc_running: + logging.info( + 'DMLProc stopped gracefully. ' + 'Continue stopping other processes.' + ) + break + else: + force = typer.confirm( + 'DMLProc is still running. ' + 'Do you want to force stop? 
' + 'WARNING: Could cause data loss and/or broken cluster.', + prompt_suffix=' ' + ) + if force: + break + else: + continue + if force: + # TODO: investigate more on how changing the hardcoded timeout + # could affect put_config (helpers.py broadcast_config) operation + timeout = 0 + _ = ClusterHandler.shutdown(logger=logger, in_transaction=True) + return {'timestamp': start_time} @app.command() diff --git a/cmapi/mcs_cluster_tool/constants.py b/cmapi/mcs_cluster_tool/constants.py index 796259ff5..ec988175c 100644 --- a/cmapi/mcs_cluster_tool/constants.py +++ b/cmapi/mcs_cluster_tool/constants.py @@ -1,4 +1,9 @@ import os +from cmapi_server.constants import MCS_INSTALL_BIN + + MCS_CLI_ROOT_PATH = os.path.dirname(__file__) MCS_CLI_LOG_CONF_PATH = os.path.join(MCS_CLI_ROOT_PATH, 'mcs_cli_log.conf') + +MCS_BACKUP_MANAGER_SH = os.path.join(MCS_INSTALL_BIN, 'mcs_backup_manager.sh') diff --git a/cmapi/mcs_cluster_tool/helpers.py b/cmapi/mcs_cluster_tool/helpers.py new file mode 100644 index 000000000..5aceba756 --- /dev/null +++ b/cmapi/mcs_cluster_tool/helpers.py @@ -0,0 +1,29 @@ +"""Module with helper functions for mcs cli tool.""" +from typing import Union + + +def cook_sh_arg(arg_name: str, value:Union[str, int, bool]) -> str: + """Convert argument and and value from function locals to bash argument. 
+ + :param arg_name: function argument name + :type arg_name: str + :param value: function argument value + :type value: Union[str, int, bool] + :return: bash argument string + :rtype: str + """ + # skip "arguments" list and Typer ctx variables from local scope + if arg_name in ('arguments', 'ctx'): + return None + # skip args that have empty string as value + if value == '': + return None + if '_' in arg_name: + arg_name = arg_name.replace('_', '-') + # skip boolean args that have False value + if isinstance(value, bool): + if not value: + return None + # if True value presented just pass only arg name without value + value = '' + return f'-{arg_name} {value}' if value else f'-{arg_name}' diff --git a/cmapi/mcs_cluster_tool/restore_commands.py b/cmapi/mcs_cluster_tool/restore_commands.py new file mode 100644 index 000000000..e4881f917 --- /dev/null +++ b/cmapi/mcs_cluster_tool/restore_commands.py @@ -0,0 +1,324 @@ +"""Typer application for restore Columnstore data.""" +import logging +import sys +from typing_extensions import Annotated + +import typer + +from cmapi_server.process_dispatchers.base import BaseDispatcher +from mcs_cluster_tool.constants import MCS_BACKUP_MANAGER_SH +from mcs_cluster_tool.decorators import handle_output +from mcs_cluster_tool.helpers import cook_sh_arg + + +logger = logging.getLogger('mcs_cli') +# pylint: disable=unused-argument, too-many-arguments, too-many-locals +# pylint: disable=invalid-name, line-too-long + + +@handle_output +def restore( + l: Annotated[ + str, + typer.Option( + '-l', '--load', + help='What date folder to load from the backup_location.' 
+ ) + ] = '', + bl: Annotated[ + str, + typer.Option( + '-bl', '--backup-location', + help=( + 'Where the backup to load is found.\n' + 'Example: /mnt/backups/' + ) + ) + ] = '/tmp/backups/', + bd: Annotated[ + str, + typer.Option( + '-bd', '--backup_destination', + help=( + 'Is this backup on the same or remote server compared to ' + 'where this script is running.\n' + 'Options: "Local" or "Remote"' + ) + ) + ] = 'Local', + scp: Annotated[ + str, + typer.Option( + '-scp', '--secure-copy-protocol', + help=( + 'Used only if --backup-destination=Remote' + 'The user/credentials that will be used to scp the backup files.' + 'Example: "centos@10.14.51.62"' + ) + ) + ] = '', + bb: Annotated[ + str, + typer.Option( + '-bb', '--backup-bucket', + help=( + 'Only used if --storage=S3\n' + 'Name of the bucket to store the columnstore backups.\n' + 'Example: "s3://my-cs-backups"' + ) + ) + ] = '', + url: Annotated[ + str, + typer.Option( + '-url', '--endpoint-url', + help=( + 'Used by on premise S3 vendors.\n' + 'Example: "http://127.0.0.1:8000"' + ) + ) + ] = '', + s: Annotated[ + str, + typer.Option( + '-s', '--storage', + help=( + 'What storage topogoly is being used by Columnstore - found ' + 'in /etc/columnstore/storagemanager.cnf.\n' + 'Options: "LocalStorage" or "S3"' + ) + ) + ] = 'LocalStorage', + dbs: Annotated[ + int, + typer.Option( + '-dbs', '--dbroots', + help='Number of database roots in the backup.' + ) + ] = 1, + pm: Annotated[ + str, + typer.Option( + '-pm', '--nodeid', + help=( + 'Forces the handling of the restore as this node as opposed ' + 'to whats detected on disk.' + ) + ) + ] = '', + nb: Annotated[ + str, + typer.Option( + '-nb', '--new-bucket', + help=( + 'Defines the new bucket to copy the s3 data to from the ' + 'backup bucket. Use -nb if the new restored cluster should ' + 'use a different bucket than the backup bucket itself.' 
+ ) + ) + ] = '', + nr: Annotated[ + str, + typer.Option( + '-nr', '--new-region', + help=( + 'Defines the region of the new bucket to copy the s3 data to ' + 'from the backup bucket.' + ) + ) + ] = '', + nk: Annotated[ + str, + typer.Option( + '-nk', '--new-key', + help='Defines the aws key to connect to the new_bucket.' + ) + ] = '', + ns: Annotated[ + str, + typer.Option( + '-ns', '--new-secret', + help=( + 'Defines the aws secret of the aws key to connect to the ' + 'new_bucket.' + ) + ) + ] = '', + ha: Annotated[ + bool, + typer.Option( + '-ha/-no-ha', '--highavilability/--no-highavilability', + help=( + 'Flag for high available systems (meaning shared storage ' + 'exists supporting the topology so that each node sees ' + 'all data)' + ) + ) + ] = False, + cont: Annotated[ + bool, + typer.Option( + '-cont/-no-cont', '--continue/--no-continue', + help=( + 'This acknowledges data in your --new_bucket is ok to delete ' + 'when restoring S3. When set to true skips the enforcement ' + 'that new_bucket should be empty prior to starting a restore.' + ) + ) + ] = False, + f: Annotated[ + str, + typer.Option( + '-f', '--config-file', + help='Path to backup configuration file to load variables from.', + show_default=False + ) + ] = '', + smdb: Annotated[ + bool, + typer.Option( + '-smdb/-no-smdb', '--skip-mariadb-backup/--no-skip-mariadb-backup', + help=( + 'Skip restoring mariadb server via mariadb-backup - ideal for ' + 'only restoring columnstore.' + ) + ) + ] = False, + sb: Annotated[ + bool, + typer.Option( + '-sb/-no-sb', '--skip-bucket-data/--no-skip-bucket-data', + help=( + 'Skip restoring columnstore data in the bucket - ideal if ' + 'looking to only restore mariadb server.' + ) + ) + ] = False, + m: Annotated[ + str, + typer.Option( + '-m', '--mode', + help=( + 'Modes ["direct","indirect"] - direct backups run on the ' + 'columnstore nodes themselves. 
indirect run on another ' + 'machine that has read-only mounts associated with ' + 'columnstore/mariadb\n' + ), + hidden=True + ) + ] = 'direct', + c: Annotated[ + str, + typer.Option( + '-c', '--compress', + help=( + 'Hint that the backup is compressed in X format. ' + 'Options: [ pigz ].' + ), + show_default=False + ) + ] = '', + P: Annotated[ + int, + typer.Option( + '-P', '--parallel', + help=( + 'Determines number of decompression and mdbstream threads. ' + 'Ignored if "-c/--compress" argument not set.' + ) + ) + ] = 4, + q: Annotated[ + bool, + typer.Option( + '-q/-no-q', '--quiet/--no-quiet', + help='Silence verbose copy command outputs.' + ) + ] = False, + nv_ssl: Annotated[ + bool, + typer.Option( + '-nv-ssl/-v-ssl','--no-verify-ssl/--verify-ssl', + help='Skips verifying ssl certs, useful for onpremise s3 storage.' + ) + ] = False, +): + """Restore Columnstore (and/or MariaDB) data.""" + + # Local Storage Examples: + # ./$0 restore -s LocalStorage -bl /tmp/backups/ -bd Local -l 12-29-2021 + # ./$0 restore -s LocalStorage -bl /tmp/backups/ -bd Remote -scp root@172.31.6.163 -l 12-29-2021 + + # S3 Storage Examples: + # ./$0 restore -s S3 -bb s3://my-cs-backups -l 12-29-2021 + # ./$0 restore -s S3 -bb gs://on-premise-bucket -l 12-29-2021 -url http://127.0.0.1:8000 + # ./$0 restore -s S3 -bb s3://my-cs-backups -l 08-16-2022 -nb s3://new-data-bucket -nr us-east-1 -nk AKIAxxxxxxx3FHCADF -ns GGGuxxxxxxxxxxnqa72csk5 -ha + arguments = [] + for arg_name, value in locals().items(): + sh_arg = cook_sh_arg(arg_name, value) + if sh_arg is None: + continue + arguments.append(sh_arg) + cmd = f'{MCS_BACKUP_MANAGER_SH} restore {" ".join(arguments)}' + success, _ = BaseDispatcher.exec_command(cmd, stdout=sys.stdout) + return {'success': success} + + +@handle_output +def dbrm_restore( + p: Annotated[ + str, + typer.Option( + '-p', '--path', + help='Path of where dbrm backups stored on disk.' 
+ ) + ] = '/tmp/dbrm_backups', + d: Annotated[ + str, + typer.Option( + '-d', '--directory', + help='Date or directory chose to restore from.' + ) + ] = '', + ns: Annotated[ + bool, + typer.Option( + '-ns', '--no-start', + help=( + 'Do not attempt columnstore startup post dbrm_restore.' + ) + ) + ] = False, + sdbk: Annotated[ + bool, + typer.Option( + '-sdbk/-no-sdbk', '--skip-dbrm-backup/--no-skip-dbrm-backup', + help=( + 'Skip backing up dbrms before restoring.' + ) + ) + ] = True, + ssm: Annotated[ + bool, + typer.Option( + '-ssm/-no-ssm', '--skip-storage-manager/--no-skip-storage-manager', + help='Skip backing up storagemanager directory.' + ) + ] = True, +): + """Restore Columnstore DBRM data.""" + + # Default: ./$0 dbrm_restore --path /tmp/dbrm_backups + + # Examples: + # ./$0 dbrm_restore --path /tmp/dbrm_backups --directory dbrm_backup_20240318_172842 + # ./$0 dbrm_restore --path /tmp/dbrm_backups --directory dbrm_backup_20240318_172842 --no-start + arguments = [] + for arg_name, value in locals().items(): + sh_arg = cook_sh_arg(arg_name, value) + if sh_arg is None: + continue + arguments.append(sh_arg) + cmd = f'{MCS_BACKUP_MANAGER_SH} dbrm_restore {" ".join(arguments)}' + success, _ = BaseDispatcher.exec_command(cmd, stdout=sys.stdout) + return {'success': success} diff --git a/cmapi/mcs_node_control/models/node_config.py b/cmapi/mcs_node_control/models/node_config.py index 78a16d78b..91930b0bf 100644 --- a/cmapi/mcs_node_control/models/node_config.py +++ b/cmapi/mcs_node_control/models/node_config.py @@ -115,7 +115,6 @@ class NodeConfig: maintenance = etree.SubElement(root, 'Maintenance') maintenance.text = str(False).lower() - def upgrade_config(self, tree=None, root=None, upgrade=True): """ Add the parts that might be missing after an upgrade from an earlier @@ -290,7 +289,6 @@ class NodeConfig: return pm_num raise Exception("Did not find my IP addresses or names in the SystemModuleConfig section") - def rollback_config(self, config_filename: str = 
DEFAULT_MCS_CONF_PATH): """Rollback the configuration. @@ -307,7 +305,6 @@ class NodeConfig: if config_file_copy.exists(): replace(backup_path, config_file) # atomic replacement - def get_current_config(self, config_filename: str = DEFAULT_MCS_CONF_PATH): """Retrievs current configuration. @@ -325,7 +322,6 @@ class NodeConfig: tree.getroot(), pretty_print=True, encoding='unicode' ) - def get_current_sm_config( self, config_filename: str = DEFAULT_SM_CONF_PATH ) -> str: @@ -343,7 +339,6 @@ class NodeConfig: module_logger.error(f"{func_name} SM config {config_filename} not found.") return '' - def s3_enabled(self, config_filename: str = DEFAULT_SM_CONF_PATH) -> bool: """Checks if SM is enabled diff --git a/cmapi/scripts/cs_package_manager.sh b/cmapi/scripts/cs_package_manager.sh new file mode 100644 index 000000000..fd164ee7e --- /dev/null +++ b/cmapi/scripts/cs_package_manager.sh @@ -0,0 +1,2095 @@ +#!/bin/bash +# Documentation: bash cs_package_manager.sh help + +# Variables +enterprise_token="" +dev_drone_key="" +cs_pkg_manager_version="3.3" +if [ ! -f /var/lib/columnstore/local/module ]; then pm="pm1"; else pm=$(cat /var/lib/columnstore/local/module); fi; +pm_number=$(echo "$pm" | tr -dc '0-9') +action=$1 + +print_help_text() { + echo "Version $cs_pkg_manager_version + +Example Remove: + bash $0 remove + bash $0 remove all + +Example Install: + bash $0 install [enterprise|community|dev] [version|branch] [build num] --token xxxxxxx + bash $0 install enterprise 10.6.12-8 --token xxxxxx + bash $0 install community 11.1 + bash $0 install dev develop cron/8629 + bash $0 install dev develop-23.02 pull_request/7256 + +Example Check: + bash $0 check community + bash $0 check enterprise +" +} + +wait_cs_down() { + retries=0 + max_number_of_retries=20 + + if ! command -v pgrep &> /dev/null; then + printf "\n[!] pgrep not found. 
Please install pgrep\n\n" + exit 1; + fi + + # Loop until the maximum number of retries is reached + # printf " - Checking Columnstore Offline ..."; + while [ $retries -lt $max_number_of_retries ]; do + # If columnstore is offline, return + cs_processlist=$(pgrep -f "PrimProc|ExeMgr|DMLProc|DDLProc|WriteEngineServer|StorageManager|controllernode|workernode|save_brm|mcs-loadbrm.py") + if [ -z "$cs_processlist" ]; then + # printf " Done \n"; + mcs_offine=true + return 0 + else + printf "\n[!] Columnstore is ONLINE - waiting 5s to retry, attempt: $retries...\n"; + if (( retries % 5 == 0 )); then + echo "PID List: $cs_processlist" + echo "$(ps aux | grep -E "PrimProc|ExeMgr|DMLProc|DDLProc|WriteEngineServer|StorageManager|controllernode|workernode|save_brm|mcs-loadbrm.py" | grep -v grep) " + fi + sleep 5 + ((retries++)) + fi + done + + # If maximum retries reached, exit with an error + echo "PID List: $cs_processlist" + printf "\n[!!!] Columnstore is still online after $max_number_of_retries retries ... exiting \n\n" + exit 2 +} + +print_and_copy() { + printf " - %-35s ..." "$1" + cp -p $1 $2 + printf " Done\n" +} + +print_and_delete() { + printf " - %-25s ..." "$1" + rm -rf $1 + printf " Done\n" +} + +init_cs_down() { + mcs_offine=false + if [ "$pm_number" == "1" ]; then + if [ -z $(pidof PrimProc) ]; then + # printf "\n[+] Columnstore offline already"; + mcs_offine=true + else + + if is_cmapi_installed ; then + confirm_cmapi_online_and_configured + + # Stop columnstore + printf "%-35s ... " " - Stopping Columnstore Engine" + if command -v mcs &> /dev/null; then + if ! mcs_output=$(mcs cluster stop); then + echo "[!] Failed stopping via mcs ... trying cmapi curl" + stop_cs_cmapi_via_curl + fi + printf "Done - $(date)\n" + + # Handle Errors with exit 0 code + if [ ! 
-z "$(echo $mcs_output | grep "Internal Server Error")" ];then + stop_cs_via_systemctl_override + fi + else + stop_cs_cmapi_via_curl + fi + else + stop_cs_via_systemctl + fi + fi + fi +} + +init_cs_up(){ + + if [ "$pm_number" == "1" ]; then + if [ -n "$(pidof PrimProc)" ]; then + + num_cs_processlist=$(pgrep -f "PrimProc|ExeMgr|DMLProc|DDLProc|WriteEngineServer|StorageManager|controllernode|workernode|save_brm|mcs-loadbrm.py" | wc -l) + if [ $num_cs_processlist -gt "0" ]; then + printf "%-35s ... $num_cs_processlist processes \n" " - Columnstore Engine Online" + fi + + else + + # Check cmapi installed + if is_cmapi_installed ; then + confirm_cmapi_online_and_configured + + # Start columnstore + printf "%-35s ..." " - Starting Columnstore Engine" + if command -v mcs &> /dev/null; then + if ! mcs_output=$(mcs cluster start); then + echo "[!] Failed starting via mcs ... trying cmapi curl" + start_cs_cmapi_via_curl + fi + printf " Done - $(date)\n" + + else + start_cs_cmapi_via_curl + fi + else + start_cs_via_systemctl + fi + fi + fi +} + +stop_columnstore() { + init_cs_down + wait_cs_down +} + +is_cmapi_installed() { + + if [ -z "$package_manager" ]; then + echo "package_manager: $package_manager" + echo "package_manager is not defined" + exit 1; + fi + + cmapi_installed_command="" + case $package_manager in + yum ) + cmapi_installed_command="yum list installed MariaDB-columnstore-cmapi &> /dev/null;"; + ;; + apt ) + cmapi_installed_command="dpkg-query -s mariadb-columnstore-cmapi &> /dev/null;"; + ;; + *) # unknown option + echo "\npackage manager not implemented: $package_manager\n" + exit 2; + esac + + if eval $cmapi_installed_command ; then + return 0 + else + return 1 + fi +} + +start_cmapi() { + + if ! command -v systemctl &> /dev/null ; then + printf "[!!] shutdown_mariadb_and_cmapi: Cant access systemctl\n\n" + exit 1; + fi + + if is_cmapi_installed ; then + printf "%-35s ..." 
" - Starting CMAPI" + if systemctl start mariadb-columnstore-cmapi ; then + printf " Done\n" + else + echo "[!!] Failed to start CMAPI" + exit 1; + fi; + fi +} + +stop_cmapi() { + + if ! command -v systemctl &> /dev/null ; then + printf "[!!] shutdown_mariadb_and_cmapi: Cant access systemctl\n\n" + exit 1; + fi + + if is_cmapi_installed ; then + printf "%-35s ..." " - Stopping CMAPI" + if systemctl stop mariadb-columnstore-cmapi ; then + printf " Done\n" + else + echo "[!!] Failed to stop CMAPI" + exit 1; + fi; + fi +} + +# Inputs: +# $1 = version already installed +# $2 = version desired to install +# Returns 0 if $2 is greater, else exit +compare_versions() { + local version1="$1" + local version2="$2" + local exit_message="\n[!] The desired upgrade version: $2 \nis NOT greater than the current installed version of $1\n\n" + + # Split version strings into arrays & remove leading 0 + IFS='._-' read -ra v1_nums <<< "${version1//.0/.}" + IFS='._-' read -ra v2_nums <<< "${version2//.0/.}" + + # Compare each segment of the version numbers + for (( i = 0; i < ${#v1_nums[@]}; i++ )); do + + v1=${v1_nums[i]} + v2=${v2_nums[i]} + #echo "Comparing $v1 & $v2" + if (( v1 > v2 )); then + #echo "Installed version is newer: $version1" + echo -e $exit_message + exit 1 + elif (( v1 < v2 )); then + #echo "Desired version is newer: $version2" + return 0 + fi + done + + # If all segments are equal, versions are the same + #echo "Both versions are the same: $version1" + echo -e $exit_message + exit 1 +} + +is_mariadb_installed() { + + if [ -z "$package_manager" ]; then + echo "package_manager: $package_manager" + echo "package_manager is not defined" + exit 1; + fi + + mariadb_installed_command="" + case $package_manager in + yum ) + mariadb_installed_command="yum list installed MariaDB-server &>/dev/null" + ;; + apt ) + mariadb_installed_command="dpkg-query -s mariadb-server &> /dev/null;" + ;; + *) # unknown option + echo "\nshutdown_mariadb_and_cmapi - package manager not 
implemented: $package_manager\n" + exit 2; + esac + + if eval $mariadb_installed_command ; then + return 0 + else + return 1 + fi + +} + +start_mariadb() { + if ! command -v systemctl &> /dev/null ; then + printf "[!!] start_mariadb: Cant access systemctl\n\n" + exit 1; + fi + + # Start MariaDB + if is_mariadb_installed ; then + printf "%-35s ..." " - Starting MariaDB Server" + if systemctl start mariadb ; then + printf " Done\n" + else + echo "[!!] Failed to start MariaDB Server" + exit 1; + fi; + fi +} + +stop_mariadb() { + if ! command -v systemctl &> /dev/null ; then + printf "[!!] stop_mariadb: Cant access systemctl\n\n" + exit 1; + fi + + # Stop MariaDB + if is_mariadb_installed ; then + printf "%-35s ..." " - Stopping MariaDB Server" + if systemctl stop mariadb ; then + printf " Done\n" + else + echo "[!!] Failed to stop MariaDB Server" + exit 1; + fi; + fi +} + +do_yum_remove() { + + if ! command -v yum &> /dev/null ; then + printf "[!!] Cant access yum\n" + exit 1; + fi + + printf "Prechecks\n" + init_cs_down + wait_cs_down + stop_mariadb + stop_cmapi + + if command -v clearShm &> /dev/null ; then + clearShm + fi + + printf "\nRemoving packages \n" + + # remove any mdb rpms on disk + if ls MariaDB-*.rpm &>/dev/null; then + print_and_delete "MariaDB-*.rpm" + fi + + # remove all current MDB packages + if yum list installed MariaDB-* &>/dev/null; then + yum remove MariaDB-* -y + fi + + # remove offical & custom yum repos + printf "\nRemoving\n" + print_and_delete "/etc/yum.repos.d/mariadb.repo" + print_and_delete "/etc/yum.repos.d/drone.repo" + + if [ "$2" == "all" ]; then + print_and_delete "/var/lib/mysql/" + print_and_delete "/var/lib/columnstore/" + print_and_delete "/etc/my.cnf.d/*" + print_and_delete "/etc/columnstore/*" + fi; +} + +do_apt_remove() { + + if ! command -v apt &> /dev/null ; then + printf "[!!] Cant access apt\n" + exit 1; + fi + + if ! command -v dpkg-query &> /dev/null ; then + printf "[!!] 
Cant access dpkg-query\n" + exit 1; + fi + + printf "\n[+] Prechecks \n" + init_cs_down + wait_cs_down + stop_mariadb + stop_cmapi + + if command -v clearShm &> /dev/null ; then + clearShm + fi + + printf "\n[+] Removing packages - $(date) ... \n" + # remove any mdb rpms on disk + if ls mariadb*.deb &>/dev/null; then + print_and_delete "mariadb*.deb" + fi + + # Delete columnstores post uninstall script for debian/ubuntu as it doesnt work + print_and_delete "/var/lib/dpkg/info/mariadb-plugin-columnstore.postrm" + + # remove all current MDB packages + if [ "$(apt list --installed mariadb-* 2>/dev/null | wc -l)" -gt 1 ]; then + if [ "$2" == "all" ]; then + DEBIAN_FRONTEND=noninteractive apt mariadb-plugin-columnstore mariadb-columnstore-cmapi --purge -y + DEBIAN_FRONTEND=noninteractive apt remove --purge -y mariadb-* + else + if ! apt remove mariadb-columnstore-cmapi --purge -y; then + printf "[!!] Failed to remove columnstore \n" + fi + + if ! apt remove mariadb-* -y; then + printf "[!!] Failed to remove the rest of mariadb \n\n" + fi + fi + fi + + if [ "$(apt list --installed mysql-common 2>/dev/null | wc -l)" -gt 1 ]; then + if ! apt remove mysql-common -y; then + printf "[!!] 
Failed to remove mysql-common \n" + fi + fi + + printf "\n[+] Removing all columnstore files & dirs\n" + if [ "$2" == "all" ]; then + print_and_delete "/var/lib/mysql" + print_and_delete "/var/lib/columnstore" + print_and_delete "/etc/columnstore" + print_and_delete "/etc/mysql/mariadb.conf.d/columnstore.cnf.rpmsave" + fi + # remove offical & custom yum repos + print_and_delete "/lib/systemd/system/mariadb.service" + print_and_delete "/lib/systemd/system/mariadb.service.d" + print_and_delete "/etc/apt/sources.list.d/mariadb.list*" + print_and_delete "/etc/apt/sources.list.d/drone.list" + systemctl daemon-reload + +} + +do_remove() { + + check_operating_system + check_package_managers + + case $distro_info in + centos | rhel | rocky ) + do_yum_remove "$@" + ;; + + ubuntu | debian ) + do_apt_remove "$@" + ;; + *) # unknown option + echo "\ndo_remove: os & version not implemented: $distro_info\n" + exit 2; + esac + + printf "\nUninstall Complete\n\n" +} + +check_package_managers() { + + package_manager=''; + if command -v apt &> /dev/null ; then + if ! command -v dpkg-query &> /dev/null ; then + printf "[!!] Cant access dpkg-query\n" + exit 1; + fi + package_manager="apt"; + fi + + if command -v yum &> /dev/null ; then + package_manager="yum"; + fi + + if [ $package_manager == '' ]; then + echo "[!!] No package manager found: yum or apt must be installed" + exit 1; + fi; +} + + +# Confirms mac have critical binaries to run this script +# As of 3/2024 supports cs_package_manager.sh check +check_mac_dependencies() { + + # Install ggrep if not exists + if ! which ggrep >/dev/null 2>&1; then + echo "Attempting Auto install of ggrep" + + if ! which brew >/dev/null 2>&1; then + echo "Attempting Auto install of brew" + bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" + fi + brew install grep + fi + + # Exit if ggrep still doesnt exist + if ! 
which ggrep >/dev/null 2>&1; then + echo "Failed to install ggrep" + echo "which ggrep" + echo "" + exit 1; + fi + +} + +check_operating_system() { + + if [[ $OSTYPE == 'darwin'* ]]; then + echo "Running on macOS" + check_mac_dependencies + + # on action=check - these values are used as triggers to prompt the user to select what OS/version they want to check against + distro_info="mac" + distro="mac" + version_id_exact=$(grep -A1 ProductVersion "/System/Library/CoreServices/SystemVersion.plist" | sed -n 's/.*\(.*\)<\/string>.*/\1/p') + version_id=$( echo "$version_id_exact" | awk -F. '{print $1}') + distro_short="${distro_info:0:3}${version_id}" + return + fi; + + distro_info=$(awk -F= '/^ID=/{gsub(/"/, "", $2); print $2}' /etc/os-release) + version_id_exact=$( grep 'VERSION_ID=' /etc/os-release | awk -F= '{gsub(/"/, "", $2); print $2}') + version_id=$( echo "$version_id_exact" | awk -F. '{print $1}') + + echo "Distro: $distro_info" + echo "Version: $version_id" + + # distros=(centos7 debian11 debian12 rockylinux8 rockylinux9 ubuntu20.04 ubuntu22.04) + case $distro_info in + centos | rhel ) + distro="${distro_info}${version_id}" + ;; + debian ) + distro="${distro_info}${version_id_exact}" + ;; + rocky ) + distro="rockylinux${version_id}" + ;; + ubuntu ) + distro="${distro_info}${version_id_exact}" + ;; + *) # unknown option + printf "\ncheck_operating_system: unknown os & version: $distro_info\n" + exit 2; + esac + distro_short="${distro_info:0:3}${version_id}" +} + +check_cpu_architecture() { + + architecture=$(uname -m) + echo "CPU: $architecture" + + # arch=(amd64 arm64) + case $architecture in + x86_64 ) + arch="amd64" + ;; + aarch64 ) + arch="arm64" + ;; + *) # unknown option + echo "Error: Unsupported architecture ($architecture)" + esac +} + +check_mdb_installed() { + packages="" + current_mariadb_version="" + case $package_manager in + yum ) + packages=$(yum list installed | grep -i mariadb) + current_mariadb_version=$(rpm -q --queryformat '%{VERSION}\n' 
MariaDB-server 2>/dev/null) + ;; + apt ) + packages=$(apt list --installed mariadb-* 2>/dev/null | grep -i mariadb); + current_mariadb_version=$(dpkg-query -f '${Version}\n' -W mariadb-server 2>/dev/null) + # remove prefix 1: & +maria~deb1 - example: current_mariadb_version="1:10.6.16.11+maria~deb1" + current_mariadb_version="${current_mariadb_version#*:}" + current_mariadb_version="${current_mariadb_version%%+*}" + ;; + *) # unknown option + printf "\ncheck_no_mdb_installed: package manager not implemented - $package_manager\n" + exit 2; + esac + + if [ -z "$packages" ]; then + printf "\nMariaDB packages are NOT installed. Please install them before continuing.\n" + echo $packages; + printf "Example: bash $0 install [community|enterprise] [version] \n\n" + exit 2; + fi; + + +} + +check_no_mdb_installed() { + + packages="" + case $distro_info in + centos | rhel | rocky ) + packages=$(yum list installed | grep -i mariadb) + ;; + ubuntu | debian ) + packages=$(apt list --installed mariadb-* 2>/dev/null | grep -i mariadb); + ;; + *) # unknown option + printf "\ncheck_no_mdb_installed: os & version not implemented: $distro_info\n" + exit 2; + esac + + if [ -n "$packages" ]; then + printf "\nMariaDB packages are installed. Please uninstall them before continuing.\n" + echo $packages; + printf "Example: bash $0 remove\n\n" + exit 2; + fi; +} + +check_aws_cli_installed() { + + if ! command -v aws &> /dev/null ; then + echo "[!] aws cli - binary could not be found" + echo "[+] Installing aws cli ..." 
+ + cli_url="https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" + case $architecture in + x86_64 ) + cli_url="https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" + ;; + aarch64 ) + cli_url="https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" + ;; + *) # unknown option + echo "Error: Unsupported architecture ($architecture)" + esac + + case $distro_info in + centos | rhel | rocky ) + rm -rf aws awscliv2.zip + yum install unzip -y; + curl "$cli_url" -o "awscliv2.zip"; + unzip -q awscliv2.zip; + sudo ./aws/install; + mv /usr/local/bin/aws /usr/bin/aws; + aws configure set default.s3.max_concurrent_requests 70 + ;; + ubuntu | debian ) + rm -rf aws awscliv2.zip + if ! sudo apt install unzip -y; then + echo "[!!] Installing Unzip Failed: Trying update" + sudo apt update -y; + sudo apt install unzip -y; + fi + curl "$cli_url" -o "awscliv2.zip"; + unzip -q awscliv2.zip; + sudo ./aws/install; + mv /usr/local/bin/aws /usr/bin/aws; + aws configure set default.s3.max_concurrent_requests 70 + ;; + *) # unknown option + printf "\nos & version not implemented: $distro_info\n" + exit 2; + esac + + + fi +} + +check_dev_build_exists() { + + if ! aws s3 ls $s3_path --no-sign-request &> /dev/null; then + printf "[!] Defined dev build doesnt exist in aws\n\n" + exit 2; + fi; +} + +do_enterprise_apt_install() { + + # Install MariaDB + apt-get clean + if ! apt install mariadb-server -y --quiet; then + printf "\n[!] Failed to install mariadb-server \n\n" + exit 1; + fi + sleep 2 + systemctl daemon-reload + systemctl enable mariadb + systemctl start mariadb + + # Install Columnstore + if ! apt install mariadb-plugin-columnstore -y --quiet; then + printf "\n[!] Failed to install columnstore \n\n" + exit 1; + fi + + # Somes cmapi is installed with columnstore - double check + if ! is_cmapi_installed ; then + if ! apt install mariadb-columnstore-cmapi jq -y --quiet; then + printf "\n[!] 
Failed to install cmapi\n\n" + mariadb -e "show status like '%Columnstore%';" + fi; + fi + + if is_cmapi_installed ; then + systemctl daemon-reload + systemctl enable mariadb-columnstore-cmapi + systemctl start mariadb-columnstore-cmapi + mariadb -e "show status like '%Columnstore%';" + sleep 2 + + confirm_cmapi_online_and_configured + init_cs_up + fi + +} + +do_enterprise_yum_install() { + + # Install MariaDB + yum clean all + yum install MariaDB-server -y + sleep 2 + systemctl enable mariadb + systemctl start mariadb + + # Install Columnstore + if ! yum install MariaDB-columnstore-engine -y; then + printf "\n[!] Failed to install columnstore\n\n" + exit 1; + fi + + # Install Cmapi + if ! yum install MariaDB-columnstore-cmapi jq -y; then + printf "\n[!] Failed to install cmapi\n\n" + + else + systemctl enable mariadb-columnstore-cmapi + systemctl start mariadb-columnstore-cmapi + mariadb -e "show status like '%Columnstore%';" + sleep 1; + + confirm_cmapi_online_and_configured + init_cs_up + fi + +} + +enterprise_install() { + + version=$3 + check_set_es_token "$@" + + if [ -z $version ]; then + printf "\n[!] Version empty: $version\n\n" + exit 1; + fi; + + echo "Token: $enterprise_token" + echo "MariaDB Version: $version" + echo "-----------------------------------------------" + + url="https://dlm.mariadb.com/enterprise-release-helpers/mariadb_es_repo_setup" + if $enterprise_staging; then + url="https://dlm.mariadb.com/$enterprise_token/enterprise-release-helpers-staging/mariadb_es_repo_setup" + fi + + # Download Repo setup script + rm -rf mariadb_es_repo_setup + curl -LO "$url" -o mariadb_es_repo_setup; + chmod +x mariadb_es_repo_setup; + if ! bash mariadb_es_repo_setup --token="$enterprise_token" --apply --mariadb-server-version="$version"; then + printf "\n[!] Failed to apply mariadb_es_repo_setup...\n\n" + exit 2; + fi; + + case $distro_info in + centos | rhel | rocky ) + + if [ ! -f "/etc/yum.repos.d/mariadb.repo" ]; then printf "\n[!] 
Expected to find mariadb.repo in /etc/yum.repos.d \n\n"; exit 1; fi; + + if $enterprise_staging; then + sed -i 's/mariadb-es-main/mariadb-es-staging/g' /etc/yum.repos.d/mariadb.repo + sed -i 's/mariadb-enterprise-server/mariadb-enterprise-staging/g' /etc/yum.repos.d/mariadb.repo + printf "\n\n[+] Adjusted mariadb.repo to: mariadb-enterprise-staging\n\n" + fi; + + do_enterprise_yum_install "$@" + ;; + ubuntu | debian ) + + if [ ! -f "/etc/apt/sources.list.d/mariadb.list" ]; then printf "\n[!] Expected to find mariadb.list in /etc/apt/sources.list.d \n\n"; exit 1; fi; + + if $enterprise_staging; then + sed -i 's/mariadb-enterprise-server/mariadb-enterprise-staging/g' /etc/apt/sources.list.d/mariadb.list + apt update + printf "\n\n[+] Adjusted mariadb.list to: mariadb-enterprise-staging\n\n" + fi; + + do_enterprise_apt_install "$@" + ;; + *) # unknown option + printf "\nenterprise_install: os & version not implemented: $distro_info\n" + exit 2; + esac +} + +community_install() { + + version=$3 + if [ -z $version ]; then + printf "Version empty: $version\n" + + exit 1; + fi; + + echo "MariaDB Community Version: $version" + echo "-----------------------------------------------" + + # Download Repo setup + rm -rf mariadb_repo_setup + + if ! curl -sS https://downloads.mariadb.com/MariaDB/mariadb_repo_setup | bash -s -- --mariadb-server-version=mariadb-$version ; then + echo "version bad or mariadb_repo_setup unavailable. exiting ..." + exit 2; + fi; + + case $distro_info in + centos | rhel | rocky ) + do_community_yum_install "$@" + ;; + ubuntu | debian ) + do_community_apt_install "$@" + ;; + *) # unknown option + printf "\ncommunity_install: os & version not implemented: $distro_info\n" + exit 2; + esac + +} + +do_community_yum_install() { + + # Install MariaDB then Columnstore + yum clean all + if ! yum install MariaDB-server -y; then + printf "\n[!] 
Failed to install MariaDB-server \n\n" + exit 1; + fi + sleep 2; + systemctl enable mariadb + systemctl start mariadb + + # Install Columnstore + if ! yum install MariaDB-columnstore-engine -y; then + printf "\n[!] Failed to install columnstore \n\n" + exit 1; + fi + + cmapi_installable=$(yum list | grep MariaDB-columnstore-cmapi) + if [ -n "$cmapi_installable" ]; then + # Install Cmapi + if ! yum install MariaDB-columnstore-cmapi jq -y; then + printf "\n[!] Failed to install cmapi\n\n" + exit 1; + else + systemctl enable mariadb-columnstore-cmapi + systemctl start mariadb-columnstore-cmapi + mariadb -e "show status like '%Columnstore%';" + sleep 2 + + printf "\nPost Install\n" + confirm_cmapi_online_and_configured + init_cs_up + fi + fi +} + +do_community_apt_install() { + + # Install MariaDB + apt-get clean + if ! apt install mariadb-server -y --quiet; then + printf "\n[!] Failed to install mariadb-server \n\n" + exit 1; + fi + sleep 2 + systemctl daemon-reload + systemctl enable mariadb + systemctl start mariadb + + # Install Columnstore + if ! apt install mariadb-plugin-columnstore -y --quiet; then + printf "\n[!] Failed to install columnstore \n\n" + exit 1; + fi; + + if ! apt install mariadb-columnstore-cmapi jq -y --quiet ; then + printf "\n[!] Failed to install cmapi \n\n" + mariadb -e "show status like '%Columnstore%';" + else + systemctl daemon-reload + systemctl enable mariadb-columnstore-cmapi + systemctl start mariadb-columnstore-cmapi + mariadb -e "show status like '%Columnstore%';" + sleep 2 + + confirm_cmapi_online_and_configured + init_cs_up + fi +} + +get_set_cmapi_key() { + + CMAPI_CNF="/etc/columnstore/cmapi_server.conf" + + if [ ! -f $CMAPI_CNF ]; then + echo "[!!] No cmapi config file found" + exit 1; + fi; + + # Add API Key if missing + if [ -z "$(grep ^x-api-key $CMAPI_CNF)" ]; then + + if ! 
command -v openssl &> /dev/null ; then + api_key="19bb89d77cb8edfe0864e05228318e3dfa58e8f45435fbd9bd12c462a522a1e9" + else + api_key=$(openssl rand -hex 32) + fi + + printf "%-35s ..." " - Setting API Key:" + if cmapi_output=$( curl -s https://127.0.0.1:8640/cmapi/0.4.0/cluster/status \ + --header 'Content-Type:application/json' \ + --header "x-api-key:$api_key" -k ) ; then + printf " Done - $( echo $cmapi_output | jq -r tostring ) \n" + sleep 2; + else + printf " Failed to set API key\n\n" + exit 1; + fi + else + api_key=$(grep ^x-api-key $CMAPI_CNF | cut -d "=" -f 2 | tr -d " ") + fi +} + +add_node_cmapi_via_curl() { + + node_ip=$1 + if [ -z $api_key ]; then get_set_cmapi_key; fi; + + # Add Node + printf "%-35s ..." " - Adding primary node via curl" + if cmapi_output=$( curl -k -s -X PUT https://127.0.0.1:8640/cmapi/0.4.0/cluster/node \ + --header 'Content-Type:application/json' \ + --header "x-api-key:$api_key" \ + --data "{\"timeout\": 120, \"node\": \"$node_ip\"}" ); then + printf " Done - $(echo $cmapi_output | jq -r tostring )\n" + else + echo "Failed adding node" + exit 1; + fi + +} + +start_cs_via_systemctl() { + if systemctl start mariadb-columnstore ; then + echo " - Started Columnstore" + else + echo "[!!] Failed to start columnstore via systemctl" + exit 1; + fi; +} + +start_cs_cmapi_via_curl() { + + if [ -z $api_key ]; then get_set_cmapi_key; fi; + + if curl -k -s -X PUT https://127.0.0.1:8640/cmapi/0.4.0/cluster/start \ + --header 'Content-Type:application/json' \ + --header "x-api-key:$api_key" \ + --data '{"timeout":20}'; then + echo " - Started Columnstore" + else + echo " - [!] Failed to start columnstore via cmapi curl" + echo " - Trying via systemctl ..." 
+ start_cs_via_systemctl + fi; +} + +stop_cs_via_systemctl_override() { + systemctl stop mariadb-columnstore-cmapi; + systemctl stop mcs-ddlproc; + systemctl stop mcs-dmlproc; + systemctl stop mcs-workernode@1; + systemctl stop mcs-workernode@2; + systemctl stop mcs-controllernode; + systemctl stop mcs-storagemanager; + systemctl stop mcs-primproc; + systemctl stop mcs-writeengineserver; +} + +stop_cs_via_systemctl() { + printf " - Trying to stop columnstore via systemctl" + if systemctl stop mariadb-columnstore ; then + printf " Done\n" + else + printf "\n[!!] Failed to stop columnstore\n" + exit 1; + fi; +} + +stop_cs_cmapi_via_curl() { + + if [ -z $api_key ]; then get_set_cmapi_key; fi; + + if curl -k -s -X PUT https://127.0.0.1:8640/cmapi/0.4.0/cluster/shutdown \ + --header 'Content-Type:application/json' \ + --header "x-api-key:$api_key" \ + --data '{"timeout":20}'; then + echo " - Stopped Columnstore via curl" + else + printf "\n[!] Failed to stop columnstore via cmapi\n" + stop_cs_via_systemctl + fi; +} + +add_primary_node_cmapi() { + + primary_ip="127.0.0.1" + if [ -z $api_key ]; then get_set_cmapi_key; fi; + + if command -v mcs &> /dev/null ; then + # Only add 127.0.0.1 if no nodes are configured in cmapi + if [ "$(mcs cluster status | jq -r '.num_nodes')" == "0" ]; then + printf "%-35s ..." " - Adding primary node" + if mcs_output=$( timeout 30s mcs cluster node add --node $primary_ip ); then + echo " Done - $( echo $mcs_output | jq -r tostring )" + else + echo "[!] Failed ... trying cmapi curl" + echo "$mcs_output" + add_node_cmapi_via_curl $primary_ip + fi; + fi; + + else + echo "mcs - binary could not be found" + add_node_cmapi_via_curl $primary_ip + printf "%-35s ..." 
" - Starting Columnstore Engine" + start_cs_cmapi_via_curl + fi +} + + +dev_install() { + + if [ -z $dev_drone_key ]; then printf "Missing dev_drone_key: \n"; exit; fi; + check_aws_cli_installed + + echo "Branch: $3" + echo "Build: $4" + dronePath="s3://$dev_drone_key" + branch="$3" + build="$4" + product="10.6-enterprise" + if [ -z "$branch" ]; then printf "Missing branch: $branch\n"; exit 2; fi; + if [ -z "$build" ]; then printf "Missing build: $branch\n"; exit 2; fi; + + # Construct URLs + s3_path="$dronePath/$branch/$build/$product/$arch" + drone_http=$(echo "$s3_path" | sed "s|s3://$dev_drone_key/|https://${dev_drone_key}.s3.amazonaws.com/|") + echo "Locations:" + echo "Bucket: $s3_path" + echo "Drone: $drone_http" + echo "###################################" + + check_dev_build_exists + + case $distro_info in + centos | rhel | rocky ) + s3_path="${s3_path}/$distro" + drone_http="${drone_http}/$distro" + do_dev_yum_install "$@" + ;; + ubuntu | debian ) + do_dev_apt_install "$@" + ;; + *) # unknown option + printf "\ndev_install: os & version not implemented: $distro_info\n" + exit 2; + esac + + + confirm_cmapi_online_and_configured + init_cs_up +} + +do_dev_yum_install() { + + echo "[drone] +name=Drone Repository +baseurl="$drone_http" +gpgcheck=0 +enabled=1 + " > /etc/yum.repos.d/drone.repo + yum clean all + # yum makecache + # yum list --disablerepo="*" --enablerepo="drone" + + # ALL RPMS: aws s3 cp $s3_path/ . --recursive --exclude "debuginfo" --include "*.rpm" + aws s3 cp $s3_path/ . --recursive --exclude "*" --include "MariaDB-server*" --exclude "*debug*" --no-sign-request + + # Confirm Downloaded server rpm + if ! ls MariaDB-server-*.rpm 1> /dev/null 2>&1; then + echo "Error: No MariaDB-server RPMs were found." + exit 1 + fi + + # Install MariaDB Server + if ! yum install MariaDB-server-*.rpm -y; then + printf "\n[!] Failed to install MariaDB-server \n\n" + exit 1; + fi + + # Install Columnstore + if ! 
yum install MariaDB-columnstore-engine -y; then + printf "\n[!] Failed to install columnstore \n\n" + exit 1; + fi + + # Install Cmapi + if ! yum install MariaDB-columnstore-cmapi jq -y; then + printf "\n[!] Failed to install cmapi\n\n" + exit 1; + else + systemctl start mariadb + systemctl enable mariadb-columnstore-cmapi + systemctl start mariadb-columnstore-cmapi + mariadb -e "show status like '%Columnstore%';" + sleep 2 + + confirm_cmapi_online_and_configured + init_cs_up + fi +} + +do_dev_apt_install() { + + echo "deb [trusted=yes] ${drone_http} ${distro}/" > /etc/apt/sources.list.d/repo.list + cat << EOF > /etc/apt/preferences +Package: * +Pin: origin cspkg.s3.amazonaws.com +Pin-Priority: 1700 +EOF + + # Install MariaDB + apt-get clean + apt-get update + if ! apt install mariadb-server -y --quiet; then + printf "\n[!] Failed to install mariadb-server \n\n" + exit 1; + fi + sleep 2 + systemctl daemon-reload + systemctl enable mariadb + systemctl start mariadb + + # Install Columnstore + if ! apt install mariadb-plugin-columnstore -y --quiet; then + printf "\n[!] Failed to install columnstore \n\n" + exit 1; + fi; + + if ! apt install mariadb-columnstore-cmapi jq -y --quiet ; then + printf "\n[!] 
Failed to install cmapi \n\n" + mariadb -e "show status like '%Columnstore%';" + else + systemctl daemon-reload + systemctl enable mariadb-columnstore-cmapi + systemctl start mariadb-columnstore-cmapi + mariadb -e "show status like '%Columnstore%';" + sleep 2 + + confirm_cmapi_online_and_configured + init_cs_up + fi + +} + +do_install() { + + check_operating_system + check_cpu_architecture + check_no_mdb_installed + check_package_managers + + repo=$2 + enterprise_staging=false + echo "Repository: $repo" + case $repo in + enterprise ) + # pull from enterprise repo + enterprise_install "$@" ; + ;; + enterprise_staging ) + enterprise_staging=true + enterprise_install "$@" ; + ;; + community ) + # pull from public community repo + community_install "$@" ; + ;; + dev ) + # pull from dev repo - requires dev_drone_key + dev_install "$@" ; + ;; + *) # unknown option + echo "Unknown repo: $repo\n" + exit 2; + esac + + printf "\nInstall Complete\n\n" +} + +# Small augmentation of https://github.com/mariadb-corporation/mariadb-columnstore-engine/blob/develop/cmapi/check_ready.sh +cmapi_check_ready() { + SEC_TO_WAIT=15 + cmapi_success=false + for i in $(seq 1 $SEC_TO_WAIT); do + printf "." + if ! $(curl -k -s --output /dev/null --fail https://127.0.0.1:8640/cmapi/ready); then + sleep 1 + else + cmapi_success=true + break + fi + done + + if $cmapi_success; then + return 0; + else + printf "\nCMAPI not ready after waiting $SEC_TO_WAIT seconds. Check log file for further details.\n\n" + exit 1; + fi +} + +confirm_cmapi_online_and_configured() { + + if command -v mcs &> /dev/null; then + cmapi_current_status=$(mcs cmapi is-ready 2> /dev/null); + if [ $? -ne 0 ]; then + + # if cmapi is not online - check systemd is running and start cmapi + if [ "$(ps -p 1 -o comm=)" = "systemd" ]; then + + printf "%-35s .." " - Checking CMAPI Online" + if systemctl start mariadb-columnstore-cmapi; then + cmapi_check_ready + printf " Pass\n" + else + echo "[!!] 
Failed to start CMAPI" + exit 1; + fi + else + printf "systemd is not running - cant start cmapi\n\n" + exit 1; + fi + else + + # Check if the JSON string is in the expected format + if ! echo "$cmapi_current_status" | jq -e '.started | type == "boolean"' >/dev/null; then + echo "Error: CMAPI JSON string response is not in the expected format" + exit 1 + fi + + # Check if 'started' is true + if ! echo "$cmapi_current_status" | jq -e '.started == true' >/dev/null; then + echo "Error: CMAPI 'started' is not true" + echo "mcs cmapi is-ready" + exit 1 + fi + fi + else + printf "%-35s ..." " - Checking CMAPI online" + cmapi_check_ready + printf " Done\n" + fi; + + + confirm_nodes_configured +} + +# currently supports singlenode only +confirm_nodes_configured() { + # If the first run after install will set cmapi key for 'mcs cluster status' to work + if [ -z $api_key ]; then get_set_cmapi_key; fi; + + # Check for edge case of cmapi not configured + if command -v mcs &> /dev/null; then + if [ "$(mcs cluster status | jq -r '.num_nodes')" == "0" ]; then + add_primary_node_cmapi + sleep 1; + fi + else + + if [ "$(curl -k -s https://127.0.0.1:8640/cmapi/0.4.0/cluster/status \ + --header 'Content-Type:application/json' \ + --header "x-api-key:$api_key" | jq -r '.num_nodes')" == "0" ] ; then + echo " - Stopped Columnstore via curl" + else + add_primary_node_cmapi + sleep 1; + fi; + fi +} + +# For future release +do_dev_upgrade() { + echo "fsadfa" +} + +# For future release +dev_upgrade() { + + # Variables + if [ -z $dev_drone_key ]; then printf "[!] 
Missing dev_drone_key \nvi $0\n"; exit; fi; + check_aws_cli_installed + echo "Branch: $3" + echo "Build: $4" + dronePath="s3://$dev_drone_key" + branch="$3" + build="$4" + product="10.6-enterprise" + if [ -z "$branch" ]; then printf "Missing branch: $branch\n"; exit 2; fi; + if [ -z "$build" ]; then printf "Missing build: $branch\n"; exit 2; fi; + + # Construct URLs + s3_path="$dronePath/$branch/$build/$product/$arch" + drone_http=$(echo "$s3_path" | sed "s|s3://$dev_drone_key/|https://${dev_drone_key}.s3.amazonaws.com/|") + echo "Upgrade Version" + echo "Bucket: $s3_path" + echo "Drone: $drone_http" + echo "-----------------------------------------------" + + # Prechecks + printf "\nPrechecks\n" + check_dev_build_exists + check_gtid_strict_mode + + # Stop All + init_cs_down + wait_cs_down + stop_mariadb + stop_cmapi + + # Make backups of configurations, dbrms + pre_upgrade_dbrm_backup + pre_upgrade_configuration_backup + + # Upgrade + do_dev_upgrade + + # Start All + printf "\nStartup\n" + start_mariadb + start_cmapi + init_cs_up + + # Post Upgrade + confirm_dbrmctl_ok + run_mariadb_upgrade +} + +do_community_upgrade () { + + # Download Repo setup + printf "\nDownloading Repo Setup\n" + rm -rf mariadb_repo_setup + if ! curl -sS https://downloads.mariadb.com/MariaDB/mariadb_repo_setup | bash -s -- --mariadb-server-version=mariadb-$version ; then + printf "\n[!] Failed to apply mariadb_repo_setup...\n\n" + exit 2; + fi; + + case $package_manager in + yum ) + if [ ! -f "/etc/yum.repos.d/mariadb.repo" ]; then printf "\n[!] enterprise_upgrade: Expected to find mariadb.repo in /etc/yum.repos.d \n\n"; exit 1; fi; + + # Run the YUM update + printf "\nBeginning Update\n" + if yum update "MariaDB-*" "MariaDB-columnstore-engine" "MariaDB-columnstore-cmapi"; then + printf " - Success Update\n" + else + printf "[!!] Failed to update - exit code: $? \n" + printf "Check messages above if uninstall/reinstall of new version required\n\n" + exit 1; + fi + ;; + apt ) + if [ ! 
-f "/etc/apt/sources.list.d/mariadb.list" ]; then printf "\n[!] enterprise_upgrade: Expected to find mariadb.list in /etc/apt/sources.list.d \n\n"; exit 1; fi; + + # Run the APT update + printf "\nBeginning Update\n" + apt-get clean + if apt update; then + echo " - Success Update" + else + echo "[!!] Failed to update " + exit 1; + fi + + if apt install --only-upgrade '?upgradable ?name(mariadb.*)'; then + echo " - Success Update mariadb.*" + else + echo "[!!] Failed to update " + exit 1; + fi + systemctl daemon-reload + ;; + *) # unknown option + printf "\ndo_community_upgrade: os & version not implemented: $distro_info\n" + exit 2; + esac +} + +community_upgrade() { + + version=$3 + if [ -z $version ]; then + printf "\n[!] Version empty: $version\n\n" + exit 1; + fi; + + echo "Current MariaDB Verison: $current_mariadb_version" + echo "Upgrade To MariaDB Version: $version" + echo "-----------------------------------------------" + + # Prechecks + printf "\nPrechecks\n" + check_gtid_strict_mode + check_mariadb_versions + + # Stop All + init_cs_down + wait_cs_down + stop_mariadb + stop_cmapi + + # Make backups of configurations, dbrms + pre_upgrade_dbrm_backup + pre_upgrade_configuration_backup + + # Upgrade + do_community_upgrade + + # Start All + printf "\nStartup\n" + start_mariadb + start_cmapi + init_cs_up + + # Post Upgrade + confirm_dbrmctl_ok + run_mariadb_upgrade + +} + +confirm_dbrmctl_ok() { + retry_limit=1800 + retry_counter=0 + printf "%-35s ... " " - Checking DBRM Status" + good_dbrm_status="OK. (and the system is ready)" + current_status=$(dbrmctl -v status); + while [ "$current_status" != "$good_dbrm_status" ]; do + sleep 1 + printf "." + current_status=$(dbrmctl -v status); + if [ $? -ne 0 ]; then + printf "\n[!] Failed to get dbrmctl -v status\n\n" + exit 1 + fi + if [ $retry_counter -ge $retry_limit ]; then + printf "\n[!] 
Set columnstore readonly wait retry limit exceeded: $retry_counter \n\n" + exit 1 + fi + + ((retry_counter++)) + done + printf "$current_status \n" +} + +pre_upgrade_dbrm_backup() { + + if [ ! -f "mcs_backup_manager.sh" ]; then + wget https://raw.githubusercontent.com/mariadb-corporation/mariadb-columnstore-engine/develop/cmapi/scripts/mcs_backup_manager.sh; chmod +x mcs_backup_manager.sh; + fi; + if ! source mcs_backup_manager.sh source ;then + printf "\n[!!] Failed to source mcs_backup_manager.sh\n\n" + exit 1; + else + echo " - Sourced mcs_backup_manager.sh" + fi + # Confirm the function exists and the source of mcs_backup_manager.sh worked + if command -v process_dbrm_backup &> /dev/null; then + # Take an automated backup + if ! process_dbrm_backup -r 9999 -nb preupgrade_dbrm_backup --quiet ; then + echo "[!!] Failed to take a DBRM backup before restoring" + echo "exiting ..." + exit 1; + fi; + else + echo "Error: 'process_dbrm_backup' function not found via mcs_backup_manager.sh"; + exit 1; + fi + + +} + +pre_upgrade_configuration_backup() { + pre_upgrade_config_directory="/tmp/preupgrade-configurations-$(date +%m-%d-%Y-%H%M)" + case $distro_info in + centos | rhel | rocky ) + printf "[+] Created: $pre_upgrade_config_directory \n" + mkdir -p $pre_upgrade_config_directory + print_and_copy "/etc/columnstore/Columnstore.xml" "$pre_upgrade_config_directory" + print_and_copy "/etc/columnstore/storagemanager.cnf" "$pre_upgrade_config_directory" + print_and_copy "/etc/columnstore/cmapi_server.conf" "$pre_upgrade_config_directory" + print_and_copy "/etc/my.cnf.d/server.cnf" "$pre_upgrade_config_directory" + # convert server.cnf to a find incase mysql dir not standard + ;; + ubuntu | debian ) + printf "[+] Created: $pre_upgrade_config_directory \n" + mkdir -p $pre_upgrade_config_directory + print_and_copy "/etc/columnstore/Columnstore.xml" "$pre_upgrade_config_directory" + print_and_copy "/etc/columnstore/storagemanager.cnf" "$pre_upgrade_config_directory" + 
print_and_copy "/etc/columnstore/cmapi_server.conf" "$pre_upgrade_config_directory" + print_and_copy "/etc/mysql/mariadb.conf.d/*server.cnf" "$pre_upgrade_config_directory" + ;; + *) # unknown option + printf "\npre_upgrade_configuration_backup: os & version not implemented: $distro_info\n" + exit 2; + esac +} + + +check_mariadb_versions() { + + if [ -z "$current_mariadb_version" ]; then + printf "[!] No current current_mariadb_version detected" + exit 2; + fi + + if [ -z "$version" ]; then + printf "[!] No current upgrade version detected" + exit 2; + fi + + printf "%-35s ..." " - Checking MariaDB Version Newer" + compare_versions "$current_mariadb_version" "$version" + printf " Done\n" +} + +check_gtid_strict_mode() { + if ! command -v my_print_defaults &> /dev/null; then + printf "\n[!] my_print_defaults not found. Ensure gtid_strict_mode=0 \n" + else + printf "%-35s ..." " - Checking gtid_strict_mode" + strict_mode=$(my_print_defaults --mysqld 2>/dev/null | grep "gtid[-_]strict[-_]mode") + if [ -n "$strict_mode" ] && [ $strict_mode == "--gtid_strict_mode=1" ]; then + echo "my_print_defaults --mysqld | grep gtid[-_]strict[-_]mode Result: $strict_mode" + printf "Disable gtid_strict_mode before trying again\n\n" + exit 1; + else + printf " Done\n" + fi + fi +} + +run_mariadb_upgrade() { + if ! command -v mariadb-upgrade &> /dev/null; then + printf "\n[!] mariadb-upgrade not found. Please install mariadb-upgrade\n\n" + exit 1; + fi + + if [ "$pm_number" == "1" ]; then + printf "\nMariaDB Upgrade\n" + if ! mariadb-upgrade --write-binlog ; then + printf "[!!] 
Failed to complete mariadb-upgrade \n" + exit 1; + fi + fi +} + +do_enterprise_upgrade() { + + # Download Repo setup script & run it + printf "\nDownloading Repo Setup\n" + rm -rf mariadb_es_repo_setup + url="https://dlm.mariadb.com/enterprise-release-helpers/mariadb_es_repo_setup" + if $enterprise_staging; then + url="https://dlm.mariadb.com/$enterprise_token/enterprise-release-helpers-staging/mariadb_es_repo_setup" + fi + curl -LO "$url" -o mariadb_es_repo_setup; + chmod +x mariadb_es_repo_setup; + if ! bash mariadb_es_repo_setup --token="$enterprise_token" --apply --mariadb-server-version="$version"; then + printf "\n[!] Failed to apply mariadb_es_repo_setup...\n\n" + exit 2; + fi; + + case $package_manager in + yum ) + if [ ! -f "/etc/yum.repos.d/mariadb.repo" ]; then printf "\n[!] enterprise_upgrade: Expected to find mariadb.repo in /etc/yum.repos.d \n\n"; exit 1; fi; + + if $enterprise_staging; then + sed -i 's/mariadb-es-main/mariadb-es-staging/g' /etc/yum.repos.d/mariadb.repo + sed -i 's/mariadb-enterprise-server/mariadb-enterprise-staging/g' /etc/yum.repos.d/mariadb.repo + printf "\n\n[+] Adjusted mariadb.repo to: mariadb-enterprise-staging\n\n" + fi; + + # Run the YUM update + printf "\nBeginning Update\n" + if yum update "MariaDB-*" "MariaDB-columnstore-engine" "MariaDB-columnstore-cmapi"; then + echo " - Success Update" + else + echo "[!!] Failed to update " + exit 1; + fi + ;; + apt ) + if [ ! -f "/etc/apt/sources.list.d/mariadb.list" ]; then printf "\n[!] enterprise_upgrade: Expected to find mariadb.list in /etc/apt/sources.list.d \n\n"; exit 1; fi; + + if $enterprise_staging; then + sed -i 's/mariadb-enterprise-server/mariadb-enterprise-staging/g' /etc/apt/sources.list.d/mariadb.list + apt update + printf "\n\n[+] Adjusted mariadb.list to: mariadb-enterprise-staging\n\n" + fi; + + # Run the APT update + printf "\nBeginning Update\n" + apt-get clean + if apt update; then + echo " - Success Update" + else + echo "[!!] 
Failed to update " + exit 1; + fi + + if apt install --only-upgrade '?upgradable ?name(mariadb.*)'; then + echo " - Success Update mariadb.*" + else + echo "[!!] Failed to update " + exit 1; + fi + systemctl daemon-reload + ;; + *) # unknown option + printf "\nenterprise_upgrade: os & version not implemented: $distro_info\n" + exit 2; + esac +} + +enterprise_upgrade() { + + # Variables + check_set_es_token "$@" + version=$3 + if [ -z $version ]; then + printf "\n[!] Version empty: $version\n\n" + exit 1; + fi; + echo "Token: $enterprise_token" + echo "Current MariaDB Verison: $current_mariadb_version" + echo "Upgrade To MariaDB Version: $version" + echo "-----------------------------------------------" + + # Prechecks + printf "\nPrechecks\n" + check_gtid_strict_mode + check_mariadb_versions + + # Stop All + init_cs_down + wait_cs_down + stop_mariadb + stop_cmapi + + # Make backups of configurations, dbrms + pre_upgrade_dbrm_backup + pre_upgrade_configuration_backup + + # Upgrade + do_enterprise_upgrade + + # Start All + printf "\nStartup\n" + start_mariadb + start_cmapi + init_cs_up + + # Post Upgrade + confirm_dbrmctl_ok + run_mariadb_upgrade +} + +do_upgrade() { + + check_operating_system + check_cpu_architecture + check_package_managers + check_mdb_installed + + repo=$2 + echo "Repository: $repo" + enterprise_staging=false + case $repo in + enterprise ) + enterprise_upgrade "$@" ; + ;; + enterprise_staging ) + enterprise_staging=true + enterprise_upgrade "$@" ; + ;; + community ) + community_upgrade "$@" ; + ;; + dev ) + # For future release + # dev_upgrade "$@" ; + ;; + *) # unknown option + echo "do_upgrade - Unknown repo: $repo\n" + exit 2; + esac + + printf "\nUpgrade Complete\n\n" + +} + +# A quick way when a mac user runs "cs_package_manager.sh check" +# since theres no /etc/os-release to auto detect what OS & version to search the mariadb repos on mac +prompt_user_for_os() { + + # Prompt the user to select an operating system + echo "Please select an 
operating system to search for:" + os_options=("centos" "rhel" "rocky" "ubuntu" "debian") + select opt in "${os_options[@]}"; do + case $opt in + "centos" | "rhel" | "rocky" ) + distro_info=$opt + echo "What major version of $distro_info:" + short_options=("7" "8" "9") + select short in "${short_options[@]}"; do + case $short in + "7" | "8" | "9") + version_id=$short + distro_short="${distro_info:0:3}${version_id}" + break + ;; + + *) + echo "Invalid option, please try again." + ;; + esac + done + break + ;; + "ubuntu") + distro_info=$opt + echo "What major version of $distro_info:" + short_options=("20.04" "22.04" "23.04" "23.10") + select short in "${short_options[@]}"; do + case $short in + "20.04" | "22.04" | "23.04" | "23.10") + version_id=${short//./} + #version_id=$short + distro_short="${distro_info:0:3}${version_id}" + break + ;; + + *) + echo "Invalid option, please try again." + ;; + esac + done + break + ;; + + *) + echo "Invalid option, please try again." + ;; + esac + done + + echo "Distro: $distro_info" + echo "Version: $version_id" + +} + +# A quick way for mac users to select an OS when running "cs_package_manager.sh check" +# since theres no /etc/os-release to auto detect what OS & version to search the mariadb repos on mac +prompt_user_for_os() { + + # Prompt the user to select an operating system + echo "Please select an operating system to search for:" + os_options=("centos" "rhel" "rocky" "ubuntu" "debian") + select opt in "${os_options[@]}"; do + case $opt in + "centos" | "rhel" | "rocky" ) + distro_info=$opt + echo "What major version of $distro_info:" + short_options=("7" "8" "9") + select short in "${short_options[@]}"; do + case $short in + "7" | "8" | "9") + version_id=$short + distro_short="${distro_info:0:3}${version_id}" + break + ;; + + *) + echo "Invalid option, please try again." 
+ ;; + esac + done + break + ;; + "ubuntu") + distro_info=$opt + echo "What major version of $distro_info:" + short_options=("20.04" "22.04" "23.04" "23.10") + select short in "${short_options[@]}"; do + case $short in + "20.04" | "22.04" | "23.04" | "23.10") + version_id=${short//./} + #version_id=$short + distro_short="${distro_info:0:3}${version_id}" + break + ;; + + *) + echo "Invalid option, please try again." + ;; + esac + done + break + ;; + + *) + echo "Invalid option, please try again." + ;; + esac + done + + echo "Distro: $distro_info" + echo "Version: $version_id" + +} + +do_check() { + + check_operating_system + check_cpu_architecture + + repo=$2 + dbm_tmp_file="mdb-tmp.html" + grep=$(which grep) + if [ $distro_info == "mac" ]; then + grep=$(which ggrep) + + prompt_user_for_os + fi + + echo "Repository: $repo" + case $repo in + enterprise ) + check_set_es_token "$@" + + url_base="https://dlm.mariadb.com" + url_page="/browse/$enterprise_token/mariadb_enterprise_server/" + # aaaa + ignore="/login" + at_least_one=false + curl -s "$url_base$url_page" > $dbm_tmp_file + if [ $? -ne 0 ]; then + printf "\n[!] Failed to access $url_base$url_page\n\n" + exit 1 + fi + if grep -q "404 - Page Not Found" $dbm_tmp_file; then + printf "\n[!] 
404 - Failed to access $url_base$url_page\n" + printf "Confirm your ES token works\n" + printf "See: https://customers.mariadb.com/downloads/token/ \n\n" + exit 1 + fi + major_version_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep $url_page | grep -v $ignore ) + #echo $major_version_links + for major_link in ${major_version_links[@]} + do + #echo "Major: $major_link" + curl -s "$url_base$major_link" > $dbm_tmp_file + minor_version_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep $url_page | grep -v $ignore ) + for minor_link in ${minor_version_links[@]} + do + if [ "$minor_link" != "$url_page" ]; then + #echo " Minor: $minor_link" + case $distro_info in + centos | rhel | rocky ) + path="rpm/rhel/$version_id/$architecture/rpms/" + curl -s "$url_base$minor_link$path" > $dbm_tmp_file + package_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep "$path" | grep "columnstore-engine" | grep -v debug | tail -1 ) + if [ ! -z "$package_links" ]; then + #echo "----------" + #echo "$package_links" + at_least_one=true + mariadb_version="${package_links#*mariadb-enterprise-server/}" + columnstore_version="${mariadb_version#*columnstore-engine-}" + mariadb_version="$( echo $mariadb_version | awk -F/ '{print $1}' )" + + # unqiue to enterprise + standard_mariadb_version="${mariadb_version//-/_}" + columnstore_version="$( echo $columnstore_version | awk -F"${standard_mariadb_version}_" '{print $2}' | awk -F".el" '{print $1}' )" + printf "%-8s %-12s %-12s %-12s\n" "MariaDB:" "$mariadb_version" "Columnstore:" "$columnstore_version"; + fi; + ;; + ubuntu | debian ) + + path="deb/pool/main/m/" + curl -s "$url_base$minor_link$path" > $dbm_tmp_file + + # unqiue - this link/path can change + mariadb_version_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep -v $ignore | grep -v cmapi | grep ^mariadb ) + #echo "$url_base$minor_link$path" + for mariadb_link in ${mariadb_version_links[@]} + do + #echo $mariadb_link + path="deb/pool/main/m/$mariadb_link" + curl -s 
"$url_base$minor_link$path" > $dbm_tmp_file + package_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep "$path" | grep "columnstore_" | grep -v debug | grep $distro_short | tail -1 ) + if [ ! -z "$package_links" ]; then + # echo "$package_links" + # echo "----------" + at_least_one=true + mariadb_version="${package_links#*mariadb-enterprise-server/}" + columnstore_version="${mariadb_version#*columnstore-engine-}" + mariadb_version="$( echo $mariadb_version | awk -F/ '{print $1}' )" + columnstore_version="$( echo $columnstore_version | awk -F"columnstore_" '{print $2}' | awk -F"-" '{print $2}' | awk -F"+" '{print $1}' )" + printf "%-8s %-12s %-12s %-12s\n" "MariaDB:" "$mariadb_version" "Columnstore:" "$columnstore_version"; + fi; + done + + ;; + *) # unknown option + printf "\ndo_check: Not implemented for: $distro_info\n\n" + exit 2; + esac + fi; + done + done + + if ! $at_least_one; then + printf "\n[!] No columnstore packages found for: $distro_short $arch \n\n" + fi + ;; + community ) + + # pull from public community repo + url_base="https://dlm.mariadb.com" + url_page="/browse/mariadb_server/" + ignore="/login" + at_least_one=false + curl -s "$url_base$url_page" > $dbm_tmp_file + major_version_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep $url_page | grep -v $ignore ) + + for major_link in ${major_version_links[@]} + do + #echo "Major: $major_link" + curl -s "$url_base$major_link" > $dbm_tmp_file + minor_version_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep $url_page | grep -v $ignore ) + for minor_link in ${minor_version_links[@]} + do + if [ "$minor_link" != "$url_page" ]; then + #echo " Minor: $minor_link" + case $distro_info in + centos | rhel | rocky ) + path="yum/centos/$version_id/$architecture/rpms/" + curl -s "$url_base$minor_link$path" > $dbm_tmp_file + package_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep "$path" | grep "columnstore-engine" | grep -v debug | tail -1 ) + if [ ! 
-z "$package_links" ]; then + # echo "$package_links" + # echo "----------" + at_least_one=true + mariadb_version="${package_links#*mariadb-}" + columnstore_version="${mariadb_version#*columnstore-engine-}" + mariadb_version="$( echo $mariadb_version | awk -F/ '{print $1}' )" + columnstore_version="$( echo $columnstore_version | awk -F_ '{print $2}' | awk -F".el" '{print $1}' )" + # echo "MariaDB: $mariadb_version Columnstore: $columnstore_version" + printf "%-8s %-12s %-12s %-12s\n" "MariaDB:" "$mariadb_version" "Columnstore:" "$columnstore_version"; + fi; + ;; + ubuntu | debian ) + path="repo/$distro_info/pool/main/m/mariadb/" + curl -s "$url_base$minor_link$path" > $dbm_tmp_file + package_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep "$path" | grep "columnstore_" | grep -v debug | grep $distro_short | tail -1 ) + if [ ! -z "$package_links" ]; then + # echo "$package_links" + # echo "----------" + at_least_one=true + mariadb_version="${package_links#*mariadb-}" + columnstore_version="${mariadb_version#*columnstore-engine-}" + mariadb_version="$( echo $mariadb_version | awk -F/ '{print $1}' )" + columnstore_version="$( echo $columnstore_version | awk -F"columnstore_" '{print $2}' | awk -F"-" '{print $2}' | awk -F'\\+maria' '{print $1}' 2>/dev/null) " + # echo "MariaDB: $mariadb_version Columnstore: $columnstore_version" + printf "%-8s %-12s %-12s %-12s\n" "MariaDB:" "$mariadb_version" "Columnstore:" "$columnstore_version"; + fi; + ;; + *) # unknown option + printf "Not implemented for: $distro_info\n" + exit 2; + esac + fi + done + done + + if ! $at_least_one; then + printf "\n[!] No columnstore packages found for: $distro_short $arch \n\n" + fi + ;; + dev ) + printf "Not implemented for: $repo\n" + exit 1; + ;; + *) # unknown option + printf "Unknown repo: $repo\n" + exit 2; + esac +} + +global_dependencies() { + if ! command -v curl &> /dev/null; then + printf "\n[!] curl not found. 
Please install curl\n\n" + exit 1; + fi +} + +check_set_es_token() { + while [[ $# -gt 0 ]]; do + parameter="$1" + + case $parameter in + --token) + enterprise_token="$2" + shift # past argument + shift # past value + ;; + *) # unknown option + shift # past argument + ;; + esac + done + + if [ -z $enterprise_token ]; then + printf "\n[!] Enterprise token empty: $enterprise_token\n" + printf "1) edit $0 enterprise_token='xxxxxx' \n" + printf "2) add flag --token xxxxxxxxx \n" + printf "Find your token @ https://customers.mariadb.com/downloads/token/ \n\n" + + exit 1; + fi; +} + +print_cs_pkg_mgr_version_info() { + echo "MariaDB Columnstore Package Manager" + echo "Version: $cs_pkg_manager_version" +} + +global_dependencies + +case $action in + remove ) + do_remove "$@" ; + ;; + install ) + do_install "$@"; + ;; + upgrade ) + do_upgrade "$@" ; + ;; + check ) + do_check "$@" + ;; + help | -h | --help | -help) + print_help_text; + exit 1; + ;; + add ) + add_node_cmapi_via_curl "127.0.0.1" + ;; + -v | version ) + print_cs_pkg_mgr_version_info + ;; + source ) + return 0; + ;; + *) # unknown option + printf "Unknown Action: $1\n" + print_help_text + exit 2; +esac diff --git a/extra/mcs_backup_manager.sh b/cmapi/scripts/mcs_backup_manager.sh similarity index 71% rename from extra/mcs_backup_manager.sh rename to cmapi/scripts/mcs_backup_manager.sh index 1790bf0d7..d140c1c94 100644 --- a/extra/mcs_backup_manager.sh +++ b/cmapi/scripts/mcs_backup_manager.sh @@ -13,8 +13,8 @@ # ######################################################################## # Documentation: bash mcs_backup_manager.sh help -# Version: 3.4 -# +# Version: 3.8 +# # Backup Example # LocalStorage: sudo ./mcs_backup_manager.sh backup # S3: sudo ./mcs_backup_manager.sh backup -bb s3://my-cs-backups @@ -23,22 +23,23 @@ # # Restore Example # LocalStorage: sudo ./mcs_backup_manager.sh restore -l -# S3: sudo ./mcs_backup_manager.sh restore -bb s3://my-cs-backups -l -# +# S3: sudo ./mcs_backup_manager.sh restore 
-bb s3://my-cs-backups -l +# ######################################################################## - +mcs_bk_manager_version="3.8" start=$(date +%s) action=$1 print_action_help_text() { echo " MariaDB Columnstore Backup Manager - + Actions: backup Full & Incremental columnstore backup with additional flags to augment the backup taken restore Restore a backup taken with this script dbrm_backup Quick hot backup of internal columnstore metadata only - only use under support recommendation + dbrm_restore Restore internal columnstore metadata from dbrm_backup - only use under support recommendation Documentation: bash $0 help @@ -54,31 +55,22 @@ check_operating_system() { # Supported OS case $OPERATING_SYSTEM in - centos ) + centos | rhel | rocky ) return 1; ;; - rhel ) - return 1 - ;; - debian ) - return 1; - ;; - rocky ) - return 1; - ;; - ubuntu ) + ubuntu | debian ) return 1; ;; *) # unknown option printf "\ncheck_operating_system: unknown os & version: $OPERATING_SYSTEM\n" exit 2; - esac + esac } load_default_backup_variables() { check_operating_system - + # What directory to store the backups on this machine or the target machine. # Consider write permissions of the scp user and the user running this script. 
# Mariadb-backup will use this location as a tmp dir for S3 and remote backups temporarily @@ -109,7 +101,7 @@ load_default_backup_variables() ;; *) # unknown option handle_failed_dependencies "\nload_default_backup_variables: unknown os & version: $OPERATING_SYSTEM\n"; - esac + esac # Fixed Paths CS_CONFIGS_PATH="/etc/columnstore" @@ -123,7 +115,7 @@ load_default_backup_variables() cs_cache=$(grep -A25 "\[Cache\]" $STORAGEMANGER_CNF | grep ^path | cut -d "=" -f 2 | tr -d " ") # What storage topogoly is being used by Columnstore - found in /etc/columnstore/storagemanager.cnf - # Options: "LocalStorage" or "S3" + # Options: "LocalStorage" or "S3" storage=$(grep -m 1 "^service = " $STORAGEMANGER_CNF | awk '{print $3}') # Name of the existing bucket used in the cluster - found in /etc/columnstore/storagemanager.cnf @@ -133,7 +125,7 @@ load_default_backup_variables() # modes ['direct','indirect'] - direct backups run on the columnstore nodes themselves. indirect run on another machine that has read-only mounts associated with columnstore/mariadb mode="direct" - # Name of the Configuration file to load variables from + # Name of the Configuration file to load variables from config_file=".cs-backup-config" # Track your write speed with "dstat --top-cpu --top-io" @@ -160,7 +152,7 @@ load_default_backup_variables() if [ ! 
-f /var/lib/columnstore/local/module ]; then pm="pm1"; else pm=$(cat /var/lib/columnstore/local/module); fi; PM_NUMBER=$(echo "$pm" | tr -dc '0-9') if [[ -z $PM_NUMBER ]]; then PM_NUMBER=1; fi; - + #source_ips=$(grep -E -o "([0-9]{1,3}[\.]){3}[0-9]{1,3}" /etc/columnstore/Columnstore.xml) #source_host_names=$(grep "" /etc/columnstore/Columnstore.xml) cmapi_key="$(grep "x-api-key" $CS_CONFIGS_PATH/cmapi_server.conf | awk '{print $3}' | tr -d "'" )"; @@ -191,13 +183,16 @@ load_default_backup_variables() s3_url="" no_verify_ssl=false + # Deletes backups older than this variable retention_days + retention_days=0 + # Tracks if flush read lock has been run read_lock=false incremental=false columnstore_online=false confirm_xmllint_installed - + # Number of DBroots # Integer usually 1 or 3 DBROOT_COUNT=$(xmllint --xpath "string(//DBRootCount)" $CS_CONFIGS_PATH/Columnstore.xml) @@ -205,7 +200,7 @@ load_default_backup_variables() } parse_backup_variables() -{ +{ # Dynamic Arguments while [[ $# -gt 0 ]]; do key="$1" @@ -304,7 +299,7 @@ parse_backup_variables() shift # past argument ;; -nv-ssl| --no-verify-ssl) - no_verify_ssl=true + no_verify_ssl=true shift # past argument ;; -pi| --poll-interval) @@ -317,6 +312,11 @@ parse_backup_variables() shift # past argument shift # past value ;; + -r|--retention-days) + retention_days="$2" + shift # past argument + shift # past value + ;; -h|--help|-help|help) print_backup_help_text; exit 1; @@ -337,7 +337,7 @@ parse_backup_variables() print_backup_help_text() { - echo " + echo " Columnstore Backup -bl | --backup-location Directory where the backup will be saved @@ -347,7 +347,7 @@ print_backup_help_text() -url | --endpoint-url Onprem url to s3 storage api example: http://127.0.0.1:8000 -nv-ssl| --no-verify-ssl Skips verifying ssl certs, useful for onpremise s3 storage -s | --storage The storage used by columnstore data 'LocalStorage' or 'S3' - -i | --incremental Adds columnstore deltas to an existing full backup + -i | --incremental 
Adds columnstore deltas to an existing full backup [ , auto_most_recent ] -P | --parallel Number of parallel rsync/compression threads to run -f | --config-file Path to backup configuration file to load variables from -sbrm | --skip-save-brm Skip saving brm prior to running a backup - ideal for dirty backups @@ -360,22 +360,23 @@ print_backup_help_text() -q | --quiet Silence verbose copy command outputs -c | --compress Compress backup in X format - Options: [ pigz ] -nb | --name-backup Define the name of the backup - default: date +%m-%d-%Y + -r | --retention-days Retain backups created within the last X days, the rest are deleted, default 0 = keep all backups -ha | --highavilability Hint wether shared storage is attached @ below on all nodes to see all data HA LocalStorage ( /var/lib/columnstore/dataX/ ) - HA S3 ( /var/lib/columnstore/storagemanager/ ) + HA S3 ( /var/lib/columnstore/storagemanager/ ) Local Storage Examples: ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage -P 8 - ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage --incremental 02-18-2022 + ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage --incremental auto_most_recent ./$0 backup -bl /tmp/backups/ -bd Remote -scp root@172.31.6.163 -s LocalStorage - S3 Examples: + S3 Examples: ./$0 backup -bb s3://my-cs-backups -s S3 ./$0 backup -bb s3://my-cs-backups -c pigz --quiet -sb - ./$0 backup -bb gs://my-cs-backups -s S3 --incremental 02-18-2022 + ./$0 backup -bb gs://my-cs-backups -s S3 --incremental 12-18-2023 ./$0 backup -bb s3://my-onpremise-bucket -s S3 -url http://127.0.0.1:8000 - + Cron Example: */60 */24 * * * root bash /root/$0 -bb s3://my-cs-backups -s S3 >> /root/csBackup.log 2>&1 "; @@ -405,23 +406,24 @@ print_backup_variables() printf "%-${s1}s %-${s2}s\n" "Highly Available:" "$HA"; printf "%-${s1}s %-${s2}s\n" "Incremental:" "$incremental"; printf "%-${s1}s %-${s2}s\n" "Timestamp:" "$(date +%m-%d-%Y-%H%M%S)"; + printf 
"%-${s1}s %-${s2}s\n" "Retention:" "$retention_days"; if [[ -n "$compress_format" ]]; then printf "%-${s1}s %-${s2}s\n" "Compression:" "true"; printf "%-${s1}s %-${s2}s\n" "Compression Format:" "$compress_format"; printf "%-${s1}s %-${s2}s\n" "Compression Threads:" "$PARALLEL_THREADS"; - else + else printf "%-${s1}s %-${s2}s\n" "Parallel Enabled:" "$parrallel_rsync"; if $parrallel_rsync ; then printf "%-${s1}s %-${s2}s\n" "Parallel Threads:" "$PARALLEL_THREADS"; fi; fi if [ $storage == "LocalStorage" ]; then printf "%-${s1}s %-${s2}s\n" "Backup Destination:" "$backup_destination"; - if [ $backup_destination == "Remote" ]; then printf "%-${s1}s %-${s2}s\n" "scp:" "$scp"; fi; + if [ $backup_destination == "Remote" ]; then printf "%-${s1}s %-${s2}s\n" "scp:" "$scp"; fi; printf "%-${s1}s %-${s2}s\n" "Backup Location:" "$backup_location"; fi - if [ $storage == "S3" ]; then + if [ $storage == "S3" ]; then printf "%-${s1}s %-${s2}s\n" "Active Bucket:" "$bucket"; printf "%-${s1}s %-${s2}s\n" "Backup Bucket:" "$backup_bucket"; fi @@ -429,7 +431,7 @@ print_backup_variables() } check_package_managers() { - + package_manager=''; if command -v apt &> /dev/null ; then if ! command -v dpkg-query &> /dev/null ; then @@ -441,21 +443,21 @@ check_package_managers() { if command -v yum &> /dev/null ; then package_manager="yum"; - fi + fi - if [ $package_manager == '' ]; then + if [ $package_manager == '' ]; then handle_failed_dependencies "[!!] No package manager found: yum or apt must be installed" exit 1; fi; } confirm_xmllint_installed() { - + if ! command -v xmllint > /dev/null; then printf "[!] xmllint not installed ... attempting auto install\n\n" check_package_managers case $package_manager in - yum ) + yum ) install_command="yum install libxml2 -y"; ;; apt ) @@ -479,7 +481,7 @@ confirm_rsync_installed() { printf "[!] rsync not installed ... 
attempting auto install\n\n" check_package_managers case $package_manager in - yum ) + yum ) install_command="yum install rsync -y"; ;; apt ) @@ -505,7 +507,7 @@ confirm_mariadb_backup_installed() { printf "[!] mariadb-backup not installed ... attempting auto install\n\n" check_package_managers case $package_manager in - yum ) + yum ) install_command="yum install MariaDB-backup -y"; ;; apt ) @@ -529,7 +531,7 @@ confirm_pigz_installed() { printf "[!] pigz not installed ... attempting auto install\n\n" check_package_managers case $package_manager in - yum ) + yum ) install_command="yum install pigz -y"; ;; apt ) @@ -547,13 +549,13 @@ confirm_pigz_installed() { fi } -check_for_dependancies() +check_for_dependancies() { # Check pidof works if [ $mode != "indirect" ] && ! command -v pidof > /dev/null; then handle_failed_dependencies "\n\n[!] Please make sure pidof is installed and executable\n\n" fi - + # used for save_brm and defining columnstore_user if ! command -v stat > /dev/null; then handle_failed_dependencies "\n\n[!] Please make sure stat is installed and executable\n\n" @@ -562,23 +564,23 @@ check_for_dependancies() confirm_rsync_installed confirm_mariadb_backup_installed - if [ $1 == "backup" ] && [ $mode != "indirect" ] && ! command -v dbrmctl > /dev/null; then + if [ $1 == "backup" ] && [ $mode != "indirect" ] && ! command -v dbrmctl > /dev/null; then handle_failed_dependencies "\n\n[!] dbrmctl unreachable to issue lock \n\n" fi if [ $storage == "S3" ]; then - + # Default cloud cloud="aws" - + # Critical argument for S3 - determine which cloud - if [ -z "$backup_bucket" ]; then handle_failed_dependencies "\n undefined --backup_bucket: $backup_bucket \nfor examples see: ./$0 backup --help\n"; fi + if [ -z "$backup_bucket" ]; then handle_failed_dependencies "\n\n[!] 
Undefined --backup-bucket: $backup_bucket \nfor examples see: ./$0 backup --help\n\n"; fi if [[ $backup_bucket == gs://* ]]; then cloud="gcp"; protocol="gs"; elif [[ $backup_bucket == s3://* ]]; then cloud="aws"; protocol="s3"; - else - handle_failed_dependencies "\n Invalid --backup_bucket - doesnt lead with gs:// or s3:// - $backup_bucket\n"; + else + handle_failed_dependencies "\n\n[!] Invalid --backup-bucket - doesnt lead with gs:// or s3:// - $backup_bucket\n\n"; fi if [ $cloud == "gcp" ]; then @@ -589,7 +591,7 @@ check_for_dependancies() gsutil=$(which mcs_gsutil 2>/dev/null) elif ! command -v gsutil > /dev/null; then which gsutil - echo "Hints: + echo "Hints: curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-443.0.0-linux-x86_64.tar.gz tar -xvf google-cloud-cli-443.0.0-linux-x86_64.tar.gz ./google-cloud-sdk/install.sh -q @@ -600,14 +602,14 @@ check_for_dependancies() or B) gcloud auth activate-service-account --key-file=user-file.json " - handle_failed_dependencies "\n\nPlease make sure gsutil cli is installed configured and executable\n" - else + handle_failed_dependencies "\n\n[!] Please make sure gsutil cli is installed configured and executable\n\n" + else gsutil=$(which gsutil 2>/dev/null) fi - + # gsutil sytax for silent if $quiet; then xtra_s3_args+="-q"; fi - else + else # on prem S3 will use aws cli # If AWS - Check aws-cli installed @@ -617,26 +619,26 @@ check_for_dependancies() awscli=$(which mcs_aws 2>/dev/null) elif ! command -v aws > /dev/null; then which aws - echo "Hints: + echo "Hints: curl \"https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip\" -o \"awscliv2.zip\" unzip awscliv2.zip sudo ./aws/install aws configure" - handle_failed_dependencies "\n\n Please make sure aws cli is installed configured and executable\nSee existing .cnf aws credentials with: grep aws_ $STORAGEMANGER_CNF \n\n" + handle_failed_dependencies "\n\n[!] 
Please make sure aws cli is installed configured and executable\nSee existing .cnf aws credentials with: grep aws_ $STORAGEMANGER_CNF \n\n" else awscli=$(which aws 2>/dev/null) fi - + # aws sytax for silent if $quiet; then xtra_s3_args+="--quiet"; fi fi; fi } -validation_prechecks_for_backup() +validation_prechecks_for_backup() { - echo "Prechecks ..." + echo "Prechecks" # Adjust rsync for incremental copy additional_rysnc_flags="" @@ -653,19 +655,19 @@ validation_prechecks_for_backup() # Detect if columnstore online if [ $mode == "direct" ]; then - if [ -z $(pidof PrimProc) ] || [ -z $(pidof WriteEngineServer) ]; then - printf " - Columnstore is OFFLINE \n"; - export columnstore_online=false; - else - printf " - Columnstore is ONLINE - safer if offline \n"; - export columnstore_online=true; + if [ -z $(pidof PrimProc) ] || [ -z $(pidof WriteEngineServer) ]; then + printf " - Columnstore is OFFLINE \n"; + export columnstore_online=false; + else + printf " - Columnstore is ONLINE - safer if offline \n"; + export columnstore_online=true; fi fi; # Validate compression option if [[ -n "$compress_format" ]]; then case $compress_format in - pigz) + pigz) confirm_pigz_installed ;; *) # unknown option @@ -683,78 +685,215 @@ validation_prechecks_for_backup() fi # Storage Based Checks - if [ $storage == "LocalStorage" ]; then + if [ $storage == "LocalStorage" ]; then # Incremental Job checks - if $incremental; then + if $incremental; then + + # $backup_location must exist to find an existing full back to add to + if [ ! 
-d $backup_location ]; then + handle_early_exit_on_backup "[X] Backup directory ($backup_location) DOES NOT exist ( -bl ) \n\n" true; + fi + + if [ "$today" == "auto_most_recent" ]; then + auto_select_most_recent_backup_for_incremental + fi + + # Validate $today is a non-empty value + if [ -z "$today" ]; then + handle_early_exit_on_backup "\nUndefined folder to increment on ($backup_location$today)\nTry --incremental or --incremental auto_most_recent \n" true + fi + # Cant continue if this folder (which represents a full backup) doesnt exists - if [ $backup_destination == "Local" ]; then - if [ -d $backup_location$today ]; then - printf "[+] Full backup directory exists\n"; - else - handle_early_exit_on_backup "[X] Full backup directory ($backup_location$today) DOES NOT exist \n\n" true; + if [ $backup_destination == "Local" ]; then + if [ -d $backup_location$today ]; then + printf " - Full backup directory exists\n"; + else + handle_early_exit_on_backup "[X] Full backup directory ($backup_location$today) DOES NOT exist \n\n" true; fi; elif [ $backup_destination == "Remote" ]; then - if [[ $(ssh $scp test -d $backup_location$today && echo exists) ]]; then - printf "[+] Full backup directory exists\n"; - else - handle_early_exit_on_backup "[X] Full backup directory ($backup_location$today) DOES NOT exist on remote $scp \n\n" true; + if [[ $(ssh $scp test -d $backup_location$today && echo exists) ]]; then + printf " - Full backup directory exists\n"; + else + handle_early_exit_on_backup "[X] Full backup directory ($backup_location$today) DOES NOT exist on remote $scp \n\n" true; fi fi fi elif [ $storage == "S3" ]; then - + # Adjust s3api flags for onpremise/custom endpoints add_s3_api_flags="" if [ -n "$s3_url" ]; then add_s3_api_flags+=" --endpoint-url $s3_url"; fi; if $no_verify_ssl; then add_s3_api_flags+=" --no-verify-ssl"; fi; - + # Validate addtional relevant arguments for S3 if [ -z "$backup_bucket" ]; then echo "Invalid --backup_bucket: $backup_bucket - is 
empty"; exit 1; fi # Check cli access to bucket if [ $cloud == "gcp" ]; then - + if $gsutil ls $backup_bucket > /dev/null ; then printf " - Success listing backup bucket\n" - else + else printf "\n[X] Failed to list bucket contents... \nCheck $gsutil credentials: $gsutil config -a" handle_early_exit_on_backup "\n$gsutil ls $backup_bucket \n\n" true fi - else + else # Check aws cli access to bucket if $( $awscli $add_s3_api_flags s3 ls $backup_bucket > /dev/null ) ; then printf " - Success listing backup bucket\n" - else + else printf "\n[X] Failed to list bucket contents... \nCheck aws cli credentials: aws configure" handle_early_exit_on_backup "\naws $add_s3_api_flags s3 ls $backup_bucket \n" true fi fi; - # Incremental Job checks + # Incremental Job checks if $incremental; then + if [ "$today" == "auto_most_recent" ]; then + auto_select_most_recent_backup_for_incremental + fi + + # Validate $today is a non-empty value + if [ -z "$today" ]; then + handle_early_exit_on_backup "\nUndefined folder to increment on ($backup_bucket/$today)\nTry --incremental or --incremental auto_most_recent \n" true + fi + # Cant continue if this folder (which represents a full backup) doesnt exists if [ $cloud == "gcp" ]; then - if [[ $( $gsutil ls $backup_bucket/$today | head ) ]]; then - printf "[+] Full backup directory exists\n"; - else - handle_early_exit_on_backup "[X] Full backup directory ($backup_bucket/$today) DOES NOT exist in GCS \nCheck - $gsutil ls $backup_bucket/$today | head \n\n" true; - fi + if [[ $( $gsutil ls $backup_bucket/$today | head ) ]]; then + printf " - Full backup directory exists\n"; + else + handle_early_exit_on_backup "[X] Full backup directory ($backup_bucket/$today) DOES NOT exist in GCS \nCheck - $gsutil ls $backup_bucket/$today | head \n\n" true; + fi else - if [[ $( $awscli $add_s3_api_flags s3 ls $backup_bucket/$today | head ) ]]; then - printf "[+] Full backup directory exists\n"; - else - handle_early_exit_on_backup "[X] Full backup directory 
($backup_bucket/$today) DOES NOT exist in S3 \nCheck - aws $add_s3_api_flags s3 ls $backup_bucket/$today | head \n\n" true; - fi + if [[ $( $awscli $add_s3_api_flags s3 ls $backup_bucket/$today/ | head ) ]]; then + printf " - Full backup directory exists\n"; + else + handle_early_exit_on_backup "[X] Full backup directory ($backup_bucket/$today) DOES NOT exist in S3 \nCheck - aws $add_s3_api_flags s3 ls $backup_bucket/$today | head \n\n" true; + fi fi; fi - else + else handle_early_exit_on_backup "Invalid Variable storage: $storage" true fi } +# Used when "--incremental auto_most_recent" passed in during incremental backups +# This function identifies which backup directory is the most recent and sets today=x so that the incremental backup applies to said last full backup +# For LocalStorage: based on ls -td | head -n 1 +# For S3: using the awscli/gsutil, compare the dates of the backup folders restoreS3.job file to find the most recent S3 backup to increment ont top off +auto_select_most_recent_backup_for_incremental() { + + printf " - Searching for most recent backup ...." + if [ $storage == "LocalStorage" ]; then + most_recent_backup=$(ls -td "${backup_location}"* 2>/dev/null | head -n 1) + if [[ -z "$most_recent_backup" ]]; then + handle_early_exit_on_backup "\n[!!!] 
No backup found to increment in '$backup_location', please run a full backup or define a folder that exists --incremental \n" true + else + today=$(basename $most_recent_backup 2>/dev/null) + fi + + elif [ $storage == "S3" ]; then + current_date=$(date +%s) + backups=$(s3ls $backup_bucket) + most_recent_backup="" + most_recent_backup_time_diff=$((2**63 - 1)); + + while IFS= read -r line; do + + folder=$(echo "$line" | awk '{print substr($2, 1, length($2)-1)}') + date_time=$(s3ls "${backup_bucket}/${folder}/restore --recursive" | awk '{print $1,$2}') + + if [[ -n "$date_time" ]]; then + + # Parse the date + backup_date=$(date -d "$date_time" +%s) + + # Calculate the difference in days + time_diff=$(( (current_date - backup_date) )) + # echo "date_time: $date_time" + # echo "backup_date: $backup_date" + # echo "time_diff: $time_diff" + # echo "days_diff: $((time_diff / (60*60*24) ))" + + if [ $time_diff -lt $most_recent_backup_time_diff ]; then + most_recent_backup=$folder + most_recent_backup_time_diff=$time_diff + fi + fi + printf "." + done <<< "$backups" + + # printf "\n\nMost Recent: $most_recent_backup \n" + # printf "Time Diff: $most_recent_backup_time_diff \n" + + if [[ -z "$most_recent_backup" ]]; then + handle_early_exit_on_backup "\n[!!!] No backup found to increment, please run a full backup or define a folder that exists --incremental \n" true + exit 1; + else + today=$most_recent_backup + fi + fi + printf " selected: $today \n" +} + +apply_backup_retention_policy() { + + if [ $retention_days -eq 0 ]; then + printf " - Skipping Backup Rentention Policy\n" + return 0; + fi + + printf " - Applying Backup Rentention Policy...." 
+ if [ $storage == "LocalStorage" ]; then + # example: find /tmp/backups/ -mindepth 1 -maxdepth 1 -type d -name "*" -amin +0 + find "$backup_location" -mindepth 1 -maxdepth 1 -type d -name "*" -mtime +$retention_days -exec rm -r {} \; + + elif [ $storage == "S3" ]; then + + current_date=$(date +%s) + backups=$(s3ls $backup_bucket) + + while IFS= read -r line; do + + delete_backup=false + folder=$(echo "$line" | awk '{print substr($2, 1, length($2)-1)}') + date_time=$(s3ls "${backup_bucket}/${folder}/restore --recursive" | awk '{print $1,$2}') + + if [[ -n "$date_time" ]]; then + + # Parse the date + backup_date=$(date -d "$date_time" +%s) + + # Calculate the difference in days + days_diff=$(( (current_date - backup_date) / (60*60*24) )) + # echo "line: $line" + # echo "date_time: $date_time" + # echo "backup_date: $backup_date" + # echo "days_diff: $days_diff" + + if [ $days_diff -gt "$retention_days" ]; then + delete_backup=true + fi + else + delete_backup=true + fi + + if $delete_backup; then + s3rm "${backup_bucket}/${folder}" + #echo "Deleting ${backup_bucket}/${folder}" + fi + printf "." + + done <<< "$backups" + fi + printf " Done\n" + +} + cs_read_only_wait_loop() { retry_limit=1800 retry_counter=0 @@ -766,7 +905,7 @@ cs_read_only_wait_loop() { printf "." current_status=$(dbrmctl status); if [ $? -ne 0 ]; then - handle_early_exit_on_backup "\n[!] Failed to get dbrmctl status\n\n" + handle_early_exit_on_backup "\n[!] Failed to get dbrmctl status\n\n" fi if [ $retry_counter -ge $retry_limit ]; then handle_early_exit_on_backup "\n[!] 
Set columnstore readonly wait retry limit exceeded: $retry_counter \n\n" @@ -777,18 +916,18 @@ cs_read_only_wait_loop() { printf "Done\n" } -# Having columnstore offline is best for non-volatile backups +# Having columnstore offline is best for non-volatile backups # If online - issue a flush tables with read lock and set DBRM to readonly -issue_write_locks() +issue_write_locks() { if [ $mode == "indirect" ] || $skip_locks; then printf "\n"; return; fi; - if ! $skip_mdb && ! pidof mariadbd > /dev/null; then - handle_early_exit_on_backup "\n[X] MariaDB is offline ... Needs to be online to issue read only lock and to run mariadb-backup \n\n" true; + if ! $skip_mdb && ! pidof mariadbd > /dev/null; then + handle_early_exit_on_backup "\n[X] MariaDB is offline ... Needs to be online to issue read only lock and to run mariadb-backup \n\n" true; fi; - printf "\nLocks \n"; + printf "\nLocks \n"; # Pre 23.10.2 CS startreadonly doesnt exist - so poll cpimports added to protect them - if ! $skip_polls; then + if ! $skip_polls; then poll_check_no_active_sql_writes poll_check_no_active_cpimports fi; @@ -800,43 +939,41 @@ issue_write_locks() if mariadb -e "FLUSH TABLES WITH READ LOCK;"; then mariadb -e "set global read_only=ON;" read_lock=true - printf " Done\n"; - else - handle_early_exit_on_backup "\n[X] Failed issuing read-only lock\n\n" - fi + printf " Done\n"; + else + handle_early_exit_on_backup "\n[X] Failed issuing read-only lock\n" + fi fi - - if [ $pm == "pm1" ]; then - + + if [ $pm == "pm1" ]; then + # Set Columnstore ReadOnly Mode startreadonly_exists=$(dbrmctl -h 2>&1 | grep "startreadonly") printf " - Issuing read-only lock to Columnstore Engine ... "; - if ! $columnstore_online; then - printf "Skip since offline\n\n"; + if ! 
$columnstore_online; then + printf "Skip since offline\n"; elif [ $DBROOT_COUNT == "1" ] && [[ -n "$startreadonly_exists" ]]; then if dbrmctl startreadonly ; then cs_read_only_wait_loop printf " \n"; else - handle_early_exit_on_backup "\n[X] Failed issuing columnstore BRM lock via dbrmctl startreadonly\n" + handle_early_exit_on_backup "\n[X] Failed issuing columnstore BRM lock via dbrmctl startreadonly\n" fi; - elif [ $DBROOT_COUNT == "1" ]; then - if dbrmctl readonly ; then - printf " \n"; - else - handle_early_exit_on_backup "\n[X] Failed issuing columnstore BRM lock via dbrmctl readonly \n" + elif [ $DBROOT_COUNT == "1" ]; then + if ! dbrmctl readonly ; then + handle_early_exit_on_backup "\n[X] Failed issuing columnstore BRM lock via dbrmctl readonly \n" fi - else + else if command -v mcs &> /dev/null && command -v jq &> /dev/null ; then if ! mcs cluster set mode --mode readonly | jq -r tostring ; then - handle_early_exit_on_backup "\n[X] Failed issuing columnstore BRM lock via cmapi\n" + handle_early_exit_on_backup "\n[X] Failed issuing columnstore BRM lock via cmapi\n" fi else # Older CS versions dont have mcs cli cmapiResponse=$(curl -s -X PUT https://127.0.0.1:8640/cmapi/0.4.0/cluster/mode-set --header 'Content-Type:application/json' --header "x-api-key:$cmapi_key" --data '{"timeout":20, "mode": "readonly"}' -k); - if [[ $cmapiResponse == '{"error":'* ]] ; then - handle_early_exit_on_backup "\n[X] Failed issuing columnstore BRM lock via cmapi\n" + if [[ $cmapiResponse == '{"error":'* ]] ; then + handle_early_exit_on_backup "\n[X] Failed issuing columnstore BRM lock via cmapi\n" else printf "$cmapiResponse \n"; fi @@ -886,7 +1023,7 @@ poll_check_no_active_cpimports() { printf "Done\n" no_cpimports=true break - else + else printf "." if ! $quiet; then printf "\n$active_cpimports"; fi; sleep "$poll_interval" @@ -899,53 +1036,56 @@ poll_check_no_active_cpimports() { fi; } -run_save_brm() +run_save_brm() { if $skip_save_brm || [ $pm != "pm1" ] || ! 
$columnstore_online || [ $mode == "indirect" ]; then return; fi; printf "\nBlock Resolution Manager\n" - local tmpDir="/tmp/DBRMbackup-$today" - if [ ! -d "$tmpDir" ]; then mkdir -p $tmpDir; fi; + local tmp_dir="/tmp/DBRMbackup-$today" + if [ ! -d "$tmp_dir" ]; then mkdir -p $tmp_dir; fi; # Copy extent map locally just in case save_brm fails - if [ $storage == "LocalStorage" ]; then - cp -R $DBRM_PATH/* $tmpDir - elif [ $storage == "S3" ]; then - cp -R $STORAGEMANAGER_PATH/* $tmpDir + if [ $storage == "LocalStorage" ]; then + printf " - Backing up DBRMs @ $tmp_dir ... " + cp -R $DBRM_PATH/* $tmp_dir + printf " Done \n" + elif [ $storage == "S3" ]; then + printf " - Backing up minimal DBRMs @ $tmp_dir ... " # Base Set - eval "smcat /data1/systemFiles/dbrm/BRM_saves_current > $tmpDir/BRM_saves_current $xtra_cmd_args" - eval "smcat /data1/systemFiles/dbrm/BRM_saves_journal > $tmpDir/BRM_saves_journal $xtra_cmd_args" - eval "smcat /data1/systemFiles/dbrm/BRM_saves_em > $tmpDir/BRM_saves_em $xtra_cmd_args" - eval "smcat /data1/systemFiles/dbrm/BRM_saves_vbbm > $tmpDir/BRM_saves_vbbm $xtra_cmd_args" - eval "smcat /data1/systemFiles/dbrm/BRM_saves_vss > $tmpDir/BRM_saves_vss $xtra_cmd_args" + eval "smcat /data1/systemFiles/dbrm/BRM_saves_current 2>/dev/null > $tmp_dir/BRM_saves_current $xtra_cmd_args" + eval "smcat /data1/systemFiles/dbrm/BRM_saves_journal 2>/dev/null > $tmp_dir/BRM_saves_journal $xtra_cmd_args" + eval "smcat /data1/systemFiles/dbrm/BRM_saves_em 2>/dev/null > $tmp_dir/BRM_saves_em $xtra_cmd_args" + eval "smcat /data1/systemFiles/dbrm/BRM_saves_vbbm 2>/dev/null > $tmp_dir/BRM_saves_vbbm $xtra_cmd_args" + eval "smcat /data1/systemFiles/dbrm/BRM_saves_vss 2>/dev/null > $tmp_dir/BRM_saves_vss $xtra_cmd_args" # A Set - eval "smcat /data1/systemFiles/dbrm/BRM_savesA_em > $tmpDir/BRM_savesA_em $xtra_cmd_args" - eval "smcat /data1/systemFiles/dbrm/BRM_savesA_vbbm > $tmpDir/BRM_savesA_vbbm $xtra_cmd_args" - eval "smcat /data1/systemFiles/dbrm/BRM_savesA_vss > 
$tmpDir/BRM_savesA_vss $xtra_cmd_args" - + eval "smcat /data1/systemFiles/dbrm/BRM_savesA_em 2>/dev/null > $tmp_dir/BRM_savesA_em $xtra_cmd_args" + eval "smcat /data1/systemFiles/dbrm/BRM_savesA_vbbm 2>/dev/null > $tmp_dir/BRM_savesA_vbbm $xtra_cmd_args" + eval "smcat /data1/systemFiles/dbrm/BRM_savesA_vss 2>/dev/null > $tmp_dir/BRM_savesA_vss $xtra_cmd_args" + # B Set - eval "smcat /data1/systemFiles/dbrm/BRM_savesB_em > $tmpDir/BRM_savesB_em $xtra_cmd_args" - eval "smcat /data1/systemFiles/dbrm/BRM_savesB_vbbm > $tmpDir/BRM_savesB_vbbm $xtra_cmd_args" - eval "smcat /data1/systemFiles/dbrm/BRM_savesB_vss > $tmpDir/BRM_savesB_vss $xtra_cmd_args" + eval "smcat /data1/systemFiles/dbrm/BRM_savesB_em 2>/dev/null > $tmp_dir/BRM_savesB_em $xtra_cmd_args" + eval "smcat /data1/systemFiles/dbrm/BRM_savesB_vbbm 2>/dev/null > $tmp_dir/BRM_savesB_vbbm $xtra_cmd_args" + eval "smcat /data1/systemFiles/dbrm/BRM_savesB_vss 2>/dev/null > $tmp_dir/BRM_savesB_vss $xtra_cmd_args" + printf " Done \n" fi printf " - Saving BRMs... 
\n" - brmOwner=$(stat -c "%U" $DBRM_PATH/) - if sudo -u $brmOwner /usr/bin/save_brm ; then - rm -rf $tmpDir - else - printf "\n Failed: save_brm error - see /var/log/messages - backup @ $tmpDir \n\n"; - handle_early_exit_on_backup + brm_owner=$(stat -c "%U" $DBRM_PATH/) + if sudo -u $brm_owner /usr/bin/save_brm ; then + rm -rf $tmp_dir + else + printf "\n Failed: save_brm error - see /var/log/messages - backup @ $tmpDir \n\n"; + handle_early_exit_on_backup fi - + } # Example: s3sync # Example: s3sync s3://$bucket $backup_bucket/$today/columnstoreData "Done - Columnstore data sync complete" -s3sync() +s3sync() { local from=$1 @@ -954,21 +1094,25 @@ s3sync() local failed_message=$4 local retries=${5:-0} local cmd="" - + if [ $cloud == "gcp" ]; then cmd="$gsutil $xtra_s3_args -m rsync -r -d $from $to" # gsutil throws WARNINGS if not directed to /dev/null if $quiet; then cmd+=" 2>/dev/null"; fi - else + eval "$cmd" + else # Default AWS cmd="$awscli $xtra_s3_args $add_s3_api_flags s3 sync $from $to" + $cmd fi - if eval $cmd; then + local exit_code=$? 
+ + if [ $exit_code -eq 0 ]; then if [ -n "$success_message" ]; then printf "$success_message"; fi; else - if [ $retries -lt $RETRY_LIMIT ]; then + if [ $retries -lt $RETRY_LIMIT ]; then echo "$cmd" echo "Retrying: $retries" sleep 1; @@ -988,10 +1132,10 @@ s3cp() local from=$1 local to=$2 local cmd="" - + if [ $cloud == "gcp" ]; then cmd="$gsutil $xtra_s3_args cp $from $to" - else + else # Default AWS cmd="$awscli $xtra_s3_args $add_s3_api_flags s3 cp $from $to" fi; @@ -1000,15 +1144,15 @@ s3cp() } -# Example: s3rm +# Example: s3rm s3rm() { local path=$1 local cmd="" - + if [ $cloud == "gcp" ]; then cmd="$gsutil $xtra_s3_args -m rm -r $path" - else + else # Default AWS cmd="$awscli $xtra_s3_args $add_s3_api_flags s3 rm $path --recursive" fi; @@ -1016,15 +1160,15 @@ s3rm() $cmd } -# Example: s3ls +# Example: s3ls s3ls() { local path=$1 local cmd="" - + if [ $cloud == "gcp" ]; then cmd="$gsutil ls $path" - else + else # Default AWS cmd="$awscli $add_s3_api_flags s3 ls $path" fi; @@ -1032,19 +1176,19 @@ s3ls() $cmd } -clear_read_lock() +clear_read_lock() { if [ $mode == "indirect" ] || $skip_locks; then return; fi; - printf "\nClearing Locks\n"; - # Clear MDB Lock + printf "\nClearing Locks\n"; + # Clear MDB Lock if pidof mariadbd > /dev/null && [ $read_lock ]; then printf " - Clearing read-only lock on MariaDB Server ... "; if mariadb -e "UNLOCK TABLES;" && mariadb -qsNe "set global read_only=$ORIGINAL_READONLY_STATUS;"; then read_lock=false; printf " Done\n" - else + else handle_early_exit_on_backup "\n[X] Failed clearing readLock\n" true fi fi @@ -1052,27 +1196,27 @@ clear_read_lock() if [ $pm == "pm1" ]; then # Clear CS Lock - printf " - Clearing read-only lock on Columnstore Engine ... "; - if ! $columnstore_online; then + printf " - Clearing read-only lock on Columnstore Engine ... "; + if ! 
$columnstore_online; then printf "Skip since offline\n" - elif [ $DBROOT_COUNT == "1" ]; then + elif [ $DBROOT_COUNT == "1" ]; then if dbrmctl readwrite ; then printf " "; - else + else handle_early_exit_on_backup "\n[X] Failed clearing columnstore BRM lock via dbrmctl\n" true; fi elif command -v mcs &> /dev/null && command -v jq &> /dev/null ; then if ! mcs cluster set mode --mode readwrite | jq -r tostring ;then - handle_early_exit_on_backup "\n[X] Failed issuing columnstore BRM lock via cmapi\n"; + handle_early_exit_on_backup "\n[X] Failed issuing columnstore BRM lock via cmapi\n"; fi - elif curl -s -X PUT https://127.0.0.1:8640/cmapi/0.4.0/cluster/mode-set --header 'Content-Type:application/json' --header "x-api-key:$cmapi_key" --data '{"timeout":20, "mode": "readwrite"}' -k ; then + elif curl -s -X PUT https://127.0.0.1:8640/cmapi/0.4.0/cluster/mode-set --header 'Content-Type:application/json' --header "x-api-key:$cmapi_key" --data '{"timeout":20, "mode": "readwrite"}' -k ; then printf " \n" else handle_early_exit_on_backup "\n[X] Failed clearing columnstore BRM lock\n" true; fi fi - -} + +} # handle_ called when certain checks/functionality fails handle_failed_dependencies() @@ -1085,7 +1229,7 @@ handle_failed_dependencies() # first argument is the error message # 2nd argument true = skip clear_read_lock, false= dont skip handle_early_exit_on_backup() -{ +{ skip_clear_locks=${2:-false} if ! $skip_clear_locks; then clear_read_lock; fi; printf "\nBackup Failed: $1\n" @@ -1123,11 +1267,11 @@ wait_on_rsync() local total=0 local w=0; - while [ $rsyncInProgress -gt "$concurrentThreshHold" ]; do - if ! $quiet && $visualize && [ $(($w % 10)) -eq 0 ]; then - if [[ $total == 0 ]]; then - total=$(du -sh /var/lib/columnstore/data$dbrootToSync/) - else + while [ $rsyncInProgress -gt "$concurrentThreshHold" ]; do + if ! 
$quiet && $visualize && [ $(($w % 10)) -eq 0 ]; then + if [[ $total == 0 ]]; then + total=$(du -sh /var/lib/columnstore/data$dbrootToSync/) + else echo -E "$rsyncInProgress rsync processes running ... seconds: $w" echo "Status: $(du -sh $backup_location$today/data$dbrootToSync/)" echo "Goal: $total" @@ -1141,12 +1285,12 @@ wait_on_rsync() done } -initiate_rsyncs() +initiate_rsyncs() { local dbrootToSync=$1 parallel_rysnc_flags=" -a " if $incremental ; then parallel_rysnc_flags+=" --inplace --no-whole-file --delete"; fi; - + deepParallelRsync /var/lib/columnstore/data$dbrootToSync 1 $DEPTH data$dbrootToSync & sleep 2; #jobs @@ -1154,19 +1298,19 @@ initiate_rsyncs() wait } -# A recursive function that increments depthCurrent+1 each directory it goes deeper and issuing rsync on each directory remaing at the target depth -# Example values: +# A recursive function that increments depthCurrent+1 each directory it goes deeper and issuing rsync on each directory remaing at the target depth +# Example values: # path: /var/lib/columnstore/data1 # depthCurrent: 1 # depthTarget: 3 # depthCurrent: data1 -deepParallelRsync() +deepParallelRsync() { path=$1 depthCurrent=$2 depthTarget=$3 relativePath=$4 - # echo "DEBUG: + # echo "DEBUG: # path=$1 # depthCurrent=$2 # depthTarget=$3 @@ -1186,15 +1330,15 @@ deepParallelRsync() #echo "DEBUG - copy to relative: $backup_location$today/$relativePath/" if ls $fullFilePath | xargs -P $PARALLEL_THREADS -I {} rsync $parallel_rysnc_flags $fullFilePath/{} $backup_location$today/$relativePath/$fileName ; then echo " + Completed: $backup_location$today/$relativePath/$fileName" - else + else echo "Failed: $backup_location$today/$relativePath/$fileName" exit 1; fi - + else - # echo "DEBUG - Fork Deeper - $fullFilePath " + # echo "DEBUG - Fork Deeper - $fullFilePath " wait_on_rsync false "0.5" $PARALLEL_FOLDERS - # Since target depth not reached, recursively call for each directory + # Since target depth not reached, recursively call for each 
directory deepParallelRsync $fullFilePath "$((depthCurrent+1))" $depthTarget "$relativePath/$fileName" & fi @@ -1202,8 +1346,8 @@ deepParallelRsync() elif [ -f $fullFilePath ]; then rsync $additional_rysnc_flags $fullFilePath $backup_location$today/$relativePath/ - # If filename is * then the directory is empty - elif [ "$fileName" == "*" ]; then + # If filename is * then the directory is empty + elif [ "$fileName" == "*" ]; then # echo "DEBUG - Skipping $relativePath - empty"; continue else @@ -1214,8 +1358,8 @@ deepParallelRsync() } run_backup() -{ - if [ $storage == "LocalStorage" ]; then +{ + if [ $storage == "LocalStorage" ]; then if [ $backup_destination == "Local" ]; then printf "\nLocal Storage Backup\n" @@ -1226,34 +1370,34 @@ run_backup() mkdir -p $backup_location$today else mkdir -p $backup_location$today/mysql - mkdir -p $backup_location$today/configs + mkdir -p $backup_location$today/configs mkdir -p $backup_location$today/configs/mysql - + # Check/Create CS Data Directories i=1 while [ $i -le $DBROOT_COUNT ]; do if [[ $ASSIGNED_DBROOT == "$i" || $HA == true ]]; then mkdir -p $backup_location$today/data$i ; fi ((i++)) - done + done fi - printf " Done\n" - + printf " Done\n" + # Backup Columnstore data i=1 while [ $i -le $DBROOT_COUNT ]; do - if [[ $ASSIGNED_DBROOT == "$i" || $HA == true ]]; then + if [[ $ASSIGNED_DBROOT == "$i" || $HA == true ]]; then if [[ -n "$compress_format" ]]; then # For compression keep track of dirs & files to include and compress/stream in the end - $compress_paths - compress_paths="/var/lib/columnstore/data$i/* " + compress_paths="/var/lib/columnstore/data$i/* " - elif $parrallel_rsync ; then + elif $parrallel_rsync ; then printf " - Parallel Rsync CS Data$i... \n" initiate_rsyncs $i - printf " Done\n" + printf " Done\n" else printf " - Syncing Columnstore Data$i... 
" eval "rsync $additional_rysnc_flags /var/lib/columnstore/data$i/* $backup_location$today/data$i/ $xtra_cmd_args"; - printf " Done\n" + printf " Done\n" fi; fi ((i++)) @@ -1261,7 +1405,7 @@ run_backup() # Backup MariaDB data if [ $ASSIGNED_DBROOT == "1" ]; then - + # logic to increment mysql and keep count if we want to backup incremental mysql data # i=1 # latestMysqlIncrement=0 @@ -1270,7 +1414,7 @@ run_backup() # done # MariaDB backup wont rerun if folder exists - so clear it before running mariadb-backup - if [ -d $backup_location$today/mysql ]; then + if [ -d $backup_location$today/mysql ]; then rm -rf $backup_location$today/mysql fi @@ -1281,20 +1425,20 @@ run_backup() mbd_prefix="$backup_location$today/$split_file_mdb_prefix.$compress_format" case $compress_format in pigz) - # Handle Cloud + # Handle Cloud if ! mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $mbd_prefix 2>> $logfile; then handle_early_exit_on_backup "\nFailed mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $mbd_prefix 2>> $logfile \n" fi printf " Done @ $mbd_prefix\n" ;; *) # unknown option - handle_early_exit_on_backup "\nUnknown compression flag: $compress_format\n" + handle_early_exit_on_backup "\nUnknown compression flag: $compress_format\n" esac else printf " - Copying MariaDB Data... " if eval "mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root $xtra_cmd_args" ; then printf " Done \n"; else printf "\n Failed: mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root\n"; handle_early_exit_on_backup; fi fi - else + else echo "[!] 
Skipping mariadb-backup" fi @@ -1321,18 +1465,18 @@ run_backup() printf " - Copying MariaDB Configs... " mkdir -p $backup_location$today/configs/mysql/$pm/ eval "rsync $additional_rysnc_flags $MARIADB_SERVER_CONFIGS_PATH/* $backup_location$today/configs/mysql/$pm/ $xtra_cmd_args" - printf " Done\n\n" + printf " Done\n" else compress_paths+=" $MARIADB_SERVER_CONFIGS_PATH/*" fi fi - + # Handle compression for Columnstore Data & Configs if [[ -n "$compress_format" ]]; then cs_prefix="$backup_location$today/$split_file_cs_prefix.$compress_format" compressed_split_size="250M"; case $compress_format in - pigz) + pigz) printf " - Compressing CS Data & Configs... " if ! eval "tar cf - $compress_paths 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $cs_prefix 2>> $logfile"; then handle_early_exit_on_backup "[!] - Compression Failed \ntar cf - $compress_paths 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $cs_prefix 2>> $logfile \n" @@ -1340,26 +1484,26 @@ run_backup() printf " Done @ $cs_prefix\n" ;; *) # unknown option - handle_early_exit_on_backup "\nUnknown compression flag: $compress_format\n" + handle_early_exit_on_backup "\nUnknown compression flag: $compress_format\n" esac fi - if $incremental ; then + if $incremental ; then # Log each incremental run now=$(date "+%m-%d-%Y %H:%M:%S"); echo "$pm updated on $now" >> $backup_location$today/incrementallyUpdated.txt final_message="Incremental Backup Complete" - else + else # Create restore job file extra_flags="" - if [[ -n "$compress_format" ]]; then extra_flags+=" -c $compress_format"; fi; - if $skip_mdb; then extra_flags+=" --skip-mariadb-backup"; fi; + if [[ -n "$compress_format" ]]; then extra_flags+=" -c $compress_format"; fi; + if $skip_mdb; then extra_flags+=" --skip-mariadb-backup"; fi; echo "./$0 restore -l $today -bl $backup_location -bd $backup_destination -s $storage --dbroots $DBROOT_COUNT -m $mode $extra_flags --quiet" > $backup_location$today/restore.job fi final_message+=" @ $backup_location$today" - + elif [ 
$backup_destination == "Remote" ]; then - + # Check/Create Directories on remote server printf "[+] Checking Remote Directories... " i=1 @@ -1371,16 +1515,16 @@ run_backup() echo $makeDirectories ssh $scp " mkdir -p $backup_location$today/mysql ; $makeDataDirectories - mkdir -p $backup_location$today/configs ; - mkdir -p $backup_location$today/configs/mysql; + mkdir -p $backup_location$today/configs ; + mkdir -p $backup_location$today/configs/mysql; mkdir -p $backup_location$today/configs/mysql/$pm " printf " Done\n" - + printf "[~] Rsync Remote Columnstore Data... \n" i=1 while [ $i -le $DBROOT_COUNT ]; do #if [ $pm == "pm$i" ]; then rsync -a $additional_rysnc_flags /var/lib/columnstore/data$i/* $scp:$backup_location$today/data$i/ ; fi - if [ $pm == "pm$i" ]; then + if [ $pm == "pm$i" ]; then find /var/lib/columnstore/data$i/* -mindepth 1 -maxdepth 2 | xargs -P $PARALLEL_THREADS -I {} rsync $additional_rysnc_flags /var/lib/columnstore/data$i/* $scp:$backup_location$today/data$i/ fi ((i++)) @@ -1399,27 +1543,27 @@ run_backup() if ! $skip_mdb; then printf "[~] Backing up mysql... \n" mkdir -p $backup_location$today - if mariadb-backup --backup --target-dir=$backup_location$today --user=root ; then + if mariadb-backup --backup --target-dir=$backup_location$today --user=root ; then rsync -a $backup_location$today/* $scp:$backup_location$today/mysql rm -rf $backup_location$today - else - printf "\n Failed: mariadb-backup --backup --target-dir=$backup_location$today --user=root\n\n"; - handle_early_exit_on_backup + else + printf "\n Failed: mariadb-backup --backup --target-dir=$backup_location$today --user=root\n\n"; + handle_early_exit_on_backup fi - else + else echo "[!] 
Skipping mariadb-backup" fi; - + # Backup CS configurations - rsync $additional_rysnc_flags $CS_CONFIGS_PATH/Columnstore.xml $scp:$backup_location$today/configs/ + rsync $additional_rysnc_flags $CS_CONFIGS_PATH/Columnstore.xml $scp:$backup_location$today/configs/ rsync $additional_rysnc_flags $STORAGEMANGER_CNF $scp:$backup_location$today/configs/ - if [ -f $CS_CONFIGS_PATH/cmapi_server.conf ]; then - rsync $additional_rysnc_flags $CS_CONFIGS_PATH/cmapi_server.conf $scp:$backup_location$today/configs/; + if [ -f $CS_CONFIGS_PATH/cmapi_server.conf ]; then + rsync $additional_rysnc_flags $CS_CONFIGS_PATH/cmapi_server.conf $scp:$backup_location$today/configs/; fi fi - - - if $incremental ; then + + + if $incremental ; then now=$(date "+%m-%d-%Y +%H:%M:%S") ssh $scp "echo \"$pm updated on $now\" >> $backup_location$today/incrementallyUpdated.txt" final_message="Incremental Backup Complete" @@ -1432,7 +1576,7 @@ run_backup() elif [ $storage == "S3" ]; then - printf "S3 Backup\n" + printf "\nS3 Backup\n" # Conconsistency check - wait for assigned journal dir to be empty trap handle_ctrl_c_backup SIGINT i=1 @@ -1440,8 +1584,8 @@ run_backup() max_wait=180 printf " - Checking storagemanager/journal/data$ASSIGNED_DBROOT/* " while [[ $j_counts -gt 0 ]]; do - if [ $i -gt $max_wait ]; then handle_early_exit_on_backup "\n[!] max_wait exceeded for $cs_journal/data$ASSIGNED_DBROOT/* to sync with bucket "; fi; - if (( $i%10 == 0 )); then printf "\n[!] Not empty yet - found $j_counts files @ $cs_journal/data$ASSIGNED_DBROOT/*\n"; fi; + if [ $i -gt $max_wait ]; then printf "[!] Maybe you have orphaned journal files, active writes or an unreachable bucket \n"; handle_early_exit_on_backup "\n[!] max_wait exceeded for $cs_journal/data$ASSIGNED_DBROOT/* to sync with bucket "; ls -la $cs_journal/data$ASSIGNED_DBROOT/*; fi; + if (( $i%10 == 0 )); then printf "\n[!] 
Not empty yet - found $j_counts files @ $cs_journal/data$ASSIGNED_DBROOT/*\n"; printf " - Checking storagemanager/journal/data$ASSIGNED_DBROOT/* "; fi; sleep 1 i=$(($i+1)) j_counts=$(find $cs_journal/data$ASSIGNED_DBROOT/* -type f 2>/dev/null | wc -l) @@ -1453,14 +1597,14 @@ run_backup() if [[ -z "$compress_format" ]]; then printf " - Syncing Columnstore Metadata \n" - if $HA; then + if $HA; then s3sync $STORAGEMANAGER_PATH $backup_bucket/$today/storagemanager " - Done storagemanager/*\n" "\n\n[!!!] sync failed - storagemanager/*\n\n"; - else + else s3sync $cs_cache/data$ASSIGNED_DBROOT $backup_bucket/$today/storagemanager/cache/data$ASSIGNED_DBROOT " + cache/data$ASSIGNED_DBROOT\n" "\n\n[!!!] sync failed - cache/data$ASSIGNED_DBROOT\n\n"; s3sync $cs_metadata/data$ASSIGNED_DBROOT $backup_bucket/$today/storagemanager/metadata/data$ASSIGNED_DBROOT " + metadata/data$ASSIGNED_DBROOT\n" "\n\n[!!!] sync failed - metadata/data$ASSIGNED_DBROOT\n\n" s3sync $cs_journal/data$ASSIGNED_DBROOT $backup_bucket/$today/storagemanager/journal/data$ASSIGNED_DBROOT " + journal/data$ASSIGNED_DBROOT\n" "\n\n[!!!] sync failed - journal/data$ASSIGNED_DBROOT\n\n" fi; - + else # For compression keep track of dirs & files to include and compress/stream in the end - $compress_paths compress_paths="$cs_cache/data$ASSIGNED_DBROOT $cs_metadata/data$ASSIGNED_DBROOT $cs_journal/data$ASSIGNED_DBROOT " @@ -1468,7 +1612,7 @@ run_backup() # PM1 mostly backups everything else if [ $ASSIGNED_DBROOT == "1" ]; then - + # Backup CS configurations if [[ -z "$compress_format" ]]; then s3sync $CS_CONFIGS_PATH/ $backup_bucket/$today/configs/ " + $CS_CONFIGS_PATH/\n" "\n\n[!!!] sync failed - $CS_CONFIGS_PATH/\n\n"; @@ -1480,7 +1624,7 @@ run_backup() if ! $skip_bucket_data; then printf " - Saving Columnstore data ... " s3sync $protocol://$bucket $backup_bucket/$today/columnstoreData " Done \n" - else + else printf " - [!] 
Skipping columnstore bucket data \n" fi @@ -1492,12 +1636,12 @@ run_backup() mbd_prefix="$backup_bucket/$today/$split_file_mdb_prefix.$compress_format" case $compress_format in pigz) - # Handle Cloud + # Handle Cloud if [ $cloud == "gcp" ]; then if ! mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter="gsutil cp - ${mbd_prefix}_\$FILE 2>$logfile" - chunk 2>$logfile; then handle_early_exit_on_backup "\nFailed mariadb-backup --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter=\"gsutil cp - ${mbd_prefix}_\$FILE 2>$logfile\" - chunk 2>$logfile \n" fi - + elif [ $cloud == "aws" ]; then if ! mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter="aws s3 cp - ${mbd_prefix}_\$FILE 2>$logfile 1>&2" - chunk 2>$logfile; then handle_early_exit_on_backup "\nFailed mariadb-backup --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter=\"aws s3 cp - ${mbd_prefix}_\$FILE 2>$logfile 1>&2\" - chunk 2>$logfile\n" @@ -1506,7 +1650,7 @@ run_backup() printf " Done @ $mbd_prefix\n" ;; *) # unknown option - handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" + handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" esac else @@ -1523,11 +1667,11 @@ run_backup() s3rm 
$backup_bucket/$today/mysql/ s3sync $backup_location$today/mysql $backup_bucket/$today/mysql/ " + mariadb-backup to bucket @ $backup_bucket/$today/mysql/ \n" rm -rf $backup_location$today/mysql - else + else handle_early_exit_on_backup "\nFailed making directory for MariaDB backup\ncommand: mkdir -p $backup_location$today/mysql\n" fi fi - else + else echo "[!] Skipping mariadb-backup" fi fi @@ -1547,13 +1691,13 @@ run_backup() cs_prefix="$backup_bucket/$today/$split_file_cs_prefix.$compress_format" compressed_split_size="250M"; case $compress_format in - pigz) + pigz) printf " - Compressing CS local files -> bucket ... " if [ $cloud == "gcp" ]; then if ! eval "tar cf - $compress_paths 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b $compressed_split_size --filter=\"gsutil cp - ${cs_prefix}_\\\$FILE 2>$logfile 1>&2\" - chunk 2>$logfile"; then handle_early_exit_on_backup "[!] - Compression/Split/Upload Failed \n" fi - + elif [ $cloud == "aws" ]; then if ! eval "tar cf - $compress_paths 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b $compressed_split_size --filter=\"aws s3 cp - ${cs_prefix}_\\\$FILE 2>$logfile 1>&2\" - chunk 2>$logfile"; then handle_early_exit_on_backup "[!] 
- Compression/Split/Upload Failed \n" @@ -1562,15 +1706,15 @@ run_backup() printf " Done @ $cs_prefix\n" ;; *) # unknown option - handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" + handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" esac fi # Create restore job file & update incremental log - if $incremental ; then + if $incremental ; then now=$(date "+%m-%d-%Y +%H:%M:%S") increment_file="incremental_history.txt" - + # download S3 file, append to it and reupload if [[ $(s3ls "$backup_bucket/$today/$increment_file") ]]; then s3cp $backup_bucket/$today/$increment_file $increment_file @@ -1583,23 +1727,23 @@ run_backup() else # Create restore job file extra_flags="" - if [[ -n "$compress_format" ]]; then extra_flags+=" -c $compress_format"; fi; - if $skip_mdb; then extra_flags+=" --skip-mariadb-backup"; fi; - if $skip_bucket_data; then extra_flags+=" --skip-bucket-data"; fi; + if [[ -n "$compress_format" ]]; then extra_flags+=" -c $compress_format"; fi; + if $skip_mdb; then extra_flags+=" --skip-mariadb-backup"; fi; + if $skip_bucket_data; then extra_flags+=" --skip-bucket-data"; fi; if [ -n "$s3_url" ]; then extra_flags+=" -url $s3_url"; fi; echo "./$0 restore -l $today -s $storage -bb $backup_bucket -dbs $DBROOT_COUNT -m $mode -nb $protocol://$bucket $extra_flags --quiet --continue" > restoreS3.job s3cp restoreS3.job $backup_bucket/$today/restoreS3.job rm -rf restoreS3.job fi final_message+=" @ $backup_bucket/$today" - + fi - + clear_read_lock end=$(date +%s) runtime=$((end-start)) - printf "\n[+] Runtime: $runtime\n" - printf "[+] $final_message\n\n" + printf "\nRuntime: $runtime\n" + printf "$final_message\n\n" } load_default_restore_variables() @@ -1621,19 +1765,19 @@ load_default_restore_variables() scp="" # What storage topogoly was used by Columnstore in the backup - found in storagemanager.cnf - # Options: "LocalStorage" or "S3" + # Options: "LocalStorage" or "S3" storage="LocalStorage" # Flag for high 
available systems (meaning shared storage exists supporting the topology so that each node sees all data) HA=false # When set to true skips the enforcement that new_bucket should be empty prior to starting a restore - continue=false + continue=false # modes ['direct','indirect'] - direct backups run on the columnstore nodes themselves. indirect run on another machine that has read-only mounts associated with columnstore/mariadb mode="direct" - # Name of the Configuration file to load variables from + # Name of the Configuration file to load variables from config_file=".cs-backup-config" # Only used if storage=S3 @@ -1815,7 +1959,7 @@ parse_restore_variables() shift # past argument ;; -nv-ssl| --no-verify-ssl) - no_verify_ssl=true + no_verify_ssl=true shift # past argument ;; -h|--help|-help|help) @@ -1834,7 +1978,7 @@ parse_restore_variables() } print_restore_help_text() -{ +{ echo " Columnstore Restore @@ -1848,25 +1992,26 @@ print_restore_help_text() -nv-ssl| --no-verify-ssl) Skips verifying ssl certs, useful for onpremise s3 storage -s | --storage The storage used by columnstore data 'LocalStorage' or 'S3' -pm | --nodeid Forces the handling of the restore as this node as opposed to whats detected on disk - -nb | --new-bucket Defines the new bucket to copy the s3 data to from the backup bucket. + -nb | --new-bucket Defines the new bucket to copy the s3 data to from the backup bucket. 
Use -nb if the new restored cluster should use a different bucket than the backup bucket itself - -nr | --new-region Defines the region of the new bucket to copy the s3 data to from the backup bucket - -nk | --new-key Defines the aws key to connect to the new_bucket - -ns | --new-secret Defines the aws secret of the aws key to connect to the new_bucket + -nr | --new-region Defines the region of the new bucket to copy the s3 data to from the backup bucket + -nk | --new-key Defines the aws key to connect to the new_bucket + -ns | --new-secret Defines the aws secret of the aws key to connect to the new_bucket -f | --config-file Path to backup configuration file to load variables from -cont| --continue This acknowledges data in your --new_bucket is ok to delete when restoring S3 -smdb| --skip-mariadb-backup Skip restoring mariadb server via mariadb-backup - ideal for only restoring columnstore -sb | --skip-bucket-data Skip restoring columnstore data in the bucket - ideal if looking to only restore mariadb server -q | --quiet Silence verbose copy command outputs -c | --compress Hint that the backup is compressed in X format - Options: [ pigz ] + -P | --parallel Number of parallel decompression and mbstream threads to run -ha | --highavilability Hint for if shared storage is attached @ below on all nodes to see all data HA LocalStorage ( /var/lib/columnstore/dataX/ ) - HA S3 ( /var/lib/columnstore/storagemanager/ ) + HA S3 ( /var/lib/columnstore/storagemanager/ ) Local Storage Examples: ./$0 restore -s LocalStorage -bl /tmp/backups/ -bd Local -l 12-29-2021 ./$0 restore -s LocalStorage -bl /tmp/backups/ -bd Remote -scp root@172.31.6.163 -l 12-29-2021 - + S3 Storage Examples: ./$0 restore -s S3 -bb s3://my-cs-backups -l 12-29-2021 ./$0 restore -s S3 -bb gs://on-premise-bucket -l 12-29-2021 -url http://127.0.0.1:8000 @@ -1886,7 +2031,7 @@ print_restore_variables() printf "%-${s1}s %-${s2}s\n" "Configuration File:" "$config_file"; source $config_file fi - if [ $mode == 
"indirect" ] || $skip_mdb || $skip_bucket_data; then + if [ $mode == "indirect" ] || $skip_mdb || $skip_bucket_data; then echo "------------------------------------------------" echo "Skips: MariaDB($skip_mdb) Bucket($skip_bucket_data)"; fi; @@ -1894,10 +2039,11 @@ print_restore_variables() if [[ -n "$compress_format" ]]; then echo "Compression: true" echo "Compression Format: $compress_format"; - else + echo "Decompression Threads:" "$PARALLEL_THREADS"; + else echo "Compression: false" fi - if [ $storage == "LocalStorage" ]; then + if [ $storage == "LocalStorage" ]; then echo "Backup Location: $backup_location" echo "Backup Destination: $backup_destination" echo "Scp: $scp" @@ -1909,7 +2055,7 @@ print_restore_variables() echo "PM Number: $pm_number" elif [ $storage == "S3" ]; then - echo "Backup Location: $backup_location" + echo "Backup Location: $backup_location" echo "Storage: $storage" echo "Load Date: $load_date" echo "timestamp: $(date +%m-%d-%Y-%H%M%S)" @@ -1932,7 +2078,7 @@ print_restore_variables() validation_prechecks_for_restore() { echo "Prechecks ..." - if [ $storage != "LocalStorage" ] && [ $storage != "S3" ]; then handle_early_exit_on_restore "Invalid script variable storage: $storage\n"; fi + if [ $storage != "LocalStorage" ] && [ $storage != "S3" ]; then handle_early_exit_on_restore "Invalid script variable storage: $storage\n"; fi if [ -z "$load_date" ]; then handle_early_exit_on_restore "\n[!!!] Required field --load: $load_date - is empty\n" ; fi if [ -z "$mode" ]; then handle_early_exit_on_restore "\n[!!!] Required field --mode: $mode - is empty\n" ; fi if [ "$mode" != "direct" ] && [ "$mode" != "indirect" ] ; then handle_early_exit_on_restore "\n[!!!] 
Invalid field --mode: $mode\n"; fi @@ -1944,7 +2090,7 @@ validation_prechecks_for_restore() { if [ -n "$s3_url" ]; then add_s3_api_flags+=" --endpoint-url $s3_url"; fi if $no_verify_ssl; then add_s3_api_flags+=" --no-verify-ssl"; fi; - # If remote backup - Validate that scp works + # If remote backup - Validate that scp works if [ $backup_destination == "Remote" ]; then if ssh $scp echo ok 2>&1 ;then printf 'SSH Works\n\n' @@ -1954,17 +2100,17 @@ validation_prechecks_for_restore() { fi # Make sure the database is offline - if [ "$mode" == "direct" ]; then - if [ -z $(pidof PrimProc) ]; then - printf " - Columnstore Status ... Offline\n"; - else - handle_early_exit_on_restore "\n[X] Columnstore is ONLINE - please turn off \n\n"; + if [ "$mode" == "direct" ]; then + if [ -z $(pidof PrimProc) ]; then + printf " - Columnstore Status ... Offline\n"; + else + handle_early_exit_on_restore "\n[X] Columnstore is ONLINE - please turn off \n\n"; fi - - if [ -z $(pidof mariadbd) ]; then - printf " - MariaDB Server Status ... Offline\n"; - else - handle_early_exit_on_restore "\n[X] MariaDB is ONLINE - please turn off \n\n"; + + if [ -z $(pidof mariadbd) ]; then + printf " - MariaDB Server Status ... Offline\n"; + else + handle_early_exit_on_restore "\n[X] MariaDB is ONLINE - please turn off \n\n"; fi fi @@ -1972,7 +2118,7 @@ validation_prechecks_for_restore() { check_package_managers cmapi_installed_command="" case $package_manager in - yum ) + yum ) cmapi_installed_command="yum list installed MariaDB-columnstore-cmapi &> /dev/null;"; ;; apt ) @@ -1984,33 +2130,33 @@ validation_prechecks_for_restore() { esac if eval $cmapi_installed_command ; then - if [ -z $(pidof /usr/share/columnstore/cmapi/python/bin/python3) ]; then - printf " - Columnstore Management API Status .. Offline\n"; - else - handle_early_exit_on_restore "\n[X] Cmapi is ONLINE - please turn off \n\n"; + if ! sudo mcs cmapi is-ready ; then + printf " - Columnstore Management API Status .. 
Offline\n"; + else + handle_early_exit_on_restore "\n[X] Cmapi is ONLINE - please turn off \n\n"; fi else - printf " - Columnstore Management API Status .. Not Installed - Skipping\n"; + printf " - Columnstore Management API Status .. Not Installed - Skipping\n"; fi # Validate addtional relevant arguments per storage option - if [ $storage == "LocalStorage" ]; then - + if [ $storage == "LocalStorage" ]; then + if [ -z "$backup_location" ]; then handle_early_exit_on_restore "Invalid --backup_location: $backup_location - is empty"; fi if [ -z "$backup_destination" ]; then handle_early_exit_on_restore "Invalid --backup_destination: $backup_destination - is empty"; fi if [ $backup_destination == "Remote" ] && [ -d $backup_location$load_date ]; then echo "Switching to '-bd Local'"; backup_destination="Local"; fi if [ $backup_destination == "Local" ]; then - + if [ ! -d $backup_location ]; then handle_early_exit_on_restore "Invalid directory --backup_location: $backup_location - doesnt exist"; fi if [ -z "$(ls -A "$backup_location")" ]; then echo "Invalid --backup_location: $backup_location - directory is empty."; fi; if [ ! 
-d $backup_location$load_date ]; then handle_early_exit_on_restore "Invalid directory --load: $backup_location$load_date - doesnt exist"; else printf " - Backup directory exists\n"; fi fi; local files=$(ls $backup_location$load_date) - if [[ -n "$compress_format" ]]; then + if [[ -n "$compress_format" ]]; then case "$compress_format" in - pigz) - flag_cs_local=false + pigz) + flag_cs_local=false flag_mysql=$skip_mdb while read -r line; do @@ -2031,17 +2177,17 @@ validation_prechecks_for_restore() { fi ;; *) # unknown option - handle_early_exit_on_backup "\nUnknown compression flag: $compress_format\n" + handle_early_exit_on_backup "\nUnknown compression flag: $compress_format\n" esac - else - + else + flag_configs=false flag_data=false flag_mysql=$skip_mdb files=$(ls $backup_location$load_date ) while read -r line; do folder_name=$(echo "$line" | awk '{ print $NF }') - + case "$folder_name" in *configs*) flag_configs=true ;; *data*) flag_data=true ;; @@ -2059,15 +2205,15 @@ validation_prechecks_for_restore() { handle_early_exit_on_restore " Backup is missing subdirectories... \nls $backup_location$load_date\n" fi fi - - elif [ $storage == "S3" ]; then - + + elif [ $storage == "S3" ]; then + # Check for concurrency settings - https://docs.aws.amazon.com/cli/latest/topic/s3-config.html if [ $cloud == "aws" ]; then cli_concurrency=$(sudo grep max_concurrent_requests ~/.aws/config 2>/dev/null ); if [ -z "$cli_concurrency" ]; then echo "[!!!] check: '~/.aws/config' - We recommend increasing s3 concurrency for better throughput to/from S3. 
This value should scale with avaialble CPU and networking capacity"; printf "example: aws configure set default.s3.max_concurrent_requests 200\n"; fi; fi; # Validate addtional relevant arguments if [ -z "$backup_bucket" ]; then handle_early_exit_on_restore "Invalid --backup_bucket: $backup_bucket - is empty"; fi - + # Prepare commands for each cloud if [ $cloud == "gcp" ]; then check1="$gsutil ls $backup_bucket"; @@ -2075,7 +2221,7 @@ validation_prechecks_for_restore() { check3="$gsutil ls $new_bucket"; check4="$gsutil ls $backup_bucket/$load_date"; check5="$gsutil ls $backup_bucket/$load_date/"; - else + else check1="$awscli $add_s3_api_flags s3 ls $backup_bucket" check2="$awscli $add_s3_api_flags s3 ls | grep $new_bucket" check3="$awscli $add_s3_api_flags s3 ls $new_bucket" @@ -2086,33 +2232,33 @@ validation_prechecks_for_restore() { # Check aws cli access to bucket if $check1 > /dev/null ; then echo -e " - Success listing backup bucket" - else + else handle_early_exit_on_restore "[X] Failed to list bucket contents...\n$check1\n" fi # New bucket exists and empty check - if [ "$new_bucket" ] && [ $pm == "pm1" ]; then + if [ "$new_bucket" ] && [ $pm == "pm1" ]; then # Removing as storage.buckets.get permission likely not needed - # new_bucket_exists=$($check2); if [ -z "$new_bucket_exists" ]; then handle_early_exit_on_restore "[!!!] Didnt find new bucket - Check: $check2\n"; fi; echo "[+] New Bucket exists"; + # new_bucket_exists=$($check2); if [ -z "$new_bucket_exists" ]; then handle_early_exit_on_restore "[!!!] Didnt find new bucket - Check: $check2\n"; fi; echo "[+] New Bucket exists"; # Throw warning if new bucket is NOT empty - if ! $continue; then nb_contents=$($check3 | wc -l); - if [ $nb_contents -lt 1 ]; then - echo " - New Bucket is empty"; - else - echo "[!!!] New bucket is NOT empty... $nb_contents files exist... 
exiting"; - echo "add "--continue" to skip this exit"; - echo -e "\nExample empty bucket command:\n aws s3 rm $new_bucket --recursive\n gsutil -m rm -r $new_bucket/* \n"; - handle_early_exit_on_restore "Please empty bucket or add --continue \n"; - fi; - else - echo " - [!] Skipping empty new_bucket check"; - fi; + if ! $continue; then nb_contents=$($check3 | wc -l); + if [ $nb_contents -lt 1 ]; then + echo " - New Bucket is empty"; + else + echo "[!!!] New bucket is NOT empty... $nb_contents files exist... exiting"; + echo "add "--continue" to skip this exit"; + echo -e "\nExample empty bucket command:\n aws s3 rm $new_bucket --recursive\n gsutil -m rm -r $new_bucket/* \n"; + handle_early_exit_on_restore "Please empty bucket or add --continue \n"; + fi; + else + echo " - [!] Skipping empty new_bucket check"; + fi; fi # Check if s3 bucket load date exists if $check4 > /dev/null ; then echo -e " - Backup directory exists" - else + else handle_early_exit_on_restore "\n[X] Backup directory load date ($backup_bucket/$load_date) DOES NOT exist in S3 \nCheck - $check4 \n\n"; fi @@ -2121,8 +2267,8 @@ validation_prechecks_for_restore() { if [[ -n "$compress_format" ]]; then # when compressed, storagemanager/configs wont exist but rather pigz_chunkXXX does. 
and mysql could be split up case "$compress_format" in - pigz) - flag_cs_local=false + pigz) + flag_cs_local=false flag_mysql=$skip_mdb flag_columnstoreData=$skip_bucket_data @@ -2146,7 +2292,7 @@ validation_prechecks_for_restore() { fi ;; *) # unknown option - handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" + handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" esac else flag_configs=false @@ -2164,7 +2310,7 @@ validation_prechecks_for_restore() { *configs*) flag_configs=true ;; #*) echo "unknown: $folder_name" ;; esac - done <<< "$folders" + done <<< "$folders" if $flag_storagemanager && $flag_mysql && $flag_columnstoreData && $flag_configs; then echo " - Backup subdirectories exist" @@ -2177,7 +2323,7 @@ validation_prechecks_for_restore() { handle_early_exit_on_restore " Backup is missing subdirectories... \n$check5\n" fi fi; - else + else handle_early_exit_on_restore " Failed to list backup contents...\n$check5\n" fi; fi; @@ -2185,10 +2331,10 @@ validation_prechecks_for_restore() { if $quiet; then xtra_cmd_args+=" 2> /dev/null"; rsync_flags=" -a"; fi } -# Restore Columnstore.xml by updating critical parameters +# Restore Columnstore.xml by updating critical parameters # Depends on $CS_CONFIGS_PATH/$col_xml_backup restore_columnstore_values() { - + printf "\nRestore Columnstore.xml Values" if ! $quiet; then printf "\n"; else printf "... 
"; fi; columnstore_config_pairs_to_transfer=( @@ -2209,20 +2355,20 @@ restore_columnstore_values() { for pair in "${columnstore_config_pairs_to_transfer[@]}"; do level_one=$(echo "$pair" | cut -d ' ' -f 1) level_two=$(echo "$pair" | cut -d ' ' -f 2) - + # Get the source value using mcsGetConfig -c source_value=$(mcsGetConfig -c "$CS_CONFIGS_PATH/$col_xml_backup" "$level_one" "$level_two") - + if [ -n "$source_value" ]; then # Set the value in the active Columnstore.xml using mcsSetConfig if mcsSetConfig "$level_one" "$level_two" "$source_value"; then # echo instead of printf to avoid escaping % from HashJoin TotalUmMemory - if ! $quiet; then echo " - Set $level_one $level_two $source_value"; fi; + if ! $quiet; then echo " - Set $level_one $level_two $source_value"; fi; else printf "\n[!] Failed to Set $level_one $level_two $source_value \n"; fi else - if ! $quiet; then printf " - N/A: $level_one $level_two \n"; fi; + if ! $quiet; then printf " - N/A: $level_one $level_two \n"; fi; fi done if $quiet; then printf " Done\n"; fi; @@ -2231,11 +2377,11 @@ restore_columnstore_values() { run_restore() { # Branch logic based on topology - if [ $storage == "LocalStorage" ]; then + if [ $storage == "LocalStorage" ]; then # Branch logic based on where the backup resides if [ $backup_destination == "Local" ]; then - + # MariaDB Columnstore Restore printf "\nRestore MariaDB Columnstore LocalStorage\n" @@ -2246,18 +2392,18 @@ run_restore() i=1; while [ $i -le $DBROOT_COUNT ]; do if [[ $pm == "pm$i" || $HA == true ]]; then - printf " - Deleting Columnstore Data$i ... "; - rm -rf /var/lib/columnstore/data$i/*; - printf " Done \n"; + printf " - Deleting Columnstore Data$i ... "; + rm -rf /var/lib/columnstore/data$i/*; + printf " Done \n"; fi ((i++)) - done + done tar_flags="xf" if ! $quiet; then tar_flags+="v"; fi; cs_prefix="$backup_location$load_date/$split_file_cs_prefix.$compress_format" case $compress_format in - pigz) + pigz) printf " - Decompressing CS Files -> local ... 
" cd / if ! eval "pigz -dc -p $PARALLEL_THREADS $cs_prefix | tar $tar_flags - "; then @@ -2266,16 +2412,16 @@ run_restore() printf " Done \n" ;; *) # unknown option - handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" + handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" esac # Set permissions after restoring files i=1; while [ $i -le $DBROOT_COUNT ]; do if [[ $pm == "pm$i" || $HA == true ]]; then - printf " - Chowning Columnstore Data$i ... "; - chown $columnstore_user:$columnstore_user -R /var/lib/columnstore/data$i/; - printf " Done \n"; + printf " - Chowning Columnstore Data$i ... "; + chown $columnstore_user:$columnstore_user -R /var/lib/columnstore/data$i/; + printf " Done \n"; fi ((i++)) done @@ -2287,25 +2433,25 @@ run_restore() while [ $i -le $DBROOT_COUNT ]; do if [[ $pm == "pm$i" || $HA == true ]]; then printf " - Restoring Columnstore Data$i ... "; - rm -rf /var/lib/columnstore/data$i/*; + rm -rf /var/lib/columnstore/data$i/*; rsync $rsync_flags $backup_location$load_date/data$i/ /var/lib/columnstore/data$i/; - chown $columnstore_user:$columnstore_user -R /var/lib/columnstore/data$i/; - printf " Done Data$i \n"; + chown $columnstore_user:$columnstore_user -R /var/lib/columnstore/data$i/; + printf " Done Data$i \n"; fi ((i++)) - done - - # Put configs in place + done + + # Put configs in place printf " - Columnstore Configs ... 
" rsync $rsync_flags $backup_location$load_date/configs/storagemanager.cnf $STORAGEMANGER_CNF rsync $rsync_flags $backup_location$load_date/configs/Columnstore.xml $CS_CONFIGS_PATH/$col_xml_backup rsync $rsync_flags $backup_location$load_date/configs/mysql/$pm/ $MARIADB_SERVER_CONFIGS_PATH/ - if [ -f "$backup_location$load_date/configs/cmapi_server.conf" ]; then - rsync $rsync_flags $backup_location$load_date/configs/cmapi_server.conf $CS_CONFIGS_PATH/$cmapi_backup ; + if [ -f "$backup_location$load_date/configs/cmapi_server.conf" ]; then + rsync $rsync_flags $backup_location$load_date/configs/cmapi_server.conf $CS_CONFIGS_PATH/$cmapi_backup ; fi; - printf " Done\n" + printf " Done\n" fi - + elif [ $backup_destination == "Remote" ]; then printf "[~] Copy MySQL Data..." tmp="localscpcopy-$load_date" @@ -2317,36 +2463,36 @@ run_restore() # Loop through per dbroot printf "[~] Columnstore Data..."; i=1; while [ $i -le $DBROOT_COUNT ]; do - if [[ $pm == "pm$i" || $HA == true ]]; then - rm -rf /var/lib/columnstore/data$i/; - rsync -av $scp:$backup_location$load_date/data$i/ /var/lib/columnstore/data$i/ ; + if [[ $pm == "pm$i" || $HA == true ]]; then + rm -rf /var/lib/columnstore/data$i/; + rsync -av $scp:$backup_location$load_date/data$i/ /var/lib/columnstore/data$i/ ; chown $columnstore_user:$columnstore_user -R /var/lib/columnstore/data$i/; fi ((i++)) done - printf "[+] Columnstore Data... Done\n" + printf "[+] Columnstore Data... Done\n" # Put configs in place printf " - Columnstore Configs ... 
" rsync $rsync_flags $backup_location$load_date/configs/storagemanager.cnf $STORAGEMANGER_CNF - rsync $rsync_flags $backup_location$load_date/configs/Columnstore.xml $CS_CONFIGS_PATH/$col_xml_backup - rsync $rsync_flags $backup_location$load_date/configs/mysql/$pm/ $MARIADB_SERVER_CONFIGS_PATH/ - if [ -f "$backup_location$load_date/configs/cmapi_server.conf" ]; then - rsync $rsync_flags $backup_location$load_date/configs/cmapi_server.conf $CS_CONFIGS_PATH/$cmapi_backup ; + rsync $rsync_flags $backup_location$load_date/configs/Columnstore.xml $CS_CONFIGS_PATH/$col_xml_backup + rsync $rsync_flags $backup_location$load_date/configs/mysql/$pm/ $MARIADB_SERVER_CONFIGS_PATH/ + if [ -f "$backup_location$load_date/configs/cmapi_server.conf" ]; then + rsync $rsync_flags $backup_location$load_date/configs/cmapi_server.conf $CS_CONFIGS_PATH/$cmapi_backup ; fi; printf " Done\n" load_date=$tmp - else + else handle_early_exit_on_restore "Invalid Script Variable --backup_destination: $backup_destination" fi - # MariaDB Server Restore + # MariaDB Server Restore printf "\nRestore MariaDB Server\n" if [[ -n "$compress_format" ]]; then # Handle compressed mariadb-backup restore mbd_prefix="$backup_location$load_date/$split_file_mdb_prefix.$compress_format" case $compress_format in - pigz) + pigz) printf " - Decompressing MariaDB Files -> $MARIADB_PATH ... " rm -rf $MARIADB_PATH/* cd / @@ -2355,14 +2501,14 @@ run_restore() fi; printf " Done \n" printf " - Running mariabackup --prepare ... 
" - if eval "mariabackup --prepare --target-dir=$MARIADB_PATH $xtra_cmd_args"; then - printf " Done\n"; - else - handle_early_exit_on_restore "Failed to --prepare\nmariabackup --prepare --target-dir=$MARIADB_PATH"; - fi; + if eval "mariabackup --prepare --target-dir=$MARIADB_PATH $xtra_cmd_args"; then + printf " Done\n"; + else + handle_early_exit_on_restore "Failed to --prepare\nmariabackup --prepare --target-dir=$MARIADB_PATH"; + fi; ;; *) # unknown option - handle_early_exit_on_backup "\nUnknown compression flag: $compress_format\n" + handle_early_exit_on_backup "\nUnknown compression flag: $compress_format\n" esac else eval "mariabackup --prepare --target-dir=$backup_location$load_date/mysql/ $xtra_cmd_args" @@ -2379,18 +2525,18 @@ run_restore() printf "\nRestore MariaDB Columnstore S3\n" # Sync the columnstore data from the backup bucket to the new bucket - if ! $skip_bucket_data && [ "$new_bucket" ] && [ $pm == "pm1" ]; then + if ! $skip_bucket_data && [ "$new_bucket" ] && [ $pm == "pm1" ]; then printf "[+] Starting Bucket Sync:\n - $backup_bucket/$load_date/columnstoreData to $new_bucket\n" s3sync $backup_bucket/$load_date/columnstoreData/ $new_bucket/ " - Done with S3 Bucket sync\n" - else + else printf "[!] Skipping Columnstore Bucket\n" fi; - + printf "[+] Starting Columnstore Configurations & Metadata: \n" if [[ -n "$compress_format" ]]; then cs_prefix="$backup_bucket/$load_date/$split_file_cs_prefix.$compress_format" case $compress_format in - pigz) + pigz) printf " - Decompressing CS Files bucket -> local ... " if [ $cloud == "gcp" ]; then # gsutil supports simple "cat prefix*" to standard out unlike awscli @@ -2399,11 +2545,11 @@ run_restore() handle_early_exit_on_restore "Failed to decompress and untar columnstore localfiles\ncommand:\n$gsutil cat $cs_prefix* | gzip -dc | tar xf - \n\n" fi elif [ $cloud == "aws" ]; then - + # List all the pigz compressed chunks in the S3 prefix - should I clear prior local files? 
chunk_list=$($awscli s3 ls "$cs_prefix" | awk '{print $NF}') cd / - + # Use process substitution to concatenate the compressed chunks and pipe to pigz and extract using tar if ! eval "pigz -dc -p $PARALLEL_THREADS <(for chunk in \$chunk_list; do $awscli s3 cp \"$backup_bucket/$load_date/\$chunk\" - ; done) | tar xf - "; then handle_early_exit_on_restore "Failed to decompress and untar columnstore localfiles\ncommand:\npigz -dc -p $PARALLEL_THREADS <(for chunk in \$chunk_list; do $awscli s3 cp \"$backup_bucket/$load_date/\$chunk\" - ; done) | tar xf - \n\n" @@ -2412,21 +2558,21 @@ run_restore() printf " Done \n" ;; *) # unknown option - handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" + handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" esac - + else - # Columnstore.xml and cmapi are renamed so that only specifc values are restored + # Columnstore.xml and cmapi are renamed so that only specifc values are restored s3cp $backup_bucket/$load_date/configs/Columnstore.xml $CS_CONFIGS_PATH/$col_xml_backup s3cp $backup_bucket/$load_date/configs/cmapi_server.conf $CS_CONFIGS_PATH/$cmapi_backup - s3cp $backup_bucket/$load_date/configs/storagemanager.cnf $STORAGEMANGER_CNF + s3cp $backup_bucket/$load_date/configs/storagemanager.cnf $STORAGEMANGER_CNF s3sync $backup_bucket/$load_date/storagemanager/cache/data$pm_number $cs_cache/data$pm_number/ " - Done cache/data$pm_number/\n" s3sync $backup_bucket/$load_date/storagemanager/metadata/data$pm_number $cs_metadata/data$pm_number/ " - Done metadata/data$pm_number/\n" if ! 
$skip_mdb; then s3sync $backup_bucket/$load_date/configs/mysql/$pm/ $MARIADB_SERVER_CONFIGS_PATH/ " - Done $MARIADB_SERVER_CONFIGS_PATH/\n"; fi if s3ls "$backup_bucket/$today/storagemanager/journal/data$ASSIGNED_DBROOT" > /dev/null 2>&1; then s3sync $backup_bucket/$load_date/storagemanager/journal/data$pm_number $cs_journal/data$pm_number/ " - Done journal/data$pm_number/\n" - else + else echo " - Done journal/data$pm_number was empty" fi fi; @@ -2435,44 +2581,44 @@ run_restore() printf "[+] Adjusting storagemanager.cnf ... \n" target_bucket=$backup_bucket target_prefix="prefix = $load_date\/columnstoreData\/" - if [ -n "$new_bucket" ]; then - target_bucket=$new_bucket; - target_prefix="# prefix \= cs\/"; + if [ -n "$new_bucket" ]; then + target_bucket=$new_bucket; + target_prefix="# prefix \= cs\/"; fi - if [ -n "$new_region" ]; then + if [ -n "$new_region" ]; then if [ $cloud == "gcp" ]; then endpoint="storage.googleapis.com"; else endpoint="s3.$new_region.amazonaws.com"; fi; - sed -i "s|^endpoint =.*|endpoint = ${endpoint}|g" $STORAGEMANGER_CNF; - sed -i "s|^region =.*|region = ${new_region}|g" $STORAGEMANGER_CNF; + sed -i "s|^endpoint =.*|endpoint = ${endpoint}|g" $STORAGEMANGER_CNF; + sed -i "s|^region =.*|region = ${new_region}|g" $STORAGEMANGER_CNF; echo " - Adjusted endpoint & region"; fi # Removes prefix of the protocol and escapes / for sed target_bucket_name=$(echo "${target_bucket#$protocol://}" | sed "s/\//\\\\\//g") - if [ -n "$new_key" ]; then - sed -i "s|aws_access_key_id =.*|aws_access_key_id = ${new_key}|g" $STORAGEMANGER_CNF; - echo " - Adjusted aws_access_key_id"; + if [ -n "$new_key" ]; then + sed -i "s|aws_access_key_id =.*|aws_access_key_id = ${new_key}|g" $STORAGEMANGER_CNF; + echo " - Adjusted aws_access_key_id"; fi - if [ -n "$new_secret" ]; then - sed -i "s|aws_secret_access_key =.*|aws_secret_access_key = ${new_secret}|g" $STORAGEMANGER_CNF; - echo " - Adjusted aws_secret_access_key"; + if [ -n "$new_secret" ]; then + sed -i 
"s|aws_secret_access_key =.*|aws_secret_access_key = ${new_secret}|g" $STORAGEMANGER_CNF; + echo " - Adjusted aws_secret_access_key"; fi - bucket=$( grep -m 1 "^bucket =" $STORAGEMANGER_CNF | sed "s/\//\\\\\//g"); + bucket=$( grep -m 1 "^bucket =" $STORAGEMANGER_CNF | sed "s/\//\\\\\//g"); sed -i "s/$bucket/bucket = $target_bucket_name/g" $STORAGEMANGER_CNF; echo " - Adjusted bucket"; - + prefix=$( grep -m 1 "^prefix =" $STORAGEMANGER_CNF | sed "s/\//\\\\\//g"); - if [ ! -z "$prefix" ]; then - sed -i "s/$prefix/$target_prefix/g" $STORAGEMANGER_CNF; - echo " - Adjusted prefix"; + if [ ! -z "$prefix" ]; then + sed -i "s/$prefix/$target_prefix/g" $STORAGEMANGER_CNF; + echo " - Adjusted prefix"; fi; # Check permissions chown -R $columnstore_user:$columnstore_user /var/lib/columnstore/ chown -R root:root $MARIADB_SERVER_CONFIGS_PATH/ - + # Confirm S3 connection works if [ $mode == "direct" ]; then echo "[+] Checking S3 Connection ..."; if testS3Connection $xtra_cmd_args; then echo " - S3 Connection passes" ; else handle_early_exit_on_restore "\n[X] S3 Connection issues - retest/configure\n"; fi; fi; printf "[+] MariaDB Columnstore Done\n\n" @@ -2480,10 +2626,10 @@ run_restore() if ! $skip_mdb; then printf "Restore MariaDB Server\n" if [[ -n "$compress_format" ]]; then - # Handle compressed mariadb-backup restore + # Handle compressed mariadb-backup restore mbd_prefix="$backup_bucket/$load_date/$split_file_mdb_prefix.$compress_format" case $compress_format in - pigz) + pigz) printf "[+] Decompressing mariadb-backup bucket -> local ... " rm -rf $MARIADB_PATH/* if [ $cloud == "gcp" ]; then @@ -2494,7 +2640,7 @@ run_restore() chunk_list=$($awscli s3 ls "$mbd_prefix" | awk '{print $NF}') cd / - + if ! 
eval "pigz -dc -p $PARALLEL_THREADS <(for chunk in \$chunk_list; do $awscli s3 cp "$backup_bucket/$load_date/\$chunk" -; done) | mbstream -p $PARALLEL_THREADS -x -C $MARIADB_PATH"; then handle_early_exit_on_restore "Failed to decompress mariadb backup\ncommand:\npigz -dc -p $PARALLEL_THREADS <(for chunk in \$chunk_list; do $awscli s3 cp "$backup_bucket/$load_date/\$chunk" -; done) | mbstream -p $PARALLEL_THREADS -x -C $MARIADB_PATH \n" fi; @@ -2504,29 +2650,29 @@ run_restore() if eval "mariabackup --prepare --target-dir=$MARIADB_PATH $xtra_cmd_args"; then printf " Done\n"; else handle_early_exit_on_restore "Failed to --prepare\nmariabackup --prepare --target-dir=$MARIADB_PATH"; fi; ;; *) # unknown option - handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" + handle_early_exit_on_backup "\nunknown compression flag: $compress_format\n" esac - + else # Copy the MariaDB data, prepare and put in place printf " - Copying down MariaDB data ... " - rm -rf $backup_location$load_date/mysql/ + rm -rf $backup_location$load_date/mysql/ mkdir -p $backup_location$load_date/mysql/ s3sync $backup_bucket/$load_date/mysql/ $backup_location$load_date/mysql " Done\n" # Run prepare and copy back printf " - Running mariabackup --prepare ... " - if eval "mariabackup --prepare --target-dir=$backup_location$load_date/mysql/ $xtra_cmd_args"; then - printf " Done\n"; - else - echo "failed"; + if eval "mariabackup --prepare --target-dir=$backup_location$load_date/mysql/ $xtra_cmd_args"; then + printf " Done\n"; + else + echo "failed"; fi; rm -rf /var/lib/mysql/* > /dev/null printf " - Running mariabackup --copy-back ... 
" - if eval "mariabackup --copy-back --target-dir=$backup_location$load_date/mysql/ $xtra_cmd_args"; then - printf " Done\n"; - else - echo "failed"; + if eval "mariabackup --copy-back --target-dir=$backup_location$load_date/mysql/ $xtra_cmd_args"; then + printf " Done\n"; + else + echo "failed"; fi; fi; @@ -2543,13 +2689,13 @@ run_restore() fi if [[ $pm == "pm1" ]]; then restore_columnstore_values; fi; - + end=$(date +%s) runtime=$((end-start)) - printf "\n[+] Runtime: $runtime\n" - printf "[+] $final_message\n\n" - if ! $quiet; then - if [ $pm == "pm1" ]; then + printf "\nRuntime: $runtime\n" + printf "$final_message\n\n" + if ! $quiet; then + if [ $pm == "pm1" ]; then echo -e " - Last you need to manually configure mariadb replication between nodes" echo -e " - systemctl start mariadb " echo -e " - systemctl start mariadb-columnstore-cmapi " @@ -2571,24 +2717,28 @@ load_default_dbrm_variables() { backup_location=/tmp/dbrm_backups STORAGEMANGER_CNF="/etc/columnstore/storagemanager.cnf" storage=$(grep -m 1 "^service = " $STORAGEMANGER_CNF | awk '{print $3}') + skip_storage_manager=false mode="once" quiet=false - + + list_dbrm_backups=false dbrm_dir="/var/lib/columnstore/data1/systemFiles/dbrm" - if [ "$storage" == "S3" ]; then + if [ "$storage" == "S3" ]; then dbrm_dir="/var/lib/columnstore/storagemanager" fi } -# for next release load_default_dbrm_restore_variables() { - backup_location=/tmp/dbrm_backups + auto_start=true + backup_location="/tmp/dbrm_backups" STORAGEMANGER_CNF="/etc/columnstore/storagemanager.cnf" storage=$(grep -m 1 "^service = " $STORAGEMANGER_CNF | awk '{print $3}') backup_folder_to_restore="" + skip_dbrm_backup=false + skip_storage_manager=false dbrm_dir="/var/lib/columnstore/data1/systemFiles/dbrm" - if [ "$storage" == "S3" ]; then + if [ "$storage" == "S3" ]; then dbrm_dir="/var/lib/columnstore/storagemanager" fi } @@ -2597,35 +2747,39 @@ print_dbrm_backup_help_text() { echo " Columnstore DBRM Backup - -m | --mode 'loop' or 'once' ; 
Determines if this script runs in a forever loop sleeping -i minutes or just once - -i | --interval Number of minutes to sleep when --mode loop - -r | --retention-days Number of days of dbrm backups to retain - script will delete based on last update file time - -p | --path path of where to save the dbrm backups on disk - -nb | --name-backup custom name to prefex dbrm backups with + -m | --mode ['loop','once']; Determines if this script runs in a forever loop sleeping -i minutes or just once + -i | --interval Number of minutes to sleep when --mode loop + -r | --retention-days Retain dbrm backups created within the last X days, the rest are deleted + -p | --path path of where to save the dbrm backups on disk + -nb | --name-backup custom name to prefex dbrm backups with + -ssm | --skip-storage-manager skip backing up storagemanager directory Default: ./$0 dbrm_backup -m once --retention-days 7 --path /tmp/dbrm_backups Examples: ./$0 dbrm_backup --mode loop --interval 90 --retention-days 7 --path /mnt/dbrm_backups ./$0 dbrm_backup --mode once --retention-days 7 --path /mnt/dbrm_backups -nb my-one-off-backup - + Cron Example: */60 */3 * * * root bash /root/$0 dbrm_backup -m once --retention-days 7 --path /tmp/dbrm_backups >> /tmp/dbrm_backups/cs_backup.log 2>&1 "; } -# for next release print_dbrm_restore_help_text() { echo " Columnstore DBRM Restore - -p | --path path of where to save the dbrm backups on disk - -d | --directory date or directory chose to restore from + -p | --path path of where to save the dbrm backups on disk + -d | --directory date or directory chose to restore from + -ns | --no-start do not attempt columnstore startup post dbrm_restore + -sdbk| --skip-dbrm-backup skip backing up dbrms brefore restoring + -ssm | --skip-storage-manager skip restoring storagemanager directory - Default: ./$0 dbrm_restore --path /tmp/dbrm_backups + Default: ./$0 dbrm_restore --path /tmp/dbrm_backups Examples: - ./$0 dbrm_restore --path /tmp/dbrm_backups --directory 
dbrm_backup12252023 + ./$0 dbrm_restore --path /tmp/dbrm_backups --directory dbrm_backup_20240318_172842 + ./$0 dbrm_restore --path /tmp/dbrm_backups --directory dbrm_backup_20240318_172842 --no-start "; } @@ -2663,6 +2817,10 @@ parse_dbrms_variables() { shift # past argument shift # past value ;; + -ssm|--skip-storage-manager) + skip_storage_manager=true + shift # past argument + ;; -q | --quiet) quiet=true shift # past argument @@ -2671,6 +2829,10 @@ parse_dbrms_variables() { print_dbrm_backup_help_text; exit 1; ;; + "list") + list_dbrm_backups=true + shift # past argument + ;; *) # unknown option printf "\nunknown flag: $1\n" print_dbrm_backup_help_text @@ -2679,7 +2841,6 @@ parse_dbrms_variables() { done } -# for next release parse_dbrm_restore_variables() { # Dynamic Arguments @@ -2699,6 +2860,18 @@ parse_dbrm_restore_variables() { shift # past argument shift # past value ;; + -ns | --no-start) + auto_start=false + shift # past argument + ;; + -sdbk| --skip-dbrm-backup) + skip_dbrm_backup=true + shift # past argument + ;; + -ssm|--skip-storage-manager) + skip_storage_manager=true + shift # past argument + ;; -h|--help|-help|help) print_dbrm_restore_help_text; exit 1; @@ -2709,6 +2882,11 @@ parse_dbrm_restore_variables() { exit 1; esac done + + if [[ "${backup_location: -1}" == "/" ]]; then + # Remove the final / + backup_location="${backup_location%/}" + fi } confirm_numerical_or_decimal_else_fail() { @@ -2749,9 +2927,9 @@ validation_prechecks_for_dbrm_backup() { confirm_numerical_or_decimal_else_fail "$retention_days" "Retention" # Check backup location exists - if [ ! -d $backup_location ]; then + if [ ! -d $backup_location ]; then echo "[+] Created: $backup_location" - mkdir "$backup_location"; + mkdir "$backup_location"; fi; # Confirm bucket connection @@ -2763,25 +2941,139 @@ validation_prechecks_for_dbrm_backup() { fi; } -# for next release +validation_prechecks_before_listing_restore_options() { + + # confirm backup directory exists + if [ ! 
-d $backup_location ]; then + printf "[!!] Backups Directory does NOT exist --path $backup_location \n" + printf "ls -la $backup_location\n\n" + exit 1; + fi + + # Check if backup directory is empty + if [ -z "$(find "$backup_location" -mindepth 1 | head )" ]; then + printf "[!!] Backups Directory is empty --path $backup_location \n" + printf "ls -la $backup_location\n\n" + exit 1 + fi +} + validation_prechecks_for_dbrm_restore() { + + printf "Prechecks\n" + echo "--------------------------------------------------------------------------" # Confirm storage not empty if [ -z "$storage" ]; then printf "[!] Empty storage: \ncheck: grep -m 1 \"^service = \" \$STORAGEMANGER_CNF | awk '{print \$3}' \n\n"; fi; - # Check backup location exists - if [ ! -d $backup_location ]; then - echo "[!] \$backup_location: Expected directory of dbrm_backups" + # Check backup directory exists + if [ ! -d $backup_location ]; then + printf "[!] \$backup_location: Path of backups does Not exist\n" + printf "Path: $backup_location\n\n" exit 2; fi + # Check specific backup exists + backup_folder_to_restore_dbrms=$backup_folder_to_restore + if [ $storage == "S3" ]; then + backup_folder_to_restore_dbrms="${backup_folder_to_restore}/dbrms" + fi + + if [ ! -d "${backup_location}/${backup_folder_to_restore_dbrms}" ]; then + printf "[!] \$backup_folder_to_restore: Path of backup to restore does Not exist\n" + printf "Path: ${backup_location}/${backup_folder_to_restore_dbrms}\n\n" + exit 2; + else + echo " - Backup directory exists" + if [ "$(ls -A ${backup_location}/${backup_folder_to_restore_dbrms})" ]; then + + expected_files=( + "BRM_saves_current" + "BRM_saves_em" + "BRM_saves_journal" + "BRM_saves_vbbm" + "BRM_saves_vss" + "BRM_savesA_em" + "BRM_savesA_vbbm" + "BRM_savesA_vss" + "BRM_savesB_em" + "BRM_savesB_vbbm" + "BRM_savesB_vss" + "oidbitmap" + "SMTxnID" + ) + + # Check if all expected files exist + for file in "${expected_files[@]}"; do + if [ ! 
-f "${backup_location}/${backup_folder_to_restore_dbrms}/${file}" ]; then + printf "[!] File not found: ${file} in the DBRM backup directory\n" + printf "Path: ${backup_location}/${backup_folder_to_restore_dbrms}/${file}\n\n" + exit 2; + fi + done + + # For S3 check storagemanager dir exists in backup unless skip storagemanager is passed + if [ "$storage" == "S3" ] && [ $skip_storage_manager == false ]; then + if [ ! -d "${backup_location}/${backup_folder_to_restore}/metadata" ]; then + printf "\n[!!] Path Not Found: ${backup_location}/${backup_folder_to_restore}/metadata \n" + printf "Retry with a different backup to restore or use flag --skip-storage-manager\n\n" + exit 2; + fi; + fi + + + printf " - Backup contains all files\n" + + else + printf "[!] No files found in the DBRM backup directory\n" + printf "Path: ${backup_location}/${backup_folder_to_restore_dbrms}\n\n" + exit 2; + fi + fi + # Confirm bucket connection if [ "$storage" == "S3" ]; then - if ! testS3Connection 1>/dev/null 2>/dev/null; then + if testS3Connection 1>/dev/null 2>/dev/null; then + echo " - S3 Connection works" + else printf "\n[!] Failed testS3Connection\n\n" exit 1; fi fi; + # Download cs_package_manager.sh if not exists + if [ ! -f "cs_package_manager.sh" ]; then + wget https://raw.githubusercontent.com/mariadb-corporation/mariadb-columnstore-engine/develop/cmapi/scripts/cs_package_manager.sh; chmod +x cs_package_manager.sh; + fi; + if source cs_package_manager.sh source ;then + echo " - Sourced cs_package_manager.sh" + else + printf "\n[!!] 
Failed to source cs_package_manager.sh\n\n" + exit 1; + + fi + + # Confirm the function exists and the source of cs_package_manager.sh worked + if command -v check_package_managers &> /dev/null; then + # The function exists, call it + check_package_managers + else + echo "Error: 'check_package_managers' function not found via cs_package_manager.sh"; + exit 1; + fi + cs_package_manager_functions=( + "start_cmapi" + "start_mariadb" + "init_cs_up" + ) + + for func in "${cs_package_manager_functions[@]}"; do + if command -v $func &> /dev/null; then + continue; + else + echo "Error: '$func' function not found via cs_package_manager.sh"; + exit 1; + fi + done } process_dbrm_backup() { @@ -2789,16 +3081,30 @@ process_dbrm_backup() { load_default_dbrm_variables parse_dbrms_variables "$@"; + if $list_dbrm_backups; then + validation_prechecks_before_listing_restore_options + printf "\nExisting DBRM Backups\n"; + list_restore_options_from_backups "$@" + echo "--------------------------------------------------------------------------" + printf "Restore with ./$0 dbrm_restore --path $backup_location --directory \n\n" + exit 0; + fi; + if ! 
$quiet ; then - printf "\n[+] Inputs\n"; - printf " CS Storage: $storage\n"; - printf " Source: $dbrm_dir\n"; - printf " Backups: $backup_location\n"; - if [ "$mode" == "loop" ]; then - printf " Interval: $backup_interval_minutes minutes\n"; + + printf "\nDBRM Backup\n"; + echo "--------------------------------------------------------------------------" + if [ "$storage" == "S3" ]; then echo "Skips: Storagemanager($skip_storage_manager)"; fi; + echo "--------------------------------------------------------------------------" + printf "CS Storage: $storage\n"; + printf "Source: $dbrm_dir\n"; + printf "Backups: $backup_location\n"; + if [ "$mode" == "loop" ]; then + printf "Interval: $backup_interval_minutes minutes\n"; fi; - printf " Retention: $retention_days day(s)\n" - printf " Mode: $mode\n\n" + printf "Retention: $retention_days day(s)\n" + printf "Mode: $mode\n" + echo "--------------------------------------------------------------------------" fi; validation_prechecks_for_dbrm_backup @@ -2809,12 +3115,17 @@ process_dbrm_backup() { timestamp=$(date +%Y%m%d_%H%M%S) backup_folder="$backup_location/${backup_base_name}_${timestamp}" mkdir -p "$backup_folder" - - # Copy files to the backup directory - cp -arp "$dbrm_dir"/* "$backup_folder" - if [ "$storage" == "S3" ]; then + # Copy files to the backup directory + if [[ $skip_storage_manager == false || $storage == "LocalStorage" ]]; then + if ! $quiet; then printf " - copying $dbrm_dir ..."; fi; + cp -arp "$dbrm_dir"/* "$backup_folder" + if ! $quiet; then printf " Done\n"; fi; + fi + + if [ "$storage" == "S3" ]; then # smcat em files to disk + if ! 
$quiet; then printf " - copying DBRMs from bucket ..."; fi; mkdir $backup_folder/dbrms/ smls /data1/systemFiles/dbrm 2>/dev/null > $backup_folder/dbrms/dbrms.txt smcat /data1/systemFiles/dbrm/BRM_saves_current 2>/dev/null > $backup_folder/dbrms/BRM_saves_current @@ -2828,190 +3139,524 @@ process_dbrm_backup() { smcat /data1/systemFiles/dbrm/BRM_savesB_em 2>/dev/null > $backup_folder/dbrms/BRM_savesB_em smcat /data1/systemFiles/dbrm/BRM_savesB_vbbm 2>/dev/null > $backup_folder/dbrms/BRM_savesB_vbbm smcat /data1/systemFiles/dbrm/BRM_savesB_vss 2>/dev/null > $backup_folder/dbrms/BRM_savesB_vss + smcat /data1/systemFiles/dbrm/oidbitmap 2>/dev/null > $backup_folder/dbrms/oidbitmap + smcat /data1/systemFiles/dbrm/SMTxnID 2>/dev/null > $backup_folder/dbrms/SMTxnID + smcat /data1/systemFiles/dbrm/tablelocks 2>/dev/null > $backup_folder/dbrms/tablelocks + if ! $quiet; then printf " Done\n"; fi; fi - - # Clean up old backups - # example: find /tmp/dbrmBackups/ -maxdepth 1 -type d -name "dbrm_backup_*" -mtime +1 -exec rm -r {} \; - find "$backup_location" -maxdepth 1 -type d -name "${backup_base_name}_*" -mtime +$retention_days -exec rm -r {} \; - printf "[+] Created: $backup_folder\n" - if [ "$mode" == "once" ]; then break; fi; + if [ $retention_days -gt 0 ] ; then + # Clean up old backups + # example: find /tmp/dbrm_backups -maxdepth 1 -type d -name "dbrm_backup_*" -mtime +1 -exec rm -r {} \; + if ! $quiet; then printf " - applying retention policy ..."; fi; + find "$backup_location" -maxdepth 1 -type d -name "${backup_base_name}_*" -mtime +$retention_days -exec rm -r {} \; + if ! $quiet; then printf " Done\n"; fi; + fi; - printf "[+] Sleeping ... $sleep_seconds seconds\n" + printf "Created: $backup_folder\n" + + + if [ "$mode" == "once" ]; then + end=$(date +%s) + runtime=$((end-start)) + if ! $quiet; then printf "Runtime: $runtime\n"; fi; + break; + fi; + + printf "Sleeping ... $sleep_seconds seconds\n" sleep "$sleep_seconds" done - if ! 
$quiet; then printf "[+] Complete\n\n"; fi; + if ! $quiet; then printf "Complete\n\n"; fi; +} + +is_cmapi_installed() { + + cmapi_installed_command="" + case $package_manager in + yum ) + cmapi_installed_command="yum list installed MariaDB-columnstore-cmapi &> /dev/null;"; + ;; + apt ) + cmapi_installed_command="dpkg-query -s mariadb-columnstore-cmapi &> /dev/null;"; + ;; + *) # unknown option + echo "\npackage manager not implemented: $package_manager\n" + exit 2; + esac + + if eval $cmapi_installed_command ; then + return 0; + else + return 1; + + fi; +} + +start_mariadb_cmapi_columnstore() { + + start_mariadb + start_cmapi + init_cs_up + + # For verbose debugging + #grep -i rollbackAll /var/log/mariadb/columnstore/debug.log | tail -n 3 | awk '{ print $1, $2, $3, $(NF-2), $(NF-1), $NF }' + +} + +# Currently assumes systemd installed +shutdown_columnstore_mariadb_cmapi() { + + pf=35 + init_cs_down + wait_cs_down 0 + + printf "%-${pf}s ... " " - Stopping MariaDB Server" + if ! systemctl stop mariadb; then + echo "[!!] Failed to stop mariadb" + exit 1; + else + printf "Done\n" + fi + + if is_cmapi_installed ; then + printf "%-${pf}s ... " " - Stopping CMAPI" + if ! systemctl stop mariadb-columnstore-cmapi; then + echo "[!!] Failed to stop CMAPI" + exit 1; + else + printf "Done\n" + fi + fi +} + +# Input +# $1 - directory to search +# Output +# subdir_dbrms +# latest_em_file +# em_file_size +# em_file_created +# em_file_full_path +# storagemanager_dir_exists +get_latest_em_from_directory() { + + subdir_dbrms="" + latest_em_file="" + em_file_size="" + em_file_created="" + em_file_full_path="" + storagemanager_dir_exists=true + + # Find the most recently modified file in the current subdirectory + if [ $storage == "S3" ]; then + subdir_dbrms="${1}/dbrms/" + subdir_metadata="${1}/metadata/data1/systemFiles/dbrm/" + + # Handle missing metadata directory + if [ ! 
-d $subdir_dbrms ]; then + printf "%-45s Missing dbrms sub directory\n" "$(basename $1)" + return 1; + fi + + + if [ -d "${1}/metadata" ]; then + latest_em_meta_file=$(find "${subdir_metadata}" -maxdepth 1 -type f -name "BRM_saves*_em.meta" -exec ls -lat {} + | awk 'NR==1 {printf "%-12s %-4s %-2s %-5s %s\n", $5, $6, $7, $8, $9}'| head -n 1 ) + else + # Handle missing metadata directory & guess the latest em file based on the largest size + + # Example: find /tmp/dbrm_backups/dbrm_backup_20240605_180906/dbrms -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lhS {} + + latest_em_meta_file=$(find "${subdir_dbrms}" -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lhS {} + | awk 'NR==1 {printf "%-12s %-4s %-2s %-5s %s\n", $5, $6, $7, $8, $9}'| head -n 1) + storagemanager_dir_exists=false + fi + + em_meta_file_name=$(basename "$latest_em_meta_file") + latest_em_file="$subdir_dbrms$(echo $em_meta_file_name | sed 's/\.meta$//' )" + em_file_size=$(ls -la "$latest_em_file" | awk '{print $5}' ) + em_file_created=$(echo "$latest_em_meta_file" | awk '{print $2,$3,$4}' ) + em_file_full_path=$latest_em_file + + if [ ! 
-f $latest_em_file ]; then + echo "S3 List Option: Failed to find $latest_em_file" + exit; + fi + else + subdir_dbrms="$1" + latest_em_file=$(find "${subdir_dbrms}" -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lat {} + | awk 'NR==1 {printf "%-12s %-4s %-2s %-5s %s\n", $5, $6, $7, $8, $9}'| head -n 1) + em_file_size=$(echo "$latest_em_file" | awk '{print $1}' ) + em_file_created=$(echo "$latest_em_file" | awk '{print $2,$3,$4}' ) + em_file_full_path=$(echo $latest_em_file | awk '{print $NF}' ) + fi +} + +list_restore_options_from_backups() { + + echo "--------------------------------------------------------------------------" + printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s\n" "Options" "Last-Updated" "Extent Map" "EM-Size" "Journal-Size" "VBBM-Size" "VSS-Size" + + # Iterate over subdirectories + for subdir in "${backup_location}"/*; do + + get_latest_em_from_directory "$subdir" + + if [ -f "${subdir_dbrms}/BRM_saves_journal" ]; then + em_file_name=$(basename "$em_file_full_path") + version_prefix=${em_file_name::-3} + journal_file=$(ls -la "${subdir_dbrms}/BRM_saves_journal" 2>/dev/null | awk 'NR==1 {print $5}' ) + vbbm_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vbbm" 2>/dev/null | awk 'NR==1 {print $5}' ) + vss_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vss" 2>/dev/null | awk 'NR==1 {print $5}' ) + if [ $storagemanager_dir_exists == false ]; then + vss_file+=" (No Storagemanager Dir)" + fi; + printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s\n" "$(basename "$subdir")" "$em_file_created" "$em_file_name" "$em_file_size" "$journal_file" "$vbbm_file" "$vss_file" + fi + done + + } -# for next release process_dbrm_restore() { - + load_default_dbrm_restore_variables parse_dbrm_restore_variables "$@" # print current job variables - printf "\n[+] Inputs\n"; - printf " Backups Directory: $backup_location\n"; - printf " Backup to Restore: $backup_folder_to_restore\n"; - printf " CS Storage: $storage\n"; - printf " Restore Target: $dbrm_dir\n"; + printf 
"\nDBRM Restore Variables\n" + echo "--------------------------------------------------------------------------" + echo "Skips: DBRM Backup($skip_dbrm_backup) Storagemanager($skip_storage_manager)" + echo "--------------------------------------------------------------------------" + printf "CS Storage: $storage \n" + printf "Backups Directory: $backup_location \n" + printf "Backup to Restore: $backup_folder_to_restore \n\n" - # Display restore options - if [ -z "$backup_folder_to_restore" ]; then - printf "\n[!!] --directory is empty\n" - printf "See Options Below: \n" - printf "%-30s %-12s %-13s %-63s %-12s\n" "Option" "Size" "Last updated" "File Location" "Journal Size"; - - # Iterate over subdirectories - for subdir in "${backup_location}"/*; do - # Find the most recently modified file in the current subdirectory - latest_em_file=$(find "${subdir}" -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lt --time=ctime {} + | awk 'NR==1 {printf "%-12s %-4s %-2s %-5s %s\n", $5, $6, $7, $8, $9}'| head -n 1) - journal_file=$(ls -la "${subdir}/BRM_saves_journal" | awk 'NR==1 {print $5}' ) - printf "%-30s %-90s %-12s\n" "$(basename "$subdir")" "$latest_em_file" "$journal_file"; - done + validation_prechecks_before_listing_restore_options - printf "\n[!!] define which backup to restore eg. --directory dbrm_backup_20240103_183536\n" - printf "exiting ...\n\n" + # Display restore options + if [ -z "$backup_folder_to_restore" ]; then + printf "[!] Pick Option\n" + list_restore_options_from_backups "$@" + printf "\nExample: \n" + printf " --directory dbrm_backup_20240103_183536 \n\n" + printf "Define which backup to restore via flag --directory \n" + echo "Rerun: $0 $@ --directory xxxxxxx" + echo "" exit 1; fi; - + validation_prechecks_for_dbrm_restore + shutdown_columnstore_mariadb_cmapi - if [ ! 
-f "cs_package_manager.sh" ]; then - wget https://raw.githubusercontent.com/mariadb-corporation/mariadb-columnstore-engine/develop/extra/cs_package_manager.sh; chmod +x cs_package_manager.sh; - fi; - source cs_package_manager.sh source + # Take an automated backup + if [[ $skip_dbrm_backup == false ]]; then + printf " - Saving a DBRM backup before restoring ... \n" + if ! process_dbrm_backup -p $backup_location -r 9999 -nb dbrms_before_restore_backup --quiet ; then + echo "[!!] Failed to take a DBRM backup before restoring" + echo "exiting ..." + exit 1; + fi; + fi; - # Confirm the function exists and the source of cs_package_manager.sh worked - if command -v check_package_managers &> /dev/null; then - # The function exists, call it - check_package_managers - else - echo "Error: 'check_package_managers' function not found via cs_package_manager.sh"; + # Detect newest date _em from the set, if smaller than the current one throw a warning + get_latest_em_from_directory "${backup_location}/${backup_folder_to_restore}" + if [ ! -f $em_file_full_path ]; then + echo "[!] Failed to parse _em file: $em_file_full_path doesnt exist" + exit 1; + fi; + echo "em_file_full_path: $em_file_full_path" + + echo "latest_em_file: $latest_em_file" + echo "em_file_size: $em_file_size" + echo "em_file_created: $em_file_created" + echo "storagemanager_dir_exists: $storagemanager_dir_exists" + echo "subdir_dbrms: $subdir_dbrms" + + em_file_name=$(basename $em_file_full_path) + prefix="${em_file_name%%_em}" + echo "em_file_name: $em_file_name" + echo "prefix: $prefix" + + if [ -z "$em_file_name" ]; then + printf "[!] Undefined EM file name\n" + printf "find "${backup_location}/${backup_folder_to_restore_dbrms}" -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lat {} + \n\n" exit 1; fi - init_cs_down - wait_cs_down 0 - if ! process_dbrm_backup -nb dbrms_before_restore_backup --quiet ; then - echo "[!!] Failed to take a DBRM backup before restoring" - echo "exiting ..." 
- exit 1; - fi; - # split logic between S3 & LocalStorage - if [ $storage == "S3" ]; then + if [ $storage == "S3" ]; then process_s3_dbrm_restore else process_localstorage_dbrm_restore fi; } -# for next release -process_s3_dbrm_restore() { - - # shuffle DBRMs - detect newest date _em from the set, if smaller than the current one throw a warning - - # manually run load_brm - - # manually run save_brm - - # start columnstore - - # run a basic health check - return 1; +# $1 - File to cat/ upload into S3 +# $2 - Location in storagemanager to overwrite +# example: smput_or_error "${backup_location}/${backup_folder_to_restore_dbrms}/${prefix}_em" "/data1/systemFiles/dbrm/BRM_saves_em" +smput_or_error() { + if ! cat "$1" | smput "$2" 2>/dev/null; then + printf "[!] Failed to smput: $1\n" + else + printf "." + fi } -# for next release +# Depends on +# em_file - most recent EM +# em_file_full_path - full path to most recent EM +# em_file_name - Just the file name of the EM +# prefix - Prefix of the EM file +process_s3_dbrm_restore() { + + printf_offset=45 + printf "\nBefore DBRMs Restore\n" + echo "--------------------------------------------------------------------------" + if ! command -v smls > /dev/null; then + printf "[!] smls not installed ... Exiting\n\n" + exit 1 + else + current_status=$(smls /data1/systemFiles/dbrm/ 2>/dev/null); + if [ $? -ne 0 ]; then + printf "\n[!] Failed to get smls status\n\n" + exit 1 + fi + echo "$current_status" | grep -E "BRM_saves_em|BRM_saves_vbbm|BRM_saves_vss|BRM_saves_journal|BRM_saves_current|oidbitmap" + fi + + printf "\nRestoring DBRMs\n" + echo "--------------------------------------------------------------------------" + printf " - Desired EM: $em_file_full_path\n" + printf " - Copying DBRMs: ${backup_location}/${backup_folder_to_restore_dbrms} -> S3 Bucket \n" + + printf "\nPreparing\n" + printf "%-${printf_offset}s ..." " - Clearing storagemanager caches" + if [ ! 
-d "$dbrm_dir/cache" ]; then + echo "Directory $dbrm_dir/cache does not exist." + exit 1 + fi + for cache_dir in "${dbrm_dir}/cache"/*; do + if [ -d "${dbrm_dir}/cache/${cache_dir}" ]; then + echo " - Removing Cache: $cache_dir" + else + printf "." + fi + done + printf " Success\n" + + printf "%-${printf_offset}s ... " " - Starting mcs-storagemanager" + if ! systemctl start mcs-storagemanager ; then + echo "[!!] Failed to start mcs-storagemanager " + exit 1; + else + printf "Done\n" + fi + + printf "\nRestoring\n" + printf "%-${printf_offset}s " " - Restoring Prefix: $prefix " + smput_or_error "${backup_location}/${backup_folder_to_restore_dbrms}/${prefix}_em" "/data1/systemFiles/dbrm/BRM_saves_em" + smput_or_error "${backup_location}/${backup_folder_to_restore_dbrms}/${prefix}_vbbm" "/data1/systemFiles/dbrm/BRM_saves_vbbm" + smput_or_error "${backup_location}/${backup_folder_to_restore_dbrms}/${prefix}_vss" "/data1/systemFiles/dbrm/BRM_saves_vss" + if ! echo "BRM_saves" | smput /data1/systemFiles/dbrm/BRM_saves_current 2>/dev/null; then + printf "[!] Failed to smput: BRM_saves_current\n" + else + printf "." + fi + + em_files=( + "BRM_saves_journal" + "oidbitmap" + "SMTxnID" + "tablelocks" + ) + for file in "${em_files[@]}"; do + if [ ! -f "${backup_location}/${backup_folder_to_restore_dbrms}/${file}" ]; then + printf "[!] File not found: ${file} in the S3 DBRM backup directory\n" + printf "Path: ${backup_location}/${backup_folder_to_restore_dbrms}/${file}\n\n" + continue + fi + smput_or_error "${backup_location}/${backup_folder_to_restore_dbrms}/${file}" "/data1/systemFiles/dbrm/${file}" + done + printf " Success\n" + + printf "%-${printf_offset}s ... " " - Stopping mcs-storagemanager" + if ! systemctl stop mcs-storagemanager ; then + echo "[!!] Failed to stop mcs-storagemanager " + exit 1; + else + printf "Done\n" + fi + printf "%-${printf_offset}s ... " " - clearShm" + clearShm + printf "Done\n" + sleep 2 + + printf "%-${printf_offset}s ... 
" " - Starting mcs-storagemanager" + if ! systemctl start mcs-storagemanager ; then + echo "[!!] Failed to start mcs-storagemanager " + exit 1; + else + printf "Done\n" + fi + + manually_run_loadbrm_and_savebrm + + printf "\nAfter DBRM Restore\n" + echo "--------------------------------------------------------------------------" + current_status=$(smls /data1/systemFiles/dbrm/ 2>/dev/null); + if [ $? -ne 0 ]; then + printf "\n[!] Failed to get smls status\n\n" + exit 1 + fi + echo "$current_status" | grep -E "BRM_saves_em|BRM_saves_vbbm|BRM_saves_vss|BRM_saves_journal|BRM_saves_current|oidbitmap" + + if $auto_start; then + printf "\nStartup\n" + echo "--------------------------------------------------------------------------" + start_mariadb_cmapi_columnstore + fi + + # printf "\n[+] Health Check ...\n" + # sleep 2 + # # run a basic health check + # mariadb -e "create database if not exists $backup_folder_to_restore" + # mariadb $backup_folder_to_restore -e "drop table if exists t1" + # mariadb $backup_folder_to_restore -e "create table t1 (a int) engine=columnstore" + # mariadb $backup_folder_to_restore -e "insert into t1 values (1)" + # mariadb $backup_folder_to_restore -e "update t1 set a=1" + # mariadb $backup_folder_to_restore -e "delete from t1 where a=1" + # mariadb -e "drop database if exists $backup_folder_to_restore" + + printf "\nDBRM Restore Complete\n\n" +} + +# Depends on +# em_file - most recent EM +# em_file_full_path - full path to most recent EM +# em_file_name - Just the file name of the EM +# prefix - Prefix of the EM process_localstorage_dbrm_restore() { - # shuffle DBRMs - detect newest date _em from the set, if smaller than the current one throw a warning - em_file=$(find "${backup_location}/${backup_folder_to_restore}" -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lt --time=ctime {} + | head -n 1 ) - #em_file=$(find /tmp/dbrm_backups/dbrm_backup_20240103_195653 -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lt --time=ctime {} + | 
head -n 1) - em_file_full_path=$(echo $em_file | awk '{print $9}' ) - if [ ! -f $em_file_full_path ]; then - echo "[!] Failed to parse _em file: $em_file_full_path doesnt exist" + printf "\nBefore DBRMs Restore\n" + echo "--------------------------------------------------------------------------" + ls -la "${dbrm_dir}" | grep -E "BRM_saves_em|BRM_saves_vbbm|BRM_saves_vss|BRM_saves_journal|BRM_saves_current" + printf " - Clearing active DBRMs ... " + if rm -rf $dbrm_dir ; then + printf "Done\n" + else + echo "Failed to delete files in $dbrm_dir " exit 1; - fi; - em_file_name=$(basename $em_file_full_path) - prefix="${em_file_name%%_em}" + fi - echo "[+] Clearing Active DBRMs" - rm -rf $dbrm_dir + printf "\nRestoring DBRMs\n" + echo "--------------------------------------------------------------------------" + printf " - Desired EM: $em_file_full_path\n" + printf " - Copying DBRMs: \"${backup_location}/${backup_folder_to_restore_dbrms}\" -> \"$dbrm_dir\" \n" + cp -arp "${backup_location}/${backup_folder_to_restore_dbrms}" $dbrm_dir - echo "[+] Copying backup DBRMs to active DBRMs dir" - cp -arp "${backup_location}/${backup_folder_to_restore}" $dbrm_dir - - echo "[+] Restoring Prefix: $prefix" - vbbm_name="${prefix}_vbbm" - vss_name="${prefix}_vss" - cp -arp "${dbrm_dir}/$em_file_name" "${dbrm_dir}/BRM_saves_em" - cp -arp "${dbrm_dir}/$vbbm_name" "${dbrm_dir}/BRM_saves_vbbm" - cp -arp "${dbrm_dir}/$vss_name" "${dbrm_dir}/BRM_saves_vss" - echo "Primary Extent Map Files Now:" - ls -la "${dbrm_dir}/BRM_saves_em" - ls -la "${dbrm_dir}/BRM_saves_vbbm" - ls -la "${dbrm_dir}/BRM_saves_vss" - ls -la "${dbrm_dir}/BRM_saves_journal" - - sleep 2 - + if [ "$prefix" != "BRM_saves" ]; then + printf " - Restoring Prefix: $prefix \n" + vbbm_name="${prefix}_vbbm" + vss_name="${prefix}_vss" + cp -arpf "${dbrm_dir}/$em_file_name" "${dbrm_dir}/BRM_saves_em" + cp -arpf "${dbrm_dir}/$vbbm_name" "${dbrm_dir}/BRM_saves_vbbm" + cp -arpf "${dbrm_dir}/$vss_name" "${dbrm_dir}/BRM_saves_vss" + fi 
+ echo "BRM_saves" > "${dbrm_dir}/BRM_saves_current" + chown -R mysql:mysql "${dbrm_dir}" clearShm - # manually run load_brm - printf "\n\n[+] Running load_brm...\n" + sleep 2 + + manually_run_loadbrm_and_savebrm + + printf "\nAfter DBRM Restore\n" + echo "--------------------------------------------------------------------------" + ls -la "${dbrm_dir}" | grep -E "BRM_saves_em|BRM_saves_vbbm|BRM_saves_vss|BRM_saves_journal|BRM_saves_current" + + if $auto_start; then + printf "\nStartup\n" + echo "--------------------------------------------------------------------------" + start_mariadb_cmapi_columnstore + fi + + # printf "\n[+] Health Check ...\n" + # sleep 2 + # # run a basic health check + # mariadb -e "create database if not exists $backup_folder_to_restore" + # mariadb $backup_folder_to_restore -e "drop table if exists t1" + # mariadb $backup_folder_to_restore -e "create table t1 (a int) engine=columnstore" + # mariadb $backup_folder_to_restore -e "insert into t1 values (1)" + # mariadb $backup_folder_to_restore -e "update t1 set a=1" + # mariadb $backup_folder_to_restore -e "delete from t1 where a=1" + # mariadb -e "drop database if exists $backup_folder_to_restore" + + printf "\nDBRM Restore Complete\n\n" +} + +manually_run_loadbrm_and_savebrm() { + + pf_offset=45 + printf "%-${pf_offset}s ... " " - Running load_brm" if ! sudo -su mysql /usr/bin/load_brm /var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves ; then - echo "[!!] Failed to complete load_brm successfully" + printf "\n[!!] Failed to complete load_brm successfully\n\n" exit 1; fi; + printf "%-${pf_offset}s ... " " - Starting mcs-controllernode" if ! systemctl start mcs-controllernode ; then echo "[!!] Failed to start mcs-controllernode " exit 1; + else + printf "Done\n" fi; - echo "[+] Confirming extent map readable..." + printf "%-${pf_offset}s ... " " - Confirming extent map readable" if ! editem -i >/dev/null ; then echo "[!!] 
Failed to run editem -i (read the EM)" exit 1; + else + printf "Done\n" fi; - echo "[+] Running save_brm..." + printf "%-${pf_offset}s ... \n" " - Running save_brm" if ! sudo -u mysql /usr/bin/save_brm ; then echo "[!!] Failed to run save_brm" exit 1; fi + printf "%-${pf_offset}s ... " " - Stopping mcs-controllernode" if ! systemctl stop mcs-controllernode; then echo "[!!] Failed to stop mcs-controllernode" exit 1; + else + printf "Done\n" fi - clearShm - - printf "\n[+] Turning on columnstore ...\n" - start_cs_cmapi_via_curl + if [ $storage == "S3" ]; then + printf "%-${pf_offset}s ... " " - Stopping mcs-storagemanager" + if ! systemctl stop mcs-storagemanager ; then + echo "[!!] Failed to stop mcs-storagemanager " + exit 1; + else + printf "Done\n" + fi + fi; + + printf "%-${pf_offset}s ... " " - clearShm" + clearShm + printf "Done\n" + sleep 2 - # run a basic health check - backup_folder_to_restore="derp" - mariadb -e "create database if not exists $backup_folder_to_restore" - # confirm $backup_folder_to_restore doesnt exist - mariadb $backup_folder_to_restore -e "create table t1 (a int) engine=columnstore" - mariadb $backup_folder_to_restore -e "insert into t1 values (1)" - mariadb $backup_folder_to_restore -e "update t1 set a=1" - mariadb $backup_folder_to_restore -e "delete from t1 where a=1" - mariadb -e "drop database $backup_folder_to_restore" } process_backup() -{ +{ load_default_backup_variables; parse_backup_variables "$@"; print_backup_variables; check_for_dependancies "backup"; validation_prechecks_for_backup; + apply_backup_retention_policy issue_write_locks; run_save_brm; run_backup; @@ -3027,27 +3672,37 @@ process_restore() run_restore; } +print_mcs_bk_mgr_version_info() { + echo "MariaDB Columnstore Backup Manager" + echo "Version: $mcs_bk_manager_version" +} + case "$action" in - 'help' | '--help' | '-help' | '-h') + 'help' | '--help' | '-help' | '-h') print_action_help_text ;; - 'backup') + 'backup') process_backup "$@"; ;; - 'dbrm_backup') + 
'dbrm_backup') process_dbrm_backup "$@"; ;; - 'dbrm_restore') - # for next release + 'dbrm_restore') process_dbrm_restore "$@"; ;; - 'restore') + 'restore') process_restore "$@"; ;; - *) - printf "\nunknown action: $action\n" + '-v' | 'version' ) + print_mcs_bk_mgr_version_info + ;; + 'source' ) + return 0; + ;; + *) + printf "\nunknown action: $action\n" print_action_help_text ;; esac -exit 0; \ No newline at end of file +exit 0; diff --git a/datatypes/mcs_datatype.cpp b/datatypes/mcs_datatype.cpp index fbc6f5e23..f1a7414de 100644 --- a/datatypes/mcs_datatype.cpp +++ b/datatypes/mcs_datatype.cpp @@ -58,6 +58,41 @@ int128_t SystemCatalog::TypeAttributesStd::decimal128FromString(const std::strin return result; } +// SQL parser checks that given `value` is in a valid format. +// The first symbol can be `-`. The `value` can contain `.` symbol. +void decimalPrecisionAndScale(const utils::NullString& value, int& precision, int& scale) +{ + if (value.isNull()) + { + scale = 0; + precision = -1; + return; + } + + const auto strValue = value.unsafeStringRef(); + if (strValue.empty()) + { + scale = 0; + precision = -1; + return; + } + + const int len = strValue.size(); + const auto dotIndex = strValue.find('.'); + const int minExists = strValue.front() == '-' ? 
1 : 0; + + if (dotIndex == std::string::npos) + { + scale = 0; + precision = len - minExists; + } + else + { + scale = len - dotIndex - 1; + precision = len - 1 - minExists; + } +} + int128_t SystemCatalog::TypeAttributesStd::decimal128FromString(const utils::NullString& value, bool* saturate) const { diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h index 11ccaa1da..561f3fca4 100644 --- a/datatypes/mcs_datatype.h +++ b/datatypes/mcs_datatype.h @@ -125,6 +125,8 @@ struct WidthToSIntegralType<16> : _WidthToSIntegralType<16, int128_t> { }; +void decimalPrecisionAndScale(const utils::NullString& value, int& precision, int& scale); + class SystemCatalog { public: diff --git a/dbcon/ddlpackageproc/altertableprocessor.cpp b/dbcon/ddlpackageproc/altertableprocessor.cpp index 2863b092e..8c0dd0231 100644 --- a/dbcon/ddlpackageproc/altertableprocessor.cpp +++ b/dbcon/ddlpackageproc/altertableprocessor.cpp @@ -47,6 +47,7 @@ using namespace logging; #include "we_messages.h" #include "we_ddlcommandclient.h" +#include "we_ddlcommon.h" using namespace WriteEngine; #include "oamcache.h" @@ -299,8 +300,8 @@ bool comptypesAreCompat(int oldCtype, int newCtype) namespace ddlpackageprocessor { -AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( - ddlpackage::AlterTableStatement& alterTableStmt) +AlterTableProcessor::DDLResult AlterTableProcessor::processPackageInternal( + ddlpackage::SqlStatement* sqlTableStmt) { SUMMARY_INFO("AlterTableProcessor::processPackage"); @@ -311,6 +312,20 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( result.result = NO_ERROR; std::string err; uint64_t tableLockId = 0; + + auto* alterTableStmt = dynamic_cast(sqlTableStmt); + if (!alterTableStmt) + { + logging::Message::Args args; + logging::Message message(9); + args.add("AlterTableStatement wrong cast"); + message.format(args); + result.result = ALTER_ERROR; + result.message = message; + fSessionManager.rolledback(txnID); + return result; + } + 
DETAIL_INFO(alterTableStmt); int rc = 0; rc = fDbrm->isReadWrite(); @@ -328,8 +343,8 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( } //@Bug 4538. Log the sql statement before grabbing tablelock - string stmt = alterTableStmt.fSql + "|" + (alterTableStmt.fTableName)->fSchema + "|"; - SQLLogger logger(stmt, fDDLLoggingId, alterTableStmt.fSessionID, txnID.id); + string stmt = alterTableStmt->fSql + "|" + (alterTableStmt->fTableName)->fSchema + "|"; + SQLLogger logger(stmt, fDDLLoggingId, alterTableStmt->fSessionID, txnID.id); VERBOSE_INFO("Getting current txnID"); OamCache* oamcache = OamCache::makeOamCache(); @@ -370,18 +385,18 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( { // check table lock boost::shared_ptr systemCatalogPtr = - CalpontSystemCatalog::makeCalpontSystemCatalog(alterTableStmt.fSessionID); + CalpontSystemCatalog::makeCalpontSystemCatalog(alterTableStmt->fSessionID); systemCatalogPtr->identity(CalpontSystemCatalog::EC); - systemCatalogPtr->sessionID(alterTableStmt.fSessionID); + systemCatalogPtr->sessionID(alterTableStmt->fSessionID); CalpontSystemCatalog::TableName tableName; - tableName.schema = (alterTableStmt.fTableName)->fSchema; - tableName.table = (alterTableStmt.fTableName)->fName; + tableName.schema = (alterTableStmt->fTableName)->fSchema; + tableName.table = (alterTableStmt->fTableName)->fName; execplan::CalpontSystemCatalog::ROPair roPair; roPair = systemCatalogPtr->tableRID(tableName); uint32_t processID = ::getpid(); int32_t txnid = txnID.id; - int32_t sessionId = alterTableStmt.fSessionID; + int32_t sessionId = alterTableStmt->fSessionID; std::string processName("DDLProc"); int i = 0; @@ -428,7 +443,7 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( { processID = ::getpid(); txnid = txnID.id; - sessionId = alterTableStmt.fSessionID; + sessionId = alterTableStmt->fSessionID; ; processName = "DDLProc"; tableLockId = fDbrm->getTableLock(pms, roPair.objnum, &processName, 
&processID, &sessionId, &txnid, @@ -455,7 +470,7 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( } } - ddlpackage::AlterTableActionList actionList = alterTableStmt.fActions; + ddlpackage::AlterTableActionList actionList = alterTableStmt->fActions; AlterTableActionList::const_iterator action_iterator = actionList.begin(); while (action_iterator != actionList.end()) @@ -480,7 +495,7 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( columnDefPtr = addColumns.fColumns[0]; } - addColumn(alterTableStmt.fSessionID, txnID.id, result, columnDefPtr, *(alterTableStmt.fTableName), + addColumn(alterTableStmt->fSessionID, txnID.id, result, columnDefPtr, *(alterTableStmt->fTableName), uniqueId); if (result.result != NO_ERROR) @@ -492,9 +507,9 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( else if (s.find(AlterActionString[6]) != string::npos) { // Drop Column Default - dropColumnDefault(alterTableStmt.fSessionID, txnID.id, result, + dropColumnDefault(alterTableStmt->fSessionID, txnID.id, result, *(dynamic_cast(*action_iterator)), - *(alterTableStmt.fTableName), uniqueId); + *(alterTableStmt->fTableName), uniqueId); if (result.result != NO_ERROR) { @@ -505,15 +520,16 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( else if (s.find(AlterActionString[3]) != string::npos) { // Drop Columns - dropColumns(alterTableStmt.fSessionID, txnID.id, result, - *(dynamic_cast(*action_iterator)), *(alterTableStmt.fTableName), + dropColumns(alterTableStmt->fSessionID, txnID.id, result, + *(dynamic_cast(*action_iterator)), *(alterTableStmt->fTableName), uniqueId); } else if (s.find(AlterActionString[2]) != string::npos) { // Drop a column - dropColumn(alterTableStmt.fSessionID, txnID.id, result, - *(dynamic_cast(*action_iterator)), *(alterTableStmt.fTableName), uniqueId); + dropColumn(alterTableStmt->fSessionID, txnID.id, result, + *(dynamic_cast(*action_iterator)), *(alterTableStmt->fTableName), + uniqueId); 
} #if 0 @@ -528,9 +544,9 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( else if (s.find(AlterActionString[5]) != string::npos) { // Set Column Default - setColumnDefault(alterTableStmt.fSessionID, txnID.id, result, + setColumnDefault(alterTableStmt->fSessionID, txnID.id, result, *(dynamic_cast(*action_iterator)), - *(alterTableStmt.fTableName), uniqueId); + *(alterTableStmt->fTableName), uniqueId); } #if 0 @@ -546,23 +562,23 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( else if (s.find(AlterActionString[8]) != string::npos) { // Rename Table - renameTable(alterTableStmt.fSessionID, txnID.id, result, - *(dynamic_cast(*action_iterator)), *(alterTableStmt.fTableName), + renameTable(alterTableStmt->fSessionID, txnID.id, result, + *(dynamic_cast(*action_iterator)), *(alterTableStmt->fTableName), uniqueId); } else if (s.find(AlterActionString[10]) != string::npos) { // Rename a Column - renameColumn(alterTableStmt.fSessionID, txnID.id, result, - *(dynamic_cast(*action_iterator)), *(alterTableStmt.fTableName), + renameColumn(alterTableStmt->fSessionID, txnID.id, result, + *(dynamic_cast(*action_iterator)), *(alterTableStmt->fTableName), uniqueId); } else if (s.find(AlterActionString[11]) != string::npos) { // Table Comment - tableComment(alterTableStmt.fSessionID, txnID.id, result, - *(dynamic_cast(*action_iterator)), *(alterTableStmt.fTableName), + tableComment(alterTableStmt->fSessionID, txnID.id, result, + *(dynamic_cast(*action_iterator)), *(alterTableStmt->fTableName), uniqueId); } else @@ -574,7 +590,7 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( } // Log the DDL statement. 
- logging::logDDL(alterTableStmt.fSessionID, txnID.id, alterTableStmt.fSql, alterTableStmt.fOwner); + logging::logDDL(alterTableStmt->fSessionID, txnID.id, alterTableStmt->fSql, alterTableStmt->fOwner); DETAIL_INFO("Commiting transaction"); commitTransaction(uniqueId, txnID); @@ -582,11 +598,43 @@ AlterTableProcessor::DDLResult AlterTableProcessor::processPackage( } catch (std::exception& ex) { - rollBackAlter(ex.what(), txnID, alterTableStmt.fSessionID, result, uniqueId); + if (checkPPLostConnection(ex.what())) + { + if (tableLockId) + { + try + { + (void)fDbrm->releaseTableLock(tableLockId); + } + catch (std::exception&) + { + if (result.result == NO_ERROR) + { + logging::Message::Args args; + logging::Message message(1); + args.add("Table lock is not released due to "); + args.add(IDBErrorInfo::instance()->errorMsg(ERR_HARD_FAILURE)); + args.add(""); + args.add(""); + message.format(args); + result.result = ALTER_ERROR; + result.message = message; + return result; + } + } + } + result.result = PP_LOST_CONNECTION; + fWEClient->removeQueue(uniqueId); + return result; + } + else + { + rollBackAlter(ex.what(), txnID, alterTableStmt->fSessionID, result, uniqueId); + } } catch (...) { - rollBackAlter("encountered unknown exception. ", txnID, alterTableStmt.fSessionID, result, uniqueId); + rollBackAlter("encountered unknown exception. 
", txnID, alterTableStmt->fSessionID, result, uniqueId); } // release table lock @@ -685,10 +733,13 @@ void AlterTableProcessor::addColumn(uint32_t sessionID, execplan::CalpontSystemC throw std::runtime_error(err); } - if ((columnDefPtr->fType->fType == CalpontSystemCatalog::CHAR && columnDefPtr->fType->fLength > 8) || - (columnDefPtr->fType->fType == CalpontSystemCatalog::VARCHAR && columnDefPtr->fType->fLength > 7) || - (columnDefPtr->fType->fType == CalpontSystemCatalog::VARBINARY && columnDefPtr->fType->fLength > 7) || - (columnDefPtr->fType->fType == CalpontSystemCatalog::BLOB)) + int dataType = WriteEngine::convertDataType(columnDefPtr->fType->fType); + + if ((dataType == CalpontSystemCatalog::CHAR && columnDefPtr->fType->fLength > 8) || + (dataType == CalpontSystemCatalog::VARCHAR && columnDefPtr->fType->fLength > 7) || + (dataType == CalpontSystemCatalog::VARBINARY && columnDefPtr->fType->fLength > 7) || + (dataType == CalpontSystemCatalog::TEXT) || + (dataType == CalpontSystemCatalog::BLOB)) { isDict = true; } diff --git a/dbcon/ddlpackageproc/altertableprocessor.h b/dbcon/ddlpackageproc/altertableprocessor.h index 48a01a8a6..55868ec69 100644 --- a/dbcon/ddlpackageproc/altertableprocessor.h +++ b/dbcon/ddlpackageproc/altertableprocessor.h @@ -39,11 +39,6 @@ class AlterTableProcessor : public DDLPackageProcessor AlterTableProcessor(BRM::DBRM* aDbrm) : DDLPackageProcessor(aDbrm) { } - /** @brief process an alter table statement - * - * @param alterTableStmt the AlterTableStatement - */ - EXPORT DDLResult processPackage(ddlpackage::AlterTableStatement& alterTableStmt); /** @brief add a physical column file * * @param result the result of the operation @@ -151,6 +146,11 @@ class AlterTableProcessor : public DDLPackageProcessor uint64_t uniqueId); private: + /** @brief process an alter table statement + * + * @param alterTableStmt the AlterTableStatement + */ + DDLResult processPackageInternal(ddlpackage::SqlStatement* alterTableStmt) override; }; } // 
namespace ddlpackageprocessor diff --git a/dbcon/ddlpackageproc/createindexprocessor.cpp b/dbcon/ddlpackageproc/createindexprocessor.cpp index 706d4339e..cb92f65ea 100644 --- a/dbcon/ddlpackageproc/createindexprocessor.cpp +++ b/dbcon/ddlpackageproc/createindexprocessor.cpp @@ -37,8 +37,8 @@ using namespace logging; using namespace BRM; namespace ddlpackageprocessor { -CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( - ddlpackage::CreateIndexStatement& createIndexStmt) +CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackageInternal( + ddlpackage::SqlStatement* sqlStmt) { /* get OIDs for the list & tree files @@ -53,6 +53,19 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( DDLResult result; result.result = NO_ERROR; + auto* createIndexStmt = dynamic_cast<ddlpackage::CreateIndexStatement*>(sqlStmt); + if (!createIndexStmt) + { + logging::Message::Args args; + logging::Message message(9); + args.add("CreateIndexStatement wrong cast "); + message.format(args); + + result.result = CREATE_ERROR; + result.message = message; + return result; + } + DETAIL_INFO(createIndexStmt); BRM::TxnID txnID; @@ -62,11 +75,11 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( This is based on the assumption that Front end is already error out if the user trys to create index on non-existing table.
*/ CalpontSystemCatalog::TableName tableName; - tableName.schema = (createIndexStmt.fTableName)->fSchema; - tableName.table = (createIndexStmt.fTableName)->fName; + tableName.schema = (createIndexStmt->fTableName)->fSchema; + tableName.table = (createIndexStmt->fTableName)->fName; CalpontSystemCatalog::ROPair roPair; boost::shared_ptr systemCatalogPtr = - CalpontSystemCatalog::makeCalpontSystemCatalog(createIndexStmt.fSessionID); + CalpontSystemCatalog::makeCalpontSystemCatalog(createIndexStmt->fSessionID); try { @@ -75,7 +88,7 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( catch (exception& ex) { // store primary key name in fPKName - fPKName = createIndexStmt.fIndexName->fName; + fPKName = createIndexStmt->fIndexName->fName; return result; } catch (...) @@ -88,10 +101,10 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( return result; } - fPKName = createIndexStmt.fIndexName->fName; + fPKName = createIndexStmt->fIndexName->fName; int err = 0; - SQLLogger logger(createIndexStmt.fSql, fDDLLoggingId, createIndexStmt.fSessionID, txnID.id); + SQLLogger logger(createIndexStmt->fSql, fDDLLoggingId, createIndexStmt->fSessionID, txnID.id); VERBOSE_INFO("Allocating object IDs for columns"); @@ -102,31 +115,31 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( VERBOSE_INFO("Starting a new transaction"); ddlpackage::DDL_CONSTRAINTS type = - createIndexStmt.fUnique ? ddlpackage::DDL_UNIQUE : ddlpackage::DDL_INVALID_CONSTRAINT; + createIndexStmt->fUnique ? 
ddlpackage::DDL_UNIQUE : ddlpackage::DDL_INVALID_CONSTRAINT; VERBOSE_INFO("Writing meta data to SYSINDEX"); bool multicol = false; - if (createIndexStmt.fColumnNames.size() > 1) + if (createIndexStmt->fColumnNames.size() > 1) { multicol = true; } // validate index columns CalpontSystemCatalog::TableColName tableColName; - tableColName.schema = (createIndexStmt.fTableName)->fSchema; - tableColName.table = (createIndexStmt.fTableName)->fName; + tableColName.schema = (createIndexStmt->fTableName)->fSchema; + tableColName.table = (createIndexStmt->fTableName)->fName; CalpontSystemCatalog::OID oid; CalpontSystemCatalog::ColType colType; ColumnNameList::const_iterator colIter; int totalWidth = 0; - DDLIndexPopulator pop(&fWriteEngine, &fSessionManager, createIndexStmt.fSessionID, txnID.id, result, - fIdxOID, createIndexStmt.fColumnNames, *createIndexStmt.fTableName, type, + DDLIndexPopulator pop(&fWriteEngine, &fSessionManager, createIndexStmt->fSessionID, txnID.id, result, + fIdxOID, createIndexStmt->fColumnNames, *(createIndexStmt->fTableName), type, getDebugLevel()); if (multicol) { - for (colIter = createIndexStmt.fColumnNames.begin(); colIter != createIndexStmt.fColumnNames.end(); + for (colIter = createIndexStmt->fColumnNames.begin(); colIter != createIndexStmt->fColumnNames.end(); colIter++) { tableColName.column = *colIter; @@ -167,7 +180,7 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( // writeSysIndexColMetaData(createIndexStmt.fSessionID, txnID.id, result,*createIndexStmt.fTableName, // createIndexStmt.fColumnNames, createIndexStmt.fIndexName->fName ); - if (createIndexStmt.fUnique) + if (createIndexStmt->fUnique) { VERBOSE_INFO("Writing column constraint meta data to SYSCONSTRAINT"); WriteEngine::ColStruct colStruct; @@ -189,7 +202,7 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( // get the columns for the SYSCONSTRAINT table ColumnList sysConsColumns; ColumnList::const_iterator sysCons_iterator; - 
getColumnsForTable(createIndexStmt.fSessionID, sysConsTableName.schema, sysConsTableName.table, + getColumnsForTable(createIndexStmt->fSessionID, sysConsTableName.schema, sysConsTableName.table, sysConsColumns); sysCons_iterator = sysConsColumns.begin(); std::string idxData; @@ -201,17 +214,17 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( if (CONSTRAINTNAME_COL == column.tableColName.column) { - idxData = createIndexStmt.fIndexName->fName; + idxData = createIndexStmt->fIndexName->fName; colTuple.data = idxData; } else if (SCHEMA_COL == column.tableColName.column) { - idxData = (createIndexStmt.fTableName)->fSchema; + idxData = (createIndexStmt->fTableName)->fSchema; colTuple.data = idxData; } else if (TABLENAME_COL == column.tableColName.column) { - idxData = (createIndexStmt.fTableName)->fName; + idxData = (createIndexStmt->fTableName)->fName; colTuple.data = idxData; } else if (CONSTRAINTTYPE_COL == column.tableColName.column) @@ -233,7 +246,7 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( } else if (INDEXNAME_COL == column.tableColName.column) { - idxData = createIndexStmt.fIndexName->fName; + idxData = createIndexStmt->fIndexName->fName; colTuple.data = idxData; } else @@ -271,7 +284,7 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( if (error != WriteEngine::NO_ERROR) { return rollBackCreateIndex(errorString("WE: Error inserting Column Record: ", error), txnID, - createIndexStmt.fSessionID); + createIndexStmt->fSessionID); // logging::Message::Args args; // logging::Message message(9); // args.add("Error updating: "); @@ -305,7 +318,7 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( // get the columns for the SYSCONSTRAINTCOL table ColumnList sysConsColColumns; ColumnList::const_iterator sysConsCol_iterator; - getColumnsForTable(createIndexStmt.fSessionID, sysConsColTableName.schema, sysConsColTableName.table, + getColumnsForTable(createIndexStmt->fSessionID, 
sysConsColTableName.schema, sysConsColTableName.table, sysConsColColumns); // write sysconstraintcol sysConsCol_iterator = sysConsColColumns.begin(); @@ -319,22 +332,22 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( if (SCHEMA_COL == column.tableColName.column) { - colData = (createIndexStmt.fTableName)->fSchema; + colData = (createIndexStmt->fTableName)->fSchema; colTupleCol.data = colData; } else if (TABLENAME_COL == column.tableColName.column) { - colData = (createIndexStmt.fTableName)->fName; + colData = (createIndexStmt->fTableName)->fName; colTupleCol.data = colData; } else if (COLNAME_COL == column.tableColName.column) { - colData = createIndexStmt.fColumnNames[0]; + colData = createIndexStmt->fColumnNames[0]; colTupleCol.data = colData; } else if (CONSTRAINTNAME_COL == column.tableColName.column) { - colData = createIndexStmt.fIndexName->fName; + colData = createIndexStmt->fIndexName->fName; colTupleCol.data = colData; } else @@ -372,7 +385,7 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( if (error != WriteEngine::NO_ERROR) { return rollBackCreateIndex(errorString("WE: Error inserting Column Record: ", error), txnID, - createIndexStmt.fSessionID); + createIndexStmt->fSessionID); /* logging::Message::Args args; logging::Message message(9); @@ -398,7 +411,7 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( if (err) { return rollBackCreateIndex(errorString("Write engine failed to create the new index. ", err), txnID, - createIndexStmt.fSessionID); + createIndexStmt->fSessionID); } // new if BULK_LOAD close @@ -407,11 +420,11 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( if (err) { return rollBackCreateIndex(errorString("Failed to populate index with current data. ", err), txnID, - createIndexStmt.fSessionID); + createIndexStmt->fSessionID); } // Log the DDL statement. 
- logging::logDDL(createIndexStmt.fSessionID, txnID.id, createIndexStmt.fSql, createIndexStmt.fOwner); + logging::logDDL(createIndexStmt->fSessionID, txnID.id, createIndexStmt->fSql, createIndexStmt->fOwner); DETAIL_INFO("Commiting transaction"); err = fWriteEngine.commit(txnID.id); @@ -419,7 +432,7 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( if (err) { return rollBackCreateIndex(errorString("Failed to commit the create index transaction. ", err), txnID, - createIndexStmt.fSessionID); + createIndexStmt->fSessionID); } fSessionManager.committed(txnID); @@ -428,12 +441,12 @@ CreateIndexProcessor::DDLResult CreateIndexProcessor::processPackage( catch (exception& ex) { - result = rollBackCreateIndex(ex.what(), txnID, createIndexStmt.fSessionID); + result = rollBackCreateIndex(ex.what(), txnID, createIndexStmt->fSessionID); } catch (...) { string msg("CreateIndexProcessor::processPackage: caught unknown exception!"); - result = rollBackCreateIndex(msg, txnID, createIndexStmt.fSessionID); + result = rollBackCreateIndex(msg, txnID, createIndexStmt->fSessionID); } return result; diff --git a/dbcon/ddlpackageproc/createindexprocessor.h b/dbcon/ddlpackageproc/createindexprocessor.h index bead73b5f..76ff1179d 100644 --- a/dbcon/ddlpackageproc/createindexprocessor.h +++ b/dbcon/ddlpackageproc/createindexprocessor.h @@ -39,7 +39,7 @@ class CreateIndexProcessor : public DDLPackageProcessor * * @param createIndexStmt the create index statement */ - DDLResult processPackage(ddlpackage::CreateIndexStatement& createIndexStmt); + DDLResult processPackageInternal(ddlpackage::SqlStatement* createIndexStmt); protected: DDLResult rollBackCreateIndex(const std::string& error, BRM::TxnID& txnID, int sessionId); diff --git a/dbcon/ddlpackageproc/createtableprocessor.cpp b/dbcon/ddlpackageproc/createtableprocessor.cpp index 77263ad1c..0b9f68f00 100644 --- a/dbcon/ddlpackageproc/createtableprocessor.cpp +++ b/dbcon/ddlpackageproc/createtableprocessor.cpp @@ -47,8 
+47,8 @@ using namespace logging; namespace ddlpackageprocessor { -CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( - ddlpackage::CreateTableStatement& createTableStmt) +CreateTableProcessor::DDLResult CreateTableProcessor::processPackageInternal( + ddlpackage::SqlStatement* sqlStmt) { SUMMARY_INFO("CreateTableProcessor::processPackage"); @@ -72,8 +72,22 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( return result; } + ddlpackage::CreateTableStatement* createTableStmt = + dynamic_cast<ddlpackage::CreateTableStatement*>(sqlStmt); + + if (!createTableStmt) + { + Message::Args args; + Message message(9); + args.add("CreateTableStatement wrong cast"); + message.format(args); + result.result = CREATE_ERROR; + result.message = message; + return result; + } + DETAIL_INFO(createTableStmt); - ddlpackage::TableDef& tableDef = *createTableStmt.fTableDef; + ddlpackage::TableDef& tableDef = *(createTableStmt->fTableDef); // If schema = CALPONTSYS, do not create table if (tableDef.fQualifiedName->fSchema == CALPONT_SCHEMA) @@ -89,7 +103,7 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( // Check whether the table is existed already boost::shared_ptr<CalpontSystemCatalog> systemCatalogPtr = - CalpontSystemCatalog::makeCalpontSystemCatalog(createTableStmt.fSessionID); + CalpontSystemCatalog::makeCalpontSystemCatalog(createTableStmt->fSessionID); execplan::CalpontSystemCatalog::TableName tableName; tableName.schema = tableDef.fQualifiedName->fSchema; tableName.table = tableDef.fQualifiedName->fName; @@ -105,36 +119,44 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( } catch (IDBExcept& ie) { - // TODO: What is and is not an error here?
- if (ie.errorCode() == ERR_DATA_OFFLINE) + if (checkPPLostConnection(ie.what())) { - // release transaction - fSessionManager.rolledback(txnID); - // Return the error for display to user - Message::Args args; - Message message(9); - args.add(ie.what()); - message.format(args); - result.result = CREATE_ERROR; - result.message = message; + result.result = NETWORK_ERROR; return result; } - else if (ie.errorCode() == ERR_TABLE_NOT_IN_CATALOG) + else { - roPair.objnum = 0; - } - else // error out - { - // release transaction - fSessionManager.rolledback(txnID); - // Return the error for display to user - Message::Args args; - Message message(9); - args.add(ie.what()); - message.format(args); - result.result = CREATE_ERROR; - result.message = message; - return result; + // TODO: What is and is not an error here? + if (ie.errorCode() == ERR_DATA_OFFLINE) + { + // release transaction + fSessionManager.rolledback(txnID); + // Return the error for display to user + Message::Args args; + Message message(9); + args.add(ie.what()); + message.format(args); + result.result = CREATE_ERROR; + result.message = message; + return result; + } + else if (ie.errorCode() == ERR_TABLE_NOT_IN_CATALOG) + { + roPair.objnum = 0; + } + else // error out + { + // release transaction + fSessionManager.rolledback(txnID); + // Return the error for display to user + Message::Args args; + Message message(9); + args.add(ie.what()); + message.format(args); + result.result = CREATE_ERROR; + result.message = message; + return result; + } } } catch (std::exception& ex) // error out @@ -185,8 +207,8 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( // Start a new transaction VERBOSE_INFO("Starting a new transaction"); - string stmt = createTableStmt.fSql + "|" + tableDef.fQualifiedName->fSchema + "|"; - SQLLogger logger(stmt, fDDLLoggingId, createTableStmt.fSessionID, txnID.id); + string stmt = createTableStmt->fSql + "|" + tableDef.fQualifiedName->fSchema + "|"; + SQLLogger 
logger(stmt, fDDLLoggingId, createTableStmt->fSessionID, txnID.id); std::string err; execplan::ObjectIDManager fObjectIDManager; @@ -255,7 +277,7 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( #endif uint32_t numColumnOids = numColumns + numDictCols; - numColumnOids += 1; // MCOL-5021 + numColumnOids += 1; // MCOL-5021 if (fStartingColOID < 0) { @@ -276,11 +298,11 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( ByteStream bytestream; bytestream << (ByteStream::byte)WE_SVR_WRITE_SYSTABLE; bytestream << uniqueId; - bytestream << (uint32_t)createTableStmt.fSessionID; + bytestream << (uint32_t)createTableStmt->fSessionID; bytestream << (uint32_t)txnID.id; bytestream << (uint32_t)fStartingColOID; bytestream << (uint32_t)(fStartingColOID + numColumnOids); - bytestream << (uint32_t)createTableStmt.fTableWithAutoi; + bytestream << (uint32_t)createTableStmt->fTableWithAutoi; uint16_t dbRoot; BRM::OID_t sysOid = 1001; // Find out where systable is @@ -375,7 +397,7 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( if (rc != NETWORK_ERROR) { - rollBackTransaction(uniqueId, txnID, createTableStmt.fSessionID); // What to do with the error code + rollBackTransaction(uniqueId, txnID, createTableStmt->fSessionID); // What to do with the error code } // release transaction @@ -387,7 +409,7 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( bytestream.restart(); bytestream << (ByteStream::byte)WE_SVR_WRITE_CREATE_SYSCOLUMN; bytestream << uniqueId; - bytestream << (uint32_t)createTableStmt.fSessionID; + bytestream << (uint32_t)createTableStmt->fSessionID; bytestream << (uint32_t)txnID.id; bytestream << numColumns; @@ -486,22 +508,31 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( #ifdef IDB_DDL_DEBUG cout << fTxnid.id << " Create table WE_SVR_WRITE_CREATE_SYSCOLUMN: " << errorMsg << endl; #endif - result.result = (ResultCode)rc; - Message::Args args; - Message 
message(9); - args.add("(3)Create table failed due to "); - args.add(errorMsg); - message.format(args); - result.message = message; - - if (rc != NETWORK_ERROR) + if (checkPPLostConnection(errorMsg)) { - rollBackTransaction(uniqueId, txnID, createTableStmt.fSessionID); // What to do with the error code + result.result = PP_LOST_CONNECTION; + return result; } + else + { + result.result = (ResultCode)rc; + Message::Args args; + Message message(9); + args.add("(3)Create table failed due to "); + args.add(errorMsg); + message.format(args); + result.message = message; - // release transaction - fSessionManager.rolledback(txnID); - return result; + if (rc != NETWORK_ERROR) + { + rollBackTransaction(uniqueId, txnID, + createTableStmt->fSessionID); // What to do with the error code + } + + // release transaction + fSessionManager.rolledback(txnID); + return result; + } } // Get the number of tables in the database, the current table is included. @@ -627,7 +658,7 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( if (rc != NETWORK_ERROR) { - rollBackTransaction(uniqueId, txnID, createTableStmt.fSessionID); // What to do with the error code + rollBackTransaction(uniqueId, txnID, createTableStmt->fSessionID); // What to do with the error code } // release transaction @@ -716,7 +747,7 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( #ifdef IDB_DDL_DEBUG cout << fTxnid.id << " Create table We_SVR_WRITE_CREATETABLEFILES: " << errorMsg << endl; #endif - rollBackTransaction(uniqueId, txnID, createTableStmt.fSessionID); // What to do with the error code + rollBackTransaction(uniqueId, txnID, createTableStmt->fSessionID); // What to do with the error code fSessionManager.rolledback(txnID); } else @@ -728,7 +759,7 @@ CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( } // Log the DDL statement. 
- logDDL(createTableStmt.fSessionID, txnID.id, createTableStmt.fSql, createTableStmt.fOwner); + logDDL(createTableStmt->fSessionID, txnID.id, createTableStmt->fSql, createTableStmt->fOwner); } catch (std::exception& ex) { diff --git a/dbcon/ddlpackageproc/createtableprocessor.h b/dbcon/ddlpackageproc/createtableprocessor.h index 09b3682a0..48d14bb08 100644 --- a/dbcon/ddlpackageproc/createtableprocessor.h +++ b/dbcon/ddlpackageproc/createtableprocessor.h @@ -39,17 +39,17 @@ class CreateTableProcessor : public DDLPackageProcessor CreateTableProcessor(BRM::DBRM* aDbrm) : DDLPackageProcessor(aDbrm) { } - /** @brief process a create table statement - * - * @param createTableStmt the CreateTableStatement - */ - EXPORT DDLResult processPackage(ddlpackage::CreateTableStatement& createTableStmt); protected: void rollBackCreateTable(const std::string& error, BRM::TxnID txnID, int sessionId, ddlpackage::TableDef& tableDef, DDLResult& result); private: + /** @brief process a create table statement + * + * @param createTableStmt the CreateTableStatement + */ + DDLResult processPackageInternal(ddlpackage::SqlStatement* sqlTableStmt); }; } // namespace ddlpackageprocessor diff --git a/dbcon/ddlpackageproc/ddlpackageprocessor.cpp b/dbcon/ddlpackageproc/ddlpackageprocessor.cpp index 6d6bff705..07a663b51 100644 --- a/dbcon/ddlpackageproc/ddlpackageprocessor.cpp +++ b/dbcon/ddlpackageproc/ddlpackageprocessor.cpp @@ -562,10 +562,9 @@ void DDLPackageProcessor::createFiles(CalpontSystemCatalog::TableName aTableName { SUMMARY_INFO("DDLPackageProcessor::createFiles"); boost::shared_ptr systemCatalogPtr = - CalpontSystemCatalog::makeCalpontSystemCatalog(1); + CalpontSystemCatalog::makeCalpontSystemCatalog(1); CalpontSystemCatalog::RIDList ridList = systemCatalogPtr->columnRIDs(aTableName); - CalpontSystemCatalog::OID tableAUXColOid = - systemCatalogPtr->tableAUXColumnOID(aTableName); + CalpontSystemCatalog::OID tableAUXColOid = systemCatalogPtr->tableAUXColumnOID(aTableName); if 
(tableAUXColOid > 3000) { @@ -1127,6 +1126,37 @@ void DDLPackageProcessor::createWriteTruncateTableLogFile( throw std::runtime_error(errorMsg); } +DDLPackageProcessor::DDLResult DDLPackageProcessor::processPackage(SqlStatement* sqlStmt) +{ + auto result = processPackageInternal(sqlStmt); + uint32_t tries = 0; + while ((result.result == PP_LOST_CONNECTION) && (tries < 5)) + { + std::cerr << "DDLPackageProcessor: NETWORK ERROR; attempt # " << tries << std::endl; + joblist::ResourceManager* rm = joblist::ResourceManager::instance(true); + joblist::DistributedEngineComm* fEc = joblist::DistributedEngineComm::instance(rm); + if (fEc->Setup()) + return result; + + result = processPackageInternal(sqlStmt); + ++tries; + } + return result; +} + +DDLPackageProcessor::DDLResult DDLPackageProcessor::processPackageInternal(SqlStatement* sqlStmt) +{ + // This should not be called. + DDLPackageProcessor::DDLResult result; + result.result = NOT_ACCEPTING_PACKAGES; + return result; +} + +bool DDLPackageProcessor::checkPPLostConnection(std::string error) +{ + return error.find(PPLostConnectionErrorCode) != std::string::npos; +} + void DDLPackageProcessor::returnOIDs(execplan::CalpontSystemCatalog::RIDList& ridList, execplan::CalpontSystemCatalog::DictOIDList& dictOIDList) { diff --git a/dbcon/ddlpackageproc/ddlpackageprocessor.h b/dbcon/ddlpackageproc/ddlpackageprocessor.h index 83fb33b43..6b3ccdf07 100644 --- a/dbcon/ddlpackageproc/ddlpackageprocessor.h +++ b/dbcon/ddlpackageproc/ddlpackageprocessor.h @@ -93,7 +93,8 @@ class DDLPackageProcessor NETWORK_ERROR, PARTITION_WARNING, WARN_NO_PARTITION, - DROP_TABLE_NOT_IN_CATALOG_ERROR + DROP_TABLE_NOT_IN_CATALOG_ERROR, + PP_LOST_CONNECTION }; enum DebugLevel /** @brief Debug level type enumeration */ @@ -195,42 +196,6 @@ class DDLPackageProcessor int spare : 6; Date( ) { year = 0; month = 0; day = 0; spare = 0;} }; */ - /** @brief a structure to hold a datetime - */ - struct dateTime - { - unsigned msecond : 20; - unsigned second : 6; - 
unsigned minute : 6; - unsigned hour : 6; - unsigned day : 6; - unsigned month : 4; - unsigned year : 16; - // NULL column value = 0xFFFFFFFFFFFFFFFE - EXPORT dateTime() - { - year = 0xFFFF; - month = 0xF; - day = 0x3F; - hour = 0x3F; - minute = 0x3F; - second = 0x3F; - msecond = 0xFFFFE; - } - }; - /* - struct dateTime - { - int year : 16; - int month : 4; - int day : 6; - int hour : 6; - int minute : 6; - int second : 6; - int msecond : 20; - dateTime( ) { year = 0; month = 0; day = 0; hour = 0; minute = 0; second = 0; msecond = 0; } - } - ; */ /** @brief a vector of dictionary object ids */ typedef std::vector DictionaryOIDList; @@ -283,6 +248,18 @@ class DDLPackageProcessor // std::cout << "in DDLPackageProcessor constructor " << this << std::endl; } + /** @brief Function wrapper for `processPackageInternal`. + */ + DDLResult processPackage(ddlpackage::SqlStatement* sqlStmt); + + /** @brief Check that give exception is related to PP lost connection. + */ + bool checkPPLostConnection(std::string error); + + /** @brief Internal implementation for `process` package command. 
+ */ + virtual DDLResult processPackageInternal(ddlpackage::SqlStatement* sqlStmt); + /** @brief destructor */ EXPORT virtual ~DDLPackageProcessor(); @@ -409,6 +386,8 @@ class DDLPackageProcessor */ EXPORT void fetchLogFile(TableLogInfo& tableLogInfos, uint64_t uniqueId); + // virtual EXPORT DDLResult processPackage(ddlpackage::TruncTableStatement& truncTableStmt); + BRM::TxnID fTxnid; protected: @@ -871,6 +850,8 @@ class DDLPackageProcessor void cleanString(std::string& s); // std::string fDDLLogFileName; DebugLevel fDebugLevel; // internal use debug level + + const std::string PPLostConnectionErrorCode = "MCS-2045"; }; /** @brief helper template function to do safe from string to type conversions * @@ -885,4 +866,3 @@ bool from_string(T& t, const std::string& s, std::ios_base& (*f)(std::ios_base&) } // namespace ddlpackageprocessor #undef EXPORT - diff --git a/dbcon/ddlpackageproc/dropindexprocessor.cpp b/dbcon/ddlpackageproc/dropindexprocessor.cpp index e1b5720c8..65d12d441 100644 --- a/dbcon/ddlpackageproc/dropindexprocessor.cpp +++ b/dbcon/ddlpackageproc/dropindexprocessor.cpp @@ -30,8 +30,7 @@ using namespace logging; namespace ddlpackageprocessor { -DropIndexProcessor::DDLResult DropIndexProcessor::processPackage( - ddlpackage::DropIndexStatement& dropIndexStmt) +DropIndexProcessor::DDLResult DropIndexProcessor::processPackageInternal(ddlpackage::SqlStatement* sqlStmt) { SUMMARY_INFO("DropIndexProcessor::processPackage"); @@ -49,20 +48,32 @@ DropIndexProcessor::DDLResult DropIndexProcessor::processPackage( int err = 0; + auto* dropIndexStmt = dynamic_cast(sqlStmt); + if (!dropIndexStmt) + { + Message::Args args; + Message message(9); + args.add("DropIndexStatement wrong cast"); + message.format(args); + result.result = DROP_ERROR; + result.message = message; + return result; + } + VERBOSE_INFO(dropIndexStmt); - SQLLogger logger(dropIndexStmt.fSql, fDDLLoggingId, dropIndexStmt.fSessionID, txnID.id); + SQLLogger logger(dropIndexStmt->fSql, fDDLLoggingId, 
dropIndexStmt->fSessionID, txnID.id); - indexName.schema = dropIndexStmt.fIndexName->fSchema; - indexName.index = dropIndexStmt.fIndexName->fName; + indexName.schema = dropIndexStmt->fIndexName->fSchema; + indexName.index = dropIndexStmt->fIndexName->fName; // Look up table name from indexname. Oracle will error out if same constraintname or indexname exists. - CalpontSystemCatalog::TableName tableName = - sysCatalogPtr->lookupTableForIndex(dropIndexStmt.fIndexName->fName, dropIndexStmt.fIndexName->fSchema); + CalpontSystemCatalog::TableName tableName = sysCatalogPtr->lookupTableForIndex( + dropIndexStmt->fIndexName->fName, dropIndexStmt->fIndexName->fSchema); indexName.table = tableName.table; indexOID = sysCatalogPtr->lookupIndexNbr(indexName); VERBOSE_INFO("Removing the SYSINDEX meta data"); - removeSysIndexMetaData(dropIndexStmt.fSessionID, txnID.id, result, *dropIndexStmt.fIndexName); + removeSysIndexMetaData(dropIndexStmt->fSessionID, txnID.id, result, *(dropIndexStmt->fIndexName)); if (result.result != NO_ERROR) { @@ -71,7 +82,7 @@ DropIndexProcessor::DDLResult DropIndexProcessor::processPackage( } VERBOSE_INFO("Removing the SYSINDEXCOL meta data"); - removeSysIndexColMetaData(dropIndexStmt.fSessionID, txnID.id, result, *dropIndexStmt.fIndexName); + removeSysIndexColMetaData(dropIndexStmt->fSessionID, txnID.id, result, *(dropIndexStmt->fIndexName)); if (result.result != NO_ERROR) { @@ -89,7 +100,7 @@ DropIndexProcessor::DDLResult DropIndexProcessor::processPackage( } // Log the DDL statement - logging::logDDL(dropIndexStmt.fSessionID, txnID.id, dropIndexStmt.fSql, dropIndexStmt.fOwner); + logging::logDDL(dropIndexStmt->fSessionID, txnID.id, dropIndexStmt->fSql, dropIndexStmt->fOwner); // register the changes err = fWriteEngine.commit(txnID.id); @@ -106,7 +117,7 @@ DropIndexProcessor::DDLResult DropIndexProcessor::processPackage( return result; rollback: - fWriteEngine.rollbackTran(txnID.id, dropIndexStmt.fSessionID); + fWriteEngine.rollbackTran(txnID.id, 
dropIndexStmt->fSessionID); fSessionManager.rolledback(txnID); return result; } diff --git a/dbcon/ddlpackageproc/dropindexprocessor.h b/dbcon/ddlpackageproc/dropindexprocessor.h index 7baa4635d..b92c569ba 100644 --- a/dbcon/ddlpackageproc/dropindexprocessor.h +++ b/dbcon/ddlpackageproc/dropindexprocessor.h @@ -38,7 +38,7 @@ class DropIndexProcessor : public DDLPackageProcessor * * @param dropIndexStmt the drop index statement */ - DDLResult processPackage(ddlpackage::DropIndexStatement& dropIndexStmt); + DDLResult processPackageInternal(ddlpackage::SqlStatement* dropIndexStmt); protected: private: diff --git a/dbcon/ddlpackageproc/droppartitionprocessor.cpp b/dbcon/ddlpackageproc/droppartitionprocessor.cpp index dbf297e20..f5fd55747 100644 --- a/dbcon/ddlpackageproc/droppartitionprocessor.cpp +++ b/dbcon/ddlpackageproc/droppartitionprocessor.cpp @@ -37,20 +37,14 @@ using namespace oam; namespace ddlpackageprocessor { -DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackage( - ddlpackage::DropPartitionStatement& dropPartitionStmt) +DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackageInternal( + ddlpackage::SqlStatement* sqlStmt) { SUMMARY_INFO("DropPartitionProcessor::processPackage"); DDLResult result; result.result = NO_ERROR; std::string err; - VERBOSE_INFO(dropPartitionStmt); - - // Commit current transaction. - // all DDL statements cause an implicit commit - VERBOSE_INFO("Getting current txnID"); - int rc = 0; rc = fDbrm->isReadWrite(); BRM::TxnID txnID; @@ -69,6 +63,23 @@ DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackage( return result; } + auto* dropPartitionStmt = dynamic_cast<ddlpackage::DropPartitionStatement*>(sqlStmt); + if (!dropPartitionStmt) + { + logging::Message::Args args; + logging::Message message(9); + args.add("DropPartitionStatement wrong cast"); + message.format(args); + result.result = ALTER_ERROR; + result.message = message; + return result; + } + + VERBOSE_INFO(dropPartitionStmt); + // Commit current transaction.
+ // all DDL statements cause an implicit commit + VERBOSE_INFO("Getting current txnID"); + std::vector oidList; CalpontSystemCatalog::OID tableAuxColOid; CalpontSystemCatalog::RIDList tableColRidList; @@ -76,7 +87,7 @@ DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackage( execplan::CalpontSystemCatalog::ROPair roPair; uint32_t processID = 0; uint64_t uniqueID = 0; - uint32_t sessionID = dropPartitionStmt.fSessionID; + uint32_t sessionID = dropPartitionStmt->fSessionID; std::string processName("DDLProc"); uint64_t uniqueId = 0; @@ -108,19 +119,19 @@ DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackage( return result; } - string stmt = dropPartitionStmt.fSql + "|" + dropPartitionStmt.fTableName->fSchema + "|"; + string stmt = dropPartitionStmt->fSql + "|" + dropPartitionStmt->fTableName->fSchema + "|"; SQLLogger logger(stmt, fDDLLoggingId, sessionID, txnID.id); try { // check table lock boost::shared_ptr systemCatalogPtr = - CalpontSystemCatalog::makeCalpontSystemCatalog(dropPartitionStmt.fSessionID); + CalpontSystemCatalog::makeCalpontSystemCatalog(dropPartitionStmt->fSessionID); systemCatalogPtr->identity(CalpontSystemCatalog::EC); - systemCatalogPtr->sessionID(dropPartitionStmt.fSessionID); + systemCatalogPtr->sessionID(dropPartitionStmt->fSessionID); CalpontSystemCatalog::TableName tableName; - tableName.schema = dropPartitionStmt.fTableName->fSchema; - tableName.table = dropPartitionStmt.fTableName->fName; + tableName.schema = dropPartitionStmt->fTableName->fSchema; + tableName.table = dropPartitionStmt->fTableName->fName; roPair = systemCatalogPtr->tableRID(tableName); //@Bug 3054 check for system catalog @@ -177,7 +188,7 @@ DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackage( } while (nanosleep(&abs_ts, &rm_ts) < 0); // reset - sessionID = dropPartitionStmt.fSessionID; + sessionID = dropPartitionStmt->fSessionID; txnID.id = fTxnid.id; txnID.valid = fTxnid.valid; processID = ::getpid(); @@ -224,8 
+235,8 @@ DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackage( // 7. Remove the extents from extentmap for the partition CalpontSystemCatalog::TableName userTableName; - userTableName.schema = dropPartitionStmt.fTableName->fSchema; - userTableName.table = dropPartitionStmt.fTableName->fName; + userTableName.schema = dropPartitionStmt->fTableName->fSchema; + userTableName.table = dropPartitionStmt->fTableName->fName; tableColRidList = systemCatalogPtr->columnRIDs(userTableName); tableAuxColOid = systemCatalogPtr->tableAUXColumnOID(userTableName); @@ -252,7 +263,7 @@ DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackage( // Mark the partition disabled from extent map string emsg; - rc = fDbrm->markPartitionForDeletion(oidList, dropPartitionStmt.fPartitions, emsg); + rc = fDbrm->markPartitionForDeletion(oidList, dropPartitionStmt->fPartitions, emsg); if (rc != 0 && rc != BRM::ERR_PARTITION_DISABLED && rc != BRM::ERR_INVALID_OP_LAST_PARTITION && rc != BRM::ERR_NOT_EXIST_PARTITION) @@ -277,7 +288,7 @@ DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackage( set::iterator it; - for (it = dropPartitionStmt.fPartitions.begin(); it != dropPartitionStmt.fPartitions.end(); ++it) + for (it = dropPartitionStmt->fPartitions.begin(); it != dropPartitionStmt->fPartitions.end(); ++it) { if (outOfServicePartitions.find(*it) != outOfServicePartitions.end()) markedPartitions.insert(*it); @@ -293,7 +304,7 @@ DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackage( // Remove the partition from extent map emsg.clear(); - rc = fDbrm->deletePartition(oidList, dropPartitionStmt.fPartitions, emsg); + rc = fDbrm->deletePartition(oidList, dropPartitionStmt->fPartitions, emsg); if (rc != 0) throw std::runtime_error(emsg); @@ -359,7 +370,8 @@ DropPartitionProcessor::DDLResult DropPartitionProcessor::processPackage( } // Log the DDL statement - logging::logDDL(dropPartitionStmt.fSessionID, txnID.id, dropPartitionStmt.fSql, 
dropPartitionStmt.fOwner); + logging::logDDL(dropPartitionStmt->fSessionID, txnID.id, dropPartitionStmt->fSql, + dropPartitionStmt->fOwner); // Remove the log file // release the transaction diff --git a/dbcon/ddlpackageproc/droppartitionprocessor.h b/dbcon/ddlpackageproc/droppartitionprocessor.h index 2c23fbd31..6c0b66d39 100644 --- a/dbcon/ddlpackageproc/droppartitionprocessor.h +++ b/dbcon/ddlpackageproc/droppartitionprocessor.h @@ -39,14 +39,14 @@ class DropPartitionProcessor : public DDLPackageProcessor DropPartitionProcessor(BRM::DBRM* aDbrm) : DDLPackageProcessor(aDbrm) { } + + protected: + private: /** @brief process a drop table statement * * @param dropTableStmt the drop table statement */ - EXPORT DDLResult processPackage(ddlpackage::DropPartitionStatement& dropPartitionStmt); - - protected: - private: + DDLResult processPackageInternal(ddlpackage::SqlStatement* dropPartitionStmt); }; } // namespace ddlpackageprocessor diff --git a/dbcon/ddlpackageproc/droptableprocessor.cpp b/dbcon/ddlpackageproc/droptableprocessor.cpp index d40aac3f2..79d3d35f8 100644 --- a/dbcon/ddlpackageproc/droptableprocessor.cpp +++ b/dbcon/ddlpackageproc/droptableprocessor.cpp @@ -50,14 +50,24 @@ using namespace oam; namespace ddlpackageprocessor { -DropTableProcessor::DDLResult DropTableProcessor::processPackage( - ddlpackage::DropTableStatement& dropTableStmt) +DropTableProcessor::DDLResult DropTableProcessor::processPackageInternal(ddlpackage::SqlStatement* sqlStmt) { SUMMARY_INFO("DropTableProcessor::processPackage"); - DDLResult result; result.result = NO_ERROR; std::string err; + + auto* dropTableStmt = dynamic_cast(sqlStmt); + if (!dropTableStmt) + { + Message::Args args; + Message message(9); + args.add("DropTableStatement wrong cast"); + message.format(args); + result.result = DROP_ERROR; + result.message = message; + return result; + } VERBOSE_INFO(dropTableStmt); // Commit current transaction. 
@@ -82,8 +92,8 @@ DropTableProcessor::DDLResult DropTableProcessor::processPackage( return result; } - string stmt = dropTableStmt.fSql + "|" + dropTableStmt.fTableName->fSchema + "|"; - SQLLogger logger(stmt, fDDLLoggingId, dropTableStmt.fSessionID, txnID.id); + string stmt = dropTableStmt->fSql + "|" + dropTableStmt->fTableName->fSchema + "|"; + SQLLogger logger(stmt, fDDLLoggingId, dropTableStmt->fSessionID, txnID.id); std::vector oidList; CalpontSystemCatalog::RIDList tableColRidList; @@ -136,12 +146,12 @@ DropTableProcessor::DDLResult DropTableProcessor::processPackage( { // check table lock boost::shared_ptr systemCatalogPtr = - CalpontSystemCatalog::makeCalpontSystemCatalog(dropTableStmt.fSessionID); + CalpontSystemCatalog::makeCalpontSystemCatalog(dropTableStmt->fSessionID); systemCatalogPtr->identity(CalpontSystemCatalog::EC); - systemCatalogPtr->sessionID(dropTableStmt.fSessionID); + systemCatalogPtr->sessionID(dropTableStmt->fSessionID); CalpontSystemCatalog::TableName tableName; - tableName.schema = dropTableStmt.fTableName->fSchema; - tableName.table = dropTableStmt.fTableName->fName; + tableName.schema = dropTableStmt->fTableName->fSchema; + tableName.table = dropTableStmt->fTableName->fName; try { @@ -158,34 +168,42 @@ DropTableProcessor::DDLResult DropTableProcessor::processPackage( } catch (IDBExcept& ie) { - if (ie.errorCode() == ERR_TABLE_NOT_IN_CATALOG) + if (checkPPLostConnection(ie.what())) { - Message::Args args; - Message message(1); - args.add("Table does not exist in ColumnStore."); - message.format(args); - result.result = DROP_TABLE_NOT_IN_CATALOG_ERROR; - result.message = message; - fSessionManager.rolledback(txnID); + result.result = PP_LOST_CONNECTION; return result; } else { - result.result = DROP_ERROR; - Message::Args args; - Message message(9); - args.add("Drop table failed due to "); - args.add(ie.what()); - message.format(args); - result.message = message; - fSessionManager.rolledback(txnID); - return result; + if 
(ie.errorCode() == ERR_TABLE_NOT_IN_CATALOG) + { + Message::Args args; + Message message(1); + args.add("Table does not exist in ColumnStore."); + message.format(args); + result.result = DROP_TABLE_NOT_IN_CATALOG_ERROR; + result.message = message; + fSessionManager.rolledback(txnID); + return result; + } + else + { + result.result = DROP_ERROR; + Message::Args args; + Message message(9); + args.add("Drop table failed due to "); + args.add(ie.what()); + message.format(args); + result.message = message; + fSessionManager.rolledback(txnID); + return result; + } } } uint32_t processID = ::getpid(); int32_t txnid = txnID.id; - int32_t sessionId = dropTableStmt.fSessionID; + int32_t sessionId = dropTableStmt->fSessionID; std::string processName("DDLProc"); int i = 0; @@ -228,12 +246,11 @@ DropTableProcessor::DDLResult DropTableProcessor::processPackage( abs_ts.tv_nsec = rm_ts.tv_nsec; } while (nanosleep(&abs_ts, &rm_ts) < 0); - try { processID = ::getpid(); txnid = txnID.id; - sessionId = dropTableStmt.fSessionID; + sessionId = dropTableStmt->fSessionID; ; processName = "DDLProc"; tableLockId = fDbrm->getTableLock(pms, roPair.objnum, &processName, &processID, &sessionId, &txnid, @@ -272,8 +289,8 @@ DropTableProcessor::DDLResult DropTableProcessor::processPackage( // 10.Return the OIDs CalpontSystemCatalog::TableName userTableName; - userTableName.schema = dropTableStmt.fTableName->fSchema; - userTableName.table = dropTableStmt.fTableName->fName; + userTableName.schema = dropTableStmt->fTableName->fSchema; + userTableName.table = dropTableStmt->fTableName->fName; tableColRidList = systemCatalogPtr->columnRIDs(userTableName); @@ -300,10 +317,10 @@ DropTableProcessor::DDLResult DropTableProcessor::processPackage( #endif bytestream << (ByteStream::byte)WE_SVR_DELETE_SYSTABLE; bytestream << uniqueId; - bytestream << (uint32_t)dropTableStmt.fSessionID; + bytestream << (uint32_t)dropTableStmt->fSessionID; bytestream << (uint32_t)txnID.id; - bytestream << 
dropTableStmt.fTableName->fSchema; - bytestream << dropTableStmt.fTableName->fName; + bytestream << dropTableStmt->fTableName->fSchema; + bytestream << dropTableStmt->fTableName->fName; // Find out where systable is BRM::OID_t sysOid = 1001; @@ -378,30 +395,39 @@ DropTableProcessor::DDLResult DropTableProcessor::processPackage( if (rc != 0) { - cout << fTxnid.id << " Error in dropping table from systables(" << (int)rc << ") " << errorMsg.c_str() - << endl; - Message::Args args; - Message message(9); - args.add("Error in dropping table from systables."); - args.add(errorMsg); - message.format(args); - result.result = (ResultCode)rc; - result.message = message; - // release table lock and session - fSessionManager.rolledback(txnID); - (void)fDbrm->releaseTableLock(tableLockId); - fWEClient->removeQueue(uniqueId); - return result; + if (checkPPLostConnection(errorMsg)) + { + result.result = PP_LOST_CONNECTION; + (void)fDbrm->releaseTableLock(tableLockId); + fWEClient->removeQueue(uniqueId); + return result; + } + else + { + cout << fTxnid.id << " Error in dropping table from systables(" << (int)rc << ") " << errorMsg.c_str() + << endl; + Message::Args args; + Message message(9); + args.add("Error in dropping table from systables."); + args.add(errorMsg); + message.format(args); + result.result = (ResultCode)rc; + result.message = message; + fSessionManager.rolledback(txnID); + (void)fDbrm->releaseTableLock(tableLockId); + fWEClient->removeQueue(uniqueId); + return result; + } } // remove from syscolumn bytestream.restart(); bytestream << (ByteStream::byte)WE_SVR_DELETE_SYSCOLUMN; bytestream << uniqueId; - bytestream << (uint32_t)dropTableStmt.fSessionID; + bytestream << (uint32_t)dropTableStmt->fSessionID; bytestream << (uint32_t)txnID.id; - bytestream << dropTableStmt.fTableName->fSchema; - bytestream << dropTableStmt.fTableName->fName; + bytestream << dropTableStmt->fTableName->fSchema; + bytestream << dropTableStmt->fTableName->fName; // Find out where syscolumn is 
sysOid = 1021; @@ -518,7 +544,7 @@ DropTableProcessor::DDLResult DropTableProcessor::processPackage( } // Log the DDL statement - logDDL(dropTableStmt.fSessionID, txnID.id, dropTableStmt.fSql, dropTableStmt.fOwner); + logDDL(dropTableStmt->fSessionID, txnID.id, dropTableStmt->fSql, dropTableStmt->fOwner); } catch (std::exception& ex) { @@ -738,8 +764,7 @@ DropTableProcessor::DDLResult DropTableProcessor::processPackage( return result; } -TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( - ddlpackage::TruncTableStatement& truncTableStmt) +TruncTableProcessor::DDLResult TruncTableProcessor::processPackageInternal(ddlpackage::SqlStatement* sqlStmt) { SUMMARY_INFO("TruncTableProcessor::processPackage"); // 1. lock the table @@ -756,6 +781,8 @@ TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( DDLResult result; result.result = NO_ERROR; std::string err; + + auto* truncTableStmt = dynamic_cast(sqlStmt); VERBOSE_INFO(truncTableStmt); // @Bug 4150. Check dbrm status before doing anything to the table. @@ -778,8 +805,8 @@ TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( } //@Bug 5765 log the schema. 
- string stmt = truncTableStmt.fSql + "|" + truncTableStmt.fTableName->fSchema + "|"; - SQLLogger logger(stmt, fDDLLoggingId, truncTableStmt.fSessionID, txnID.id); + string stmt = truncTableStmt->fSql + "|" + truncTableStmt->fTableName->fSchema + "|"; + SQLLogger logger(stmt, fDDLLoggingId, truncTableStmt->fSessionID, txnID.id); std::vector columnOidList; std::vector allOidList; @@ -789,12 +816,11 @@ TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( execplan::CalpontSystemCatalog::ROPair roPair; std::string processName("DDLProc"); uint32_t processID = ::getpid(); - ; int32_t txnid = txnID.id; boost::shared_ptr systemCatalogPtr = - CalpontSystemCatalog::makeCalpontSystemCatalog(truncTableStmt.fSessionID); + CalpontSystemCatalog::makeCalpontSystemCatalog(truncTableStmt->fSessionID); systemCatalogPtr->identity(CalpontSystemCatalog::EC); - systemCatalogPtr->sessionID(truncTableStmt.fSessionID); + systemCatalogPtr->sessionID(truncTableStmt->fSessionID); CalpontSystemCatalog::TableInfo tableInfo; uint64_t uniqueId = 0; @@ -840,10 +866,10 @@ TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( // check table lock CalpontSystemCatalog::TableName tableName; - tableName.schema = truncTableStmt.fTableName->fSchema; - tableName.table = truncTableStmt.fTableName->fName; + tableName.schema = truncTableStmt->fTableName->fSchema; + tableName.table = truncTableStmt->fTableName->fName; roPair = systemCatalogPtr->tableRID(tableName); - int32_t sessionId = truncTableStmt.fSessionID; + int32_t sessionId = truncTableStmt->fSessionID; std::string processName("DDLProc"); int i = 0; @@ -886,12 +912,11 @@ TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( abs_ts.tv_nsec = rm_ts.tv_nsec; } while (nanosleep(&abs_ts, &rm_ts) < 0); - try { processID = ::getpid(); txnid = txnID.id; - sessionId = truncTableStmt.fSessionID; + sessionId = truncTableStmt->fSessionID; processName = "DDLProc"; tableLockId = fDbrm->getTableLock(pms, roPair.objnum, 
&processName, &processID, &sessionId, &txnid, BRM::LOADING); @@ -916,8 +941,8 @@ TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( } CalpontSystemCatalog::TableName userTableName; - userTableName.schema = truncTableStmt.fTableName->fSchema; - userTableName.table = truncTableStmt.fTableName->fName; + userTableName.schema = truncTableStmt->fTableName->fSchema; + userTableName.table = truncTableStmt->fTableName->fName; tableColRidList = systemCatalogPtr->columnRIDs(userTableName); tableAuxColOid = systemCatalogPtr->tableAUXColumnOID(userTableName); @@ -950,30 +975,61 @@ TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( } catch (std::exception& ex) { - cerr << "TruncateTableProcessor::processPackage: " << ex.what() << endl; - - Message::Args args; - Message message(9); - args.add("Truncate table failed: "); - args.add(ex.what()); - args.add(""); - fSessionManager.rolledback(txnID); - - try + if (checkPPLostConnection(ex.what())) { - (void)fDbrm->releaseTableLock(tableLockId); + if (tableLockId != 0) + { + Message::Args args; + Message message(9); + args.add("Truncate table failed: "); + args.add(ex.what()); + args.add(""); + try + { + (void)fDbrm->releaseTableLock(tableLockId); + } + catch (std::exception&) + { + args.add(IDBErrorInfo::instance()->errorMsg(ERR_HARD_FAILURE)); + fSessionManager.rolledback(txnID); + message.format(args); + fWEClient->removeQueue(uniqueId); + result.result = TRUNC_ERROR; + return result; + } + } + + fWEClient->removeQueue(uniqueId); + result.result = PP_LOST_CONNECTION; + return result; } - catch (std::exception&) + else { - args.add(IDBErrorInfo::instance()->errorMsg(ERR_HARD_FAILURE)); + cerr << "TruncateTableProcessor::processPackage: " << ex.what() << endl; + + Message::Args args; + Message message(9); + args.add("Truncate table failed: "); + args.add(ex.what()); + args.add(""); + fSessionManager.rolledback(txnID); + + try + { + (void)fDbrm->releaseTableLock(tableLockId); + } + catch 
(std::exception&) + { + args.add(IDBErrorInfo::instance()->errorMsg(ERR_HARD_FAILURE)); + } + + fWEClient->removeQueue(uniqueId); + message.format(args); + + result.result = TRUNC_ERROR; + result.message = message; + return result; } - - fWEClient->removeQueue(uniqueId); - message.format(args); - - result.result = TRUNC_ERROR; - result.message = message; - return result; } catch (...) { @@ -1328,7 +1384,7 @@ TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( if (rc != 0) { - rollBackTransaction(uniqueId, txnID, truncTableStmt.fSessionID); // What to do with the error code + rollBackTransaction(uniqueId, txnID, truncTableStmt->fSessionID); // What to do with the error code fSessionManager.rolledback(txnID); } @@ -1341,7 +1397,7 @@ TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( } // Log the DDL statement - logDDL(truncTableStmt.fSessionID, txnID.id, truncTableStmt.fSql, truncTableStmt.fOwner); + logDDL(truncTableStmt->fSessionID, txnID.id, truncTableStmt->fSql, truncTableStmt->fOwner); try { @@ -1380,4 +1436,3 @@ TruncTableProcessor::DDLResult TruncTableProcessor::processPackage( } } // namespace ddlpackageprocessor - diff --git a/dbcon/ddlpackageproc/droptableprocessor.h b/dbcon/ddlpackageproc/droptableprocessor.h index 90ab26173..1a86706e3 100644 --- a/dbcon/ddlpackageproc/droptableprocessor.h +++ b/dbcon/ddlpackageproc/droptableprocessor.h @@ -39,14 +39,14 @@ class DropTableProcessor : public DDLPackageProcessor DropTableProcessor(BRM::DBRM* aDbrm) : DDLPackageProcessor(aDbrm) { } + + protected: + private: /** @brief process a drop table statement * * @param dropTableStmt the drop table statement */ - EXPORT DDLResult processPackage(ddlpackage::DropTableStatement& dropTableStmt); - - protected: - private: + DDLResult processPackageInternal(ddlpackage::SqlStatement* dropTableStmt); }; /** @brief specialization of a DDLPacakageProcessor @@ -59,14 +59,14 @@ class TruncTableProcessor : public DDLPackageProcessor 
TruncTableProcessor(BRM::DBRM* aDbrm) : DDLPackageProcessor(aDbrm) { } + + protected: + private: /** @brief process a truncate table statement * * @param truncTableStmt the truncate table statement */ - EXPORT DDLResult processPackage(ddlpackage::TruncTableStatement& truncTableStmt); - - protected: - private: + DDLResult processPackageInternal(ddlpackage::SqlStatement* truncTableStmt); }; } // namespace ddlpackageprocessor diff --git a/dbcon/ddlpackageproc/markpartitionprocessor.cpp b/dbcon/ddlpackageproc/markpartitionprocessor.cpp index 6e5f5eb11..6c87d5e0e 100644 --- a/dbcon/ddlpackageproc/markpartitionprocessor.cpp +++ b/dbcon/ddlpackageproc/markpartitionprocessor.cpp @@ -34,16 +34,28 @@ using namespace oam; namespace ddlpackageprocessor { -MarkPartitionProcessor::DDLResult MarkPartitionProcessor::processPackage( - ddlpackage::MarkPartitionStatement& markPartitionStmt) +MarkPartitionProcessor::DDLResult MarkPartitionProcessor::processPackageInternal( + ddlpackage::SqlStatement* sqlStmt) { - SUMMARY_INFO("RestorePartitionProcessor::processPackage"); + SUMMARY_INFO("MarkPartitionProcessor::processPackage"); DDLResult result; result.result = NO_ERROR; std::string err; - VERBOSE_INFO(markPartitionStmt); + auto* markPartitionStmt = dynamic_cast(sqlStmt); + if (!markPartitionStmt) + { + logging::Message::Args args; + logging::Message message(9); + args.add("MarkPartitionStatement wrong cast"); + message.format(args); + result.result = DROP_ERROR; + result.message = message; + return result; + } + + VERBOSE_INFO(markPartitionStmt); BRM::TxnID txnID; txnID.id = fTxnid.id; txnID.valid = fTxnid.valid; @@ -69,30 +81,30 @@ MarkPartitionProcessor::DDLResult MarkPartitionProcessor::processPackage( CalpontSystemCatalog::DictOIDList dictOIDList; std::string processName("DDLProc"); - string stmt = markPartitionStmt.fSql + "|" + markPartitionStmt.fTableName->fSchema + "|"; - SQLLogger logger(stmt, fDDLLoggingId, markPartitionStmt.fSessionID, txnID.id); + string stmt = 
markPartitionStmt->fSql + "|" + markPartitionStmt->fTableName->fSchema + "|"; + SQLLogger logger(stmt, fDDLLoggingId, markPartitionStmt->fSessionID, txnID.id); uint32_t processID = 0; uint64_t uniqueID = 0; - uint32_t sessionID = markPartitionStmt.fSessionID; + uint32_t sessionID = markPartitionStmt->fSessionID; execplan::CalpontSystemCatalog::ROPair roPair; try { // check table lock boost::shared_ptr systemCatalogPtr = - CalpontSystemCatalog::makeCalpontSystemCatalog(markPartitionStmt.fSessionID); + CalpontSystemCatalog::makeCalpontSystemCatalog(markPartitionStmt->fSessionID); systemCatalogPtr->identity(CalpontSystemCatalog::EC); - systemCatalogPtr->sessionID(markPartitionStmt.fSessionID); + systemCatalogPtr->sessionID(markPartitionStmt->fSessionID); CalpontSystemCatalog::TableName tableName; - tableName.schema = markPartitionStmt.fTableName->fSchema; - tableName.table = markPartitionStmt.fTableName->fName; + tableName.schema = markPartitionStmt->fTableName->fSchema; + tableName.table = markPartitionStmt->fTableName->fName; roPair = systemCatalogPtr->tableRID(tableName); //@Bug 3054 check for system catalog if (roPair.objnum < 3000) { - throw std::runtime_error("Drop partition cannot be operated on Calpont system catalog."); + throw std::runtime_error("Mark partition cannot be operated on Calpont system catalog."); } int i = 0; @@ -142,7 +154,7 @@ MarkPartitionProcessor::DDLResult MarkPartitionProcessor::processPackage( } while (nanosleep(&abs_ts, &rm_ts) < 0); // reset - sessionID = markPartitionStmt.fSessionID; + sessionID = markPartitionStmt->fSessionID; txnID.id = fTxnid.id; txnID.valid = fTxnid.valid; processID = ::getpid(); @@ -186,8 +198,8 @@ MarkPartitionProcessor::DDLResult MarkPartitionProcessor::processPackage( // 6. 
Remove the column and dictionary files for the partition CalpontSystemCatalog::TableName userTableName; - userTableName.schema = markPartitionStmt.fTableName->fSchema; - userTableName.table = markPartitionStmt.fTableName->fName; + userTableName.schema = markPartitionStmt->fTableName->fSchema; + userTableName.table = markPartitionStmt->fTableName->fName; tableColRidList = systemCatalogPtr->columnRIDs(userTableName); tableAuxColOid = systemCatalogPtr->tableAUXColumnOID(userTableName); @@ -214,7 +226,7 @@ MarkPartitionProcessor::DDLResult MarkPartitionProcessor::processPackage( // Remove the partition from extent map string emsg; - rc = fDbrm->markPartitionForDeletion(oidList, markPartitionStmt.fPartitions, emsg); + rc = fDbrm->markPartitionForDeletion(oidList, markPartitionStmt->fPartitions, emsg); if (rc != 0) { @@ -281,7 +293,7 @@ MarkPartitionProcessor::DDLResult MarkPartitionProcessor::processPackage( } // Log the DDL statement - logging::logDDL(markPartitionStmt.fSessionID, 0, markPartitionStmt.fSql, markPartitionStmt.fOwner); + logging::logDDL(markPartitionStmt->fSessionID, 0, markPartitionStmt->fSql, markPartitionStmt->fOwner); try { diff --git a/dbcon/ddlpackageproc/markpartitionprocessor.h b/dbcon/ddlpackageproc/markpartitionprocessor.h index 4f706bd0a..c3661a81d 100644 --- a/dbcon/ddlpackageproc/markpartitionprocessor.h +++ b/dbcon/ddlpackageproc/markpartitionprocessor.h @@ -39,14 +39,14 @@ class MarkPartitionProcessor : public DDLPackageProcessor MarkPartitionProcessor(BRM::DBRM* aDbrm) : DDLPackageProcessor(aDbrm) { } + + protected: + private: /** @brief process a create table statement * * @param createTableStmt the CreateTableStatement */ - EXPORT DDLResult processPackage(ddlpackage::MarkPartitionStatement& MarkPartitionStmt); - - protected: - private: + DDLResult processPackageInternal(ddlpackage::SqlStatement* MarkPartitionStmt); }; } // namespace ddlpackageprocessor diff --git a/dbcon/ddlpackageproc/restorepartitionprocessor.cpp 
b/dbcon/ddlpackageproc/restorepartitionprocessor.cpp index fea8f7b54..d2883222b 100644 --- a/dbcon/ddlpackageproc/restorepartitionprocessor.cpp +++ b/dbcon/ddlpackageproc/restorepartitionprocessor.cpp @@ -34,14 +34,27 @@ using namespace WriteEngine; namespace ddlpackageprocessor { -RestorePartitionProcessor::DDLResult RestorePartitionProcessor::processPackage( - ddlpackage::RestorePartitionStatement& restorePartitionStmt) +RestorePartitionProcessor::DDLResult RestorePartitionProcessor::processPackageInternal( + ddlpackage::SqlStatement* sqlStmt) { SUMMARY_INFO("RestorePartitionProcessor::processPackage"); DDLResult result; result.result = NO_ERROR; std::string err; + + auto* restorePartitionStmt = dynamic_cast(sqlStmt); + if (!restorePartitionStmt) + { + logging::Message::Args args; + logging::Message message(9); + args.add("RestorePartitionStatement wrong cast"); + message.format(args); + result.result = DROP_ERROR; + result.message = message; + return result; + } + VERBOSE_INFO(restorePartitionStmt); BRM::TxnID txnID; @@ -69,24 +82,24 @@ RestorePartitionProcessor::DDLResult RestorePartitionProcessor::processPackage( CalpontSystemCatalog::DictOIDList dictOIDList; std::string processName("DDLProc"); - string stmt = restorePartitionStmt.fSql + "|" + restorePartitionStmt.fTableName->fSchema + "|"; - SQLLogger logger(stmt, fDDLLoggingId, restorePartitionStmt.fSessionID, txnID.id); + string stmt = restorePartitionStmt->fSql + "|" + restorePartitionStmt->fTableName->fSchema + "|"; + SQLLogger logger(stmt, fDDLLoggingId, restorePartitionStmt->fSessionID, txnID.id); uint32_t processID = 0; uint64_t uniqueID = 0; - uint32_t sessionID = restorePartitionStmt.fSessionID; + uint32_t sessionID = restorePartitionStmt->fSessionID; execplan::CalpontSystemCatalog::ROPair roPair; try { // check table lock boost::shared_ptr systemCatalogPtr = - CalpontSystemCatalog::makeCalpontSystemCatalog(restorePartitionStmt.fSessionID); + 
CalpontSystemCatalog::makeCalpontSystemCatalog(restorePartitionStmt->fSessionID); systemCatalogPtr->identity(CalpontSystemCatalog::EC); - systemCatalogPtr->sessionID(restorePartitionStmt.fSessionID); + systemCatalogPtr->sessionID(restorePartitionStmt->fSessionID); CalpontSystemCatalog::TableName tableName; - tableName.schema = restorePartitionStmt.fTableName->fSchema; - tableName.table = restorePartitionStmt.fTableName->fName; + tableName.schema = restorePartitionStmt->fTableName->fSchema; + tableName.table = restorePartitionStmt->fTableName->fName; roPair = systemCatalogPtr->tableRID(tableName); //@Bug 3054 check for system catalog @@ -142,7 +155,7 @@ RestorePartitionProcessor::DDLResult RestorePartitionProcessor::processPackage( } while (nanosleep(&abs_ts, &rm_ts) < 0); // reset - sessionID = restorePartitionStmt.fSessionID; + sessionID = restorePartitionStmt->fSessionID; txnID.id = fTxnid.id; txnID.valid = fTxnid.valid; processID = ::getpid(); @@ -188,8 +201,8 @@ RestorePartitionProcessor::DDLResult RestorePartitionProcessor::processPackage( // 6. 
Remove the column and dictionary files for the partition CalpontSystemCatalog::TableName userTableName; - userTableName.schema = restorePartitionStmt.fTableName->fSchema; - userTableName.table = restorePartitionStmt.fTableName->fName; + userTableName.schema = restorePartitionStmt->fTableName->fSchema; + userTableName.table = restorePartitionStmt->fTableName->fName; tableColRidList = systemCatalogPtr->columnRIDs(userTableName); tableAuxColOid = systemCatalogPtr->tableAUXColumnOID(userTableName); @@ -216,7 +229,7 @@ RestorePartitionProcessor::DDLResult RestorePartitionProcessor::processPackage( // Remove the partition from extent map string emsg; - rc = fDbrm->restorePartition(oidList, restorePartitionStmt.fPartitions, emsg); + rc = fDbrm->restorePartition(oidList, restorePartitionStmt->fPartitions, emsg); if (rc != 0) { @@ -281,8 +294,8 @@ RestorePartitionProcessor::DDLResult RestorePartitionProcessor::processPackage( } // Log the DDL statement - logging::logDDL(restorePartitionStmt.fSessionID, txnID.id, restorePartitionStmt.fSql, - restorePartitionStmt.fOwner); + logging::logDDL(restorePartitionStmt->fSessionID, txnID.id, restorePartitionStmt->fSql, + restorePartitionStmt->fOwner); try { diff --git a/dbcon/ddlpackageproc/restorepartitionprocessor.h b/dbcon/ddlpackageproc/restorepartitionprocessor.h index 760d6433c..8e77c7eb9 100644 --- a/dbcon/ddlpackageproc/restorepartitionprocessor.h +++ b/dbcon/ddlpackageproc/restorepartitionprocessor.h @@ -39,14 +39,14 @@ class RestorePartitionProcessor : public DDLPackageProcessor RestorePartitionProcessor(BRM::DBRM* aDbrm) : DDLPackageProcessor(aDbrm) { } + + protected: + private: /** @brief process a drop table statement * * @param dropTableStmt the drop table statement */ - EXPORT DDLResult processPackage(ddlpackage::RestorePartitionStatement& RestorePartitionStmt); - - protected: - private: + DDLResult processPackageInternal(ddlpackage::SqlStatement* RestorePartitionStmt); }; } // namespace ddlpackageprocessor diff --git 
a/dbcon/dmlpackageproc/commandpackageprocessor.cpp b/dbcon/dmlpackageproc/commandpackageprocessor.cpp index 764ecc609..7c3c4794d 100644 --- a/dbcon/dmlpackageproc/commandpackageprocessor.cpp +++ b/dbcon/dmlpackageproc/commandpackageprocessor.cpp @@ -53,7 +53,7 @@ namespace dmlpackageprocessor /*static*/ std::set CommandPackageProcessor::fActiveClearTableLockCmds; /*static*/ boost::mutex CommandPackageProcessor::fActiveClearTableLockCmdMutex; -DMLPackageProcessor::DMLResult CommandPackageProcessor::processPackage( +DMLPackageProcessor::DMLResult CommandPackageProcessor::processPackageInternal( dmlpackage::CalpontDMLPackage& cpackage) { SUMMARY_INFO("CommandPackageProcessor::processPackage"); @@ -485,17 +485,24 @@ DMLPackageProcessor::DMLResult CommandPackageProcessor::processPackage( } catch (std::exception& ex) { - cerr << "CommandPackageProcessor::processPackage: " << ex.what() << endl; + if (checkPPLostConnection(ex)) + { + result.result = PP_LOST_CONNECTION; + } + else + { + cerr << "CommandPackageProcessor::processPackage: " << ex.what() << endl; - logging::Message::Args args; - logging::Message message(1); - args.add(ex.what()); - args.add(""); - args.add(""); - message.format(args); + logging::Message::Args args; + logging::Message message(1); + args.add(ex.what()); + args.add(""); + args.add(""); + message.format(args); - result.result = COMMAND_ERROR; - result.message = message; + result.result = COMMAND_ERROR; + result.message = message; + } } catch (...) 
{ diff --git a/dbcon/dmlpackageproc/commandpackageprocessor.h b/dbcon/dmlpackageproc/commandpackageprocessor.h index b0d9a67a6..ec0218e59 100644 --- a/dbcon/dmlpackageproc/commandpackageprocessor.h +++ b/dbcon/dmlpackageproc/commandpackageprocessor.h @@ -47,17 +47,13 @@ class CommandPackageProcessor : public DMLPackageProcessor CommandPackageProcessor(BRM::DBRM* aDbrm, uint32_t sid) : DMLPackageProcessor(aDbrm, sid) { } - /** @brief process an CommandDMLPackage - * - * @param cpackage the CommandDMLPackage to process - */ - EXPORT DMLResult processPackage(dmlpackage::CalpontDMLPackage& cpackage); protected: private: void viewTableLock(const dmlpackage::CalpontDMLPackage& cpackage, DMLResult& result); void clearTableLock(uint64_t uniqueId, const dmlpackage::CalpontDMLPackage& cpackage, DMLResult& result); void establishTableLockToClear(uint64_t tableLockID, BRM::TableLockInfo& lockInfo); + DMLResult processPackageInternal(dmlpackage::CalpontDMLPackage& cpackage) override; // Tracks active cleartablelock commands by storing set of table lock IDs static std::set fActiveClearTableLockCmds; diff --git a/dbcon/dmlpackageproc/deletepackageprocessor.cpp b/dbcon/dmlpackageproc/deletepackageprocessor.cpp index abe88fbac..42e3b4ee4 100644 --- a/dbcon/dmlpackageproc/deletepackageprocessor.cpp +++ b/dbcon/dmlpackageproc/deletepackageprocessor.cpp @@ -57,7 +57,8 @@ using namespace messageqcpp; using namespace oam; namespace dmlpackageprocessor { -DMLPackageProcessor::DMLResult DeletePackageProcessor::processPackage(dmlpackage::CalpontDMLPackage& cpackage) +DMLPackageProcessor::DMLResult DeletePackageProcessor::processPackageInternal( + dmlpackage::CalpontDMLPackage& cpackage) { SUMMARY_INFO("DeletePackageProcessor::processPackage"); @@ -174,7 +175,6 @@ DMLPackageProcessor::DMLResult DeletePackageProcessor::processPackage(dmlpackage abs_ts.tv_nsec = rm_ts.tv_nsec; } while (nanosleep(&abs_ts, &rm_ts) < 0); - try { processID = ::getpid(); @@ -264,15 +264,22 @@ 
DMLPackageProcessor::DMLResult DeletePackageProcessor::processPackage(dmlpackage } catch (exception& ex) { - cerr << "DeletePackageProcessor::processPackage: " << ex.what() << endl; - - //@Bug 4994 Cancelled job is not error - if (result.result == 0) + if (checkPPLostConnection(ex)) { - result.result = DELETE_ERROR; + result.result = PP_LOST_CONNECTION; } + else + { + cerr << "DeletePackageProcessor::processPackage: " << ex.what() << endl; - result.message = Message(ex.what()); + //@Bug 4994 Cancelled job is not error + if (result.result == 0) + { + result.result = DELETE_ERROR; + } + + result.message = Message(ex.what()); + } } catch (...) { diff --git a/dbcon/dmlpackageproc/deletepackageprocessor.h b/dbcon/dmlpackageproc/deletepackageprocessor.h index bf487b41c..084e004db 100644 --- a/dbcon/dmlpackageproc/deletepackageprocessor.h +++ b/dbcon/dmlpackageproc/deletepackageprocessor.h @@ -45,14 +45,11 @@ class DeletePackageProcessor : public DMLPackageProcessor DeletePackageProcessor(BRM::DBRM* aDbrm, uint32_t sid) : DMLPackageProcessor(aDbrm, sid) { } - /** @brief process a DeleteDMLPackage - * - * @param cpackage the delete dml package to process - */ - EXPORT DMLResult processPackage(dmlpackage::CalpontDMLPackage& cpackage); protected: private: + DMLResult processPackageInternal(dmlpackage::CalpontDMLPackage& cpackage) override; + /** @brief delete a row * * @param txnID the transaction id diff --git a/dbcon/dmlpackageproc/dmlpackageprocessor.cpp b/dbcon/dmlpackageproc/dmlpackageprocessor.cpp index 0f49bc215..e5d288d0d 100644 --- a/dbcon/dmlpackageproc/dmlpackageprocessor.cpp +++ b/dbcon/dmlpackageproc/dmlpackageprocessor.cpp @@ -287,6 +287,31 @@ int32_t DMLPackageProcessor::tryToRollBackTransaction(uint64_t uniqueId, BRM::Tx return weRc; } +DMLPackageProcessor::DMLResult DMLPackageProcessor::processPackage(dmlpackage::CalpontDMLPackage& cpackage) +{ + auto result = processPackageInternal(cpackage); + uint32_t tries = 0; + // Try to setup connection and process 
package one more time. + while ((result.result == PP_LOST_CONNECTION) && (tries < 5)) + { + std::cerr << "DMLPackageProcessor: NETWORK ERROR; attempt # " << tries << std::endl; + joblist::ResourceManager* rm = joblist::ResourceManager::instance(true); + joblist::DistributedEngineComm* fEc = joblist::DistributedEngineComm::instance(rm); + if (fEc->Setup()) + return result; + + result = processPackageInternal(cpackage); + ++tries; + } + return result; +} + +bool DMLPackageProcessor::checkPPLostConnection(std::exception& ex) +{ + std::string error = ex.what(); + return error.find(PPLostConnectionErrorCode) != std::string::npos; +} + int DMLPackageProcessor::rollBackTransaction(uint64_t uniqueId, BRM::TxnID txnID, uint32_t sessionID, std::string& errorMsg) { diff --git a/dbcon/dmlpackageproc/dmlpackageprocessor.h b/dbcon/dmlpackageproc/dmlpackageprocessor.h index ba3d4aaf5..ec7956c81 100644 --- a/dbcon/dmlpackageproc/dmlpackageprocessor.h +++ b/dbcon/dmlpackageproc/dmlpackageprocessor.h @@ -98,7 +98,8 @@ class DMLPackageProcessor TABLE_LOCK_ERROR, JOB_ERROR, JOB_CANCELED, - DBRM_READ_ONLY + DBRM_READ_ONLY, + PP_LOST_CONNECTION }; enum DebugLevel /** @brief Debug level type enumeration */ @@ -148,30 +149,6 @@ class DMLPackageProcessor spare = 0x3E; } }; - /** @brief a structure to hold a datetime - */ - struct dateTime - { - unsigned msecond : 20; - unsigned second : 6; - unsigned minute : 6; - unsigned hour : 6; - unsigned day : 6; - unsigned month : 4; - unsigned year : 16; - // NULL column value = 0xFFFFFFFFFFFFFFFE - dateTime() - { - year = 0xFFFF; - month = 0xF; - day = 0x3F; - hour = 0x3F; - minute = 0x3F; - second = 0x3F; - msecond = 0xFFFFE; - } - }; - /** @brief ctor */ DMLPackageProcessor(BRM::DBRM* aDbrm, uint32_t sid) @@ -236,7 +213,11 @@ class DMLPackageProcessor * * @param cpackage the CalpontDMLPackage to process */ - virtual DMLResult processPackage(dmlpackage::CalpontDMLPackage& cpackage) = 0; + DMLResult processPackage(dmlpackage::CalpontDMLPackage& 
cpackage); + + /** @brief Check that given exception is related to PP lost connection. + */ + bool checkPPLostConnection(std::exception& ex); inline void setRM(joblist::ResourceManager* frm) { @@ -526,6 +507,8 @@ class DMLPackageProcessor execplan::ClientRotator* fExeMgr; private: + virtual DMLResult processPackageInternal(dmlpackage::CalpontDMLPackage& cpackage) = 0; + /** @brief clean beginning and ending glitches and spaces from string * * @param s string to be cleaned @@ -533,6 +516,8 @@ class DMLPackageProcessor void cleanString(std::string& s); DebugLevel fDebugLevel; // internal use debug level + + const std::string PPLostConnectionErrorCode = "MCS-2045"; }; /** @brief helper template function to do safe from string to type conversions diff --git a/dbcon/dmlpackageproc/insertpackageprocessor.cpp b/dbcon/dmlpackageproc/insertpackageprocessor.cpp index 6257a3aed..524acb677 100644 --- a/dbcon/dmlpackageproc/insertpackageprocessor.cpp +++ b/dbcon/dmlpackageproc/insertpackageprocessor.cpp @@ -50,7 +50,8 @@ using namespace messageqcpp; namespace dmlpackageprocessor { -DMLPackageProcessor::DMLResult InsertPackageProcessor::processPackage(dmlpackage::CalpontDMLPackage& cpackage) +DMLPackageProcessor::DMLResult InsertPackageProcessor::processPackageInternal( + dmlpackage::CalpontDMLPackage& cpackage) { SUMMARY_INFO("InsertPackageProcessor::processPackage"); @@ -184,7 +185,6 @@ DMLPackageProcessor::DMLResult InsertPackageProcessor::processPackage(dmlpackage abs_ts.tv_nsec = rm_ts.tv_nsec; } while (nanosleep(&abs_ts, &rm_ts) < 0); - try { processID = ::getpid(); @@ -365,21 +365,28 @@ DMLPackageProcessor::DMLResult InsertPackageProcessor::processPackage(dmlpackage } catch (exception& ex) { - cerr << "InsertPackageProcessor::processPackage: " << ex.what() << endl; - - logging::Message::Args args; - logging::Message message(1); - args.add("Insert Failed: "); - args.add(ex.what()); - args.add(""); - args.add(""); - message.format(args); - - if (result.result != 
VB_OVERFLOW_ERROR) + if (checkPPLostConnection(ex)) { - result.result = INSERT_ERROR; - result.message = message; - errorMsg = ex.what(); + result.result = PP_LOST_CONNECTION; + } + else + { + cerr << "InsertPackageProcessor::processPackage: " << ex.what() << endl; + + logging::Message::Args args; + logging::Message message(1); + args.add("Insert Failed: "); + args.add(ex.what()); + args.add(""); + args.add(""); + message.format(args); + + if (result.result != VB_OVERFLOW_ERROR) + { + result.result = INSERT_ERROR; + result.message = message; + errorMsg = ex.what(); + } } } catch (...) @@ -397,7 +404,19 @@ DMLPackageProcessor::DMLResult InsertPackageProcessor::processPackage(dmlpackage result.message = message; } - if ((rc != 0) && (rc != IDBRANGE_WARNING)) + if (rc == 1) + { + logging::Message::Args args; + logging::Message message(1); + args.add("Insert Failed: "); + args.add(errorMsg); + args.add(""); + args.add(""); + message.format(args); + result.result = PP_LOST_CONNECTION; + result.message = message; + } + else if ((rc != 0) && (rc != IDBRANGE_WARNING)) { logging::Message::Args args; logging::Message message(1); @@ -427,4 +446,3 @@ DMLPackageProcessor::DMLResult InsertPackageProcessor::processPackage(dmlpackage } } // namespace dmlpackageprocessor - diff --git a/dbcon/dmlpackageproc/insertpackageprocessor.h b/dbcon/dmlpackageproc/insertpackageprocessor.h index a868bccfd..7be135207 100644 --- a/dbcon/dmlpackageproc/insertpackageprocessor.h +++ b/dbcon/dmlpackageproc/insertpackageprocessor.h @@ -45,14 +45,10 @@ class InsertPackageProcessor : public DMLPackageProcessor InsertPackageProcessor(BRM::DBRM* aDbrm, uint32_t sid) : DMLPackageProcessor(aDbrm, sid) { } - /** @brief process an InsertDMLPackage - * - * @param cpackage the InsertDMLPackage to process - */ - EXPORT DMLResult processPackage(dmlpackage::CalpontDMLPackage& cpackage); protected: private: + DMLResult processPackageInternal(dmlpackage::CalpontDMLPackage& cpackage) override; }; } // namespace 
dmlpackageprocessor diff --git a/dbcon/dmlpackageproc/updatepackageprocessor.cpp b/dbcon/dmlpackageproc/updatepackageprocessor.cpp index 35b4cb166..ab2127721 100644 --- a/dbcon/dmlpackageproc/updatepackageprocessor.cpp +++ b/dbcon/dmlpackageproc/updatepackageprocessor.cpp @@ -61,7 +61,8 @@ using namespace oam; namespace dmlpackageprocessor { // StopWatch timer; -DMLPackageProcessor::DMLResult UpdatePackageProcessor::processPackage(dmlpackage::CalpontDMLPackage& cpackage) +DMLPackageProcessor::DMLResult UpdatePackageProcessor::processPackageInternal( + dmlpackage::CalpontDMLPackage& cpackage) { SUMMARY_INFO("UpdatePackageProcessor::processPackage"); @@ -201,7 +202,6 @@ DMLPackageProcessor::DMLResult UpdatePackageProcessor::processPackage(dmlpackage abs_ts.tv_nsec = rm_ts.tv_nsec; } while (nanosleep(&abs_ts, &rm_ts) < 0); - try { processID = ::getpid(); @@ -301,22 +301,28 @@ DMLPackageProcessor::DMLResult UpdatePackageProcessor::processPackage(dmlpackage } catch (std::exception& ex) { - cerr << "UpdatePackageProcessor::processPackage:" << ex.what() << endl; - - if (result.result == 0) + if (checkPPLostConnection(ex)) { - result.result = UPDATE_ERROR; + result.result = PP_LOST_CONNECTION; } + else + { + cerr << "UpdatePackageProcessor::processPackage:" << ex.what() << endl; + if (result.result == 0) + { + result.result = UPDATE_ERROR; + } - result.message = Message(ex.what()); - result.rowCount = 0; - LoggingID logid(DMLLoggingId, fSessionID, txnid.id); - logging::Message::Args args1; - logging::Message msg(1); - args1.add("End SQL statement with error"); - msg.format(args1); - logging::Logger logger(logid.fSubsysID); - logger.logMessage(LOG_TYPE_DEBUG, msg, logid); + result.message = Message(ex.what()); + result.rowCount = 0; + LoggingID logid(DMLLoggingId, fSessionID, txnid.id); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("End SQL statement with error"); + msg.format(args1); + logging::Logger logger(logid.fSubsysID); + 
logger.logMessage(LOG_TYPE_DEBUG, msg, logid); + } } catch (...) { diff --git a/dbcon/dmlpackageproc/updatepackageprocessor.h b/dbcon/dmlpackageproc/updatepackageprocessor.h index 42ad8abcb..72b2e0b40 100644 --- a/dbcon/dmlpackageproc/updatepackageprocessor.h +++ b/dbcon/dmlpackageproc/updatepackageprocessor.h @@ -42,14 +42,11 @@ class UpdatePackageProcessor : public DMLPackageProcessor UpdatePackageProcessor(BRM::DBRM* aDbrm, uint32_t sid) : DMLPackageProcessor(aDbrm, sid) { } - /** @brief process an UpdateDMLPackage - * - * @param cpackage the UpdateDMLPackage to process - */ - EXPORT DMLResult processPackage(dmlpackage::CalpontDMLPackage& cpackage); protected: private: + DMLResult processPackageInternal(dmlpackage::CalpontDMLPackage& cpackage) override; + /** @brief send execution plan to ExeMgr and fetch rows * * @param cpackage the UpdateDMLPackage to process diff --git a/dbcon/execplan/aggregatecolumn.h b/dbcon/execplan/aggregatecolumn.h index ec73b2256..6ead28c20 100644 --- a/dbcon/execplan/aggregatecolumn.h +++ b/dbcon/execplan/aggregatecolumn.h @@ -75,6 +75,7 @@ class AggregateColumn : public ReturnedColumn BIT_XOR, GROUP_CONCAT, JSON_ARRAYAGG, + SELECT_SOME, UDAF, MULTI_PARM }; diff --git a/dbcon/joblist/joblistfactory.cpp b/dbcon/joblist/joblistfactory.cpp index 7289d0b5f..c6d363f63 100644 --- a/dbcon/joblist/joblistfactory.cpp +++ b/dbcon/joblist/joblistfactory.cpp @@ -541,7 +541,9 @@ void checkGroupByCols(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) // Not an aggregate column and not an expression of aggregation. 
if (dynamic_cast(orderByCols[i].get()) == NULL && orderByCols[i]->aggColumnList().empty()) + { csep->groupByCols().push_back(orderByCols[i]); + } } } } @@ -604,7 +606,9 @@ void checkGroupByCols(CalpontSelectExecutionPlan* csep, JobInfo& jobInfo) } if (csep->groupByCols().size() != uniqGbCols.size()) + { (csep)->groupByCols(uniqGbCols); + } } } diff --git a/dbcon/joblist/tupleaggregatestep.cpp b/dbcon/joblist/tupleaggregatestep.cpp index 3ea77003b..6978fda77 100644 --- a/dbcon/joblist/tupleaggregatestep.cpp +++ b/dbcon/joblist/tupleaggregatestep.cpp @@ -124,6 +124,8 @@ inline RowAggFunctionType functionIdMap(int planFuncId) case AggregateColumn::MULTI_PARM: return ROWAGG_MULTI_PARM; + case AggregateColumn::SELECT_SOME: return ROWAGG_SELECT_SOME; + default: return ROWAGG_FUNCT_UNDEFINE; } } @@ -289,7 +291,6 @@ TupleAggregateStep::TupleAggregateStep(const SP_ROWAGG_UM_t& agg, const RowGroup fNumOfBuckets = calcNumberOfBuckets(memLimit, fNumOfThreads, fNumOfBuckets, fNumOfRowGroups, fRowGroupIn.getRowSize(), fRowGroupOut.getRowSize(), fRm->getAllowDiskAggregation()); - fNumOfThreads = std::min(fNumOfThreads, fNumOfBuckets); fMemUsage.reset(new uint64_t[fNumOfThreads]); @@ -393,7 +394,7 @@ void TupleAggregateStep::doThreadedSecondPhaseAggregate(uint32_t threadID) rowGroupIn->initRow(&rowIn); auto* subDistAgg = dynamic_cast(multiDist->subAggregators()[j].get()); - while (subDistAgg->nextOutputRowGroup()) + while (subDistAgg->nextRowGroup()) { rowGroupIn = (multiDist->subAggregators()[j]->getOutputRowGroup()); rgDataVec.emplace_back(subDistAgg->moveCurrentRGData()); @@ -417,7 +418,7 @@ void TupleAggregateStep::doThreadedSecondPhaseAggregate(uint32_t threadID) rowGroupIn->initRow(&rowIn); auto* subAgg = dynamic_cast(aggDist->aggregator().get()); - while (subAgg->nextOutputRowGroup()) + while (subAgg->nextRowGroup()) { rowGroupIn->setData(aggDist->aggregator()->getOutputRowGroup()->getRGData()); rgDataVec.emplace_back(subAgg->moveCurrentRGData()); @@ -572,7 +573,7 @@ bool 
TupleAggregateStep::nextDeliveredRowGroup() { for (; fBucketNum < fNumOfBuckets; fBucketNum++) { - while (fAggregators[fBucketNum]->nextOutputRowGroup()) + while (fAggregators[fBucketNum]->nextRowGroup()) { fAggregators[fBucketNum]->finalize(); fRowGroupDelivered.setData(fAggregators[fBucketNum]->getOutputRowGroup()->getRGData()); @@ -1204,6 +1205,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector& { case ROWAGG_MIN: case ROWAGG_MAX: + case ROWAGG_SELECT_SOME: { oidsAgg.push_back(oidsProj[colProj]); keysAgg.push_back(key); @@ -1766,6 +1768,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vectorfAggFunction == ROWAGG_STATS || f->fAggFunction == ROWAGG_BIT_AND || f->fAggFunction == ROWAGG_BIT_OR || f->fAggFunction == ROWAGG_BIT_XOR || f->fAggFunction == ROWAGG_CONSTANT || f->fAggFunction == ROWAGG_GROUP_CONCAT || - f->fAggFunction == ROWAGG_JSON_ARRAY)) + f->fAggFunction == ROWAGG_JSON_ARRAY || f->fAggFunction == ROWAGG_SELECT_SOME)) { funct.reset(new RowAggFunctionCol(f->fAggFunction, f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, f->fAuxColumnIndex - multiParms)); @@ -3127,6 +3131,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector { case ROWAGG_MIN: case ROWAGG_MAX: + case ROWAGG_SELECT_SOME: { oidsAggPm.push_back(oidsProj[colProj]); keysAggPm.push_back(aggKey); @@ -4044,6 +4049,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vectorfAggFunction == ROWAGG_MIN || f->fAggFunction == ROWAGG_MAX || f->fAggFunction == ROWAGG_STATS || f->fAggFunction == ROWAGG_BIT_AND || f->fAggFunction == ROWAGG_BIT_OR || f->fAggFunction == ROWAGG_BIT_XOR || - f->fAggFunction == ROWAGG_CONSTANT) + f->fAggFunction == ROWAGG_CONSTANT || f->fAggFunction == ROWAGG_SELECT_SOME) { funct.reset(new RowAggFunctionCol(f->fAggFunction, f->fStatsFunction, f->fInputColumnIndex, f->fOutputColumnIndex, f->fAuxColumnIndex - multiParms)); @@ -5697,27 +5703,14 @@ void 
TupleAggregateStep::doAggregate() return; } -/** @brief Aggregate input row groups in two-phase multi-threaded aggregation. - * In second phase handle three different aggregation cases differently: - * 1. Query contains at least one aggregation on a DISTINCT column, e.g. SUM (DISTINCT col1) AND at least one - * GROUP BY column - * 2. Query contains at least one aggregation on a DISTINCT column but no GROUP BY column - * 3. Query contains no aggregation on a DISTINCT column, but at least one GROUP BY column - * DISTINCT selects (e.g. SELECT DISTINCT col1 FROM ...) are handled in tupleannexstep.cpp. - */ uint64_t TupleAggregateStep::doThreadedAggregate(ByteStream& bs, RowGroupDL* dlp) { - // initialize return value variable + uint32_t i; + RGData rgData; uint64_t rowCount = 0; try { - /* - * Phase 1: Distribute input rows to different buckets depending on the hash value of the group by columns - * per row. Then distribute buckets equally on aggregators in fAggregators. (Number of fAggregators == - * fNumOfBuckets). Each previously created hash bucket is represented as one RowGroup in a fAggregator. 
- */ - if (!fDoneAggregate) { initializeMultiThread(); @@ -5726,9 +5719,9 @@ uint64_t TupleAggregateStep::doThreadedAggregate(ByteStream& bs, RowGroupDL* dlp runners.reserve(fNumOfThreads); // to prevent a resize during use // Start the aggregator threads - for (uint32_t threadNum = 0; threadNum < fNumOfThreads; threadNum++) + for (i = 0; i < fNumOfThreads; i++) { - runners.push_back(jobstepThreadPool.invoke(ThreadedAggregator(this, threadNum))); + runners.push_back(jobstepThreadPool.invoke(ThreadedAggregator(this, i))); } // Now wait for all those threads @@ -5742,28 +5735,18 @@ uint64_t TupleAggregateStep::doThreadedAggregate(ByteStream& bs, RowGroupDL* dlp // much memory on average uint32_t threads = std::max(1U, fNumOfThreads / 2); runners.reserve(threads); - for (uint32_t threadNum = 0; threadNum < threads; ++threadNum) + for (i = 0; i < threads; ++i) { - runners.push_back(jobstepThreadPool.invoke(ThreadedAggregateFinalizer(this, threadNum))); + runners.push_back(jobstepThreadPool.invoke(ThreadedAggregateFinalizer(this, i))); } jobstepThreadPool.join(runners); } - /* - * Phase 2: Depending on query type (see below) do aggregation per previously created RowGroup of rows - * that need to aggregated and output results. - */ - - auto* distinctAggregator = dynamic_cast(fAggregator.get()); - const bool hasGroupByColumns = fAggregator->aggMapKeyLength() > 0; - - // Case 1: Query contains at least one aggregation on a DISTINCT column AND at least one GROUP BY column - // e.g. 
SELECT SUM(DISTINCT col1) FROM test GROUP BY col2; - if (distinctAggregator && hasGroupByColumns) + if (dynamic_cast(fAggregator.get()) && fAggregator->aggMapKeyLength() > 0) { + // 2nd phase multi-threaded aggregate if (!fEndOfResult) { - // Do multi-threaded second phase aggregation (per row group created for GROUP BY statement) if (!fDoneAggregate) { vector runners; // thread pool handles @@ -5771,114 +5754,97 @@ uint64_t TupleAggregateStep::doThreadedAggregate(ByteStream& bs, RowGroupDL* dlp uint32_t bucketsPerThread = fNumOfBuckets / fNumOfThreads; uint32_t numThreads = ((fNumOfBuckets % fNumOfThreads) == 0 ? fNumOfThreads : fNumOfThreads + 1); + // uint32_t bucketsPerThread = 1; + // uint32_t numThreads = fNumOfBuckets; runners.reserve(numThreads); - for (uint32_t threadNum = 0; threadNum < numThreads; threadNum++) + for (i = 0; i < numThreads; i++) { runners.push_back(jobstepThreadPool.invoke( - ThreadedSecondPhaseAggregator(this, threadNum * bucketsPerThread, bucketsPerThread))); + ThreadedSecondPhaseAggregator(this, i * bucketsPerThread, bucketsPerThread))); } jobstepThreadPool.join(runners); } - // Deliver results fDoneAggregate = true; bool done = true; - while (nextDeliveredRowGroup() && !cancelled()) + + while (nextDeliveredRowGroup()) { done = false; rowCount = fRowGroupOut.getRowCount(); if (rowCount != 0) { - if (!cleanUpAndOutputRowGroup(bs, dlp)) + if (fRowGroupOut.getColumnCount() != fRowGroupDelivered.getColumnCount()) + pruneAuxColumns(); + + if (dlp) + { + rgData = fRowGroupDelivered.duplicate(); + dlp->insert(rgData); + } + else + { + bs.restart(); + fRowGroupDelivered.serializeRGData(bs); break; + } } done = true; } if (done) - { fEndOfResult = true; - } } } - // Case 2. Query contains at least one aggregation on a DISTINCT column but no GROUP BY column - // e.g. 
SELECT SUM(DISTINCT col1) FROM test; - else if (distinctAggregator) + else { + auto* agg = dynamic_cast(fAggregator.get()); + if (!fEndOfResult) { if (!fDoneAggregate) { - // Do aggregation over all row groups. As all row groups need to be aggregated together there is no - // easy way of multi-threading this and it's done in a single thread for now. - for (uint32_t bucketNum = 0; bucketNum < fNumOfBuckets; bucketNum++) + for (i = 0; i < fNumOfBuckets; i++) { if (fEndOfResult == false) { - // The distinctAggregator accumulates the aggregation results of all row groups by being added - // all row groups of each bucket aggregator and doing an aggregation step after each addition. - auto* bucketMultiDistinctAggregator = - dynamic_cast(fAggregators[bucketNum].get()); - auto* bucketDistinctAggregator = - dynamic_cast(fAggregators[bucketNum].get()); - distinctAggregator->aggregator(bucketDistinctAggregator->aggregator()); - - if (bucketMultiDistinctAggregator) + // do the final aggregtion and deliver the results + // at least one RowGroup for aggregate results + // for "distinct without group by" case + if (agg != nullptr) { - (dynamic_cast(distinctAggregator)) - ->subAggregators(bucketMultiDistinctAggregator->subAggregators()); - } + auto* aggMultiDist = dynamic_cast(fAggregators[i].get()); + auto* aggDist = dynamic_cast(fAggregators[i].get()); + agg->aggregator(aggDist->aggregator()); - distinctAggregator->aggregator()->finalAggregation(); - distinctAggregator->doDistinctAggregation(); + if (aggMultiDist) + { + (dynamic_cast(agg)) + ->subAggregators(aggMultiDist->subAggregators()); + } + + agg->doDistinctAggregation(); + } + // for "group by without distinct" case + else + { + fAggregator->append(fAggregators[i].get()); + } } } } - // Deliver results fDoneAggregate = true; - bool done = true; - while (fAggregator->nextRowGroup() && !cancelled()) - { - done = false; - fAggregator->finalize(); - rowCount = fRowGroupOut.getRowCount(); - fRowsReturned += rowCount; - 
fRowGroupDelivered.setData(fRowGroupOut.getRGData()); - - if (rowCount != 0) - { - if (!cleanUpAndOutputRowGroup(bs, dlp)) - break; - } - done = true; - } - if (done) - fEndOfResult = true; - } - } - // CASE 3: Query contains no aggregation on a DISTINCT column, but at least one GROUP BY column - // e.g. SELECT SUM(col1) FROM test GROUP BY col2; - // Do aggregation over all row groups. As all row groups need to be aggregated together there is no - // easy way of multi-threading this and it's done in a single thread for now. - else if (hasGroupByColumns) - { - if (!fEndOfResult && !fDoneAggregate) - { - for (uint32_t bucketNum = 0; bucketNum < fNumOfBuckets; ++bucketNum) - { - fAggregator->append(fAggregators[bucketNum].get()); - } } - fDoneAggregate = true; bool done = true; + //@bug4459 while (fAggregator->nextRowGroup() && !cancelled()) { done = false; @@ -5889,9 +5855,22 @@ uint64_t TupleAggregateStep::doThreadedAggregate(ByteStream& bs, RowGroupDL* dlp if (rowCount != 0) { - if (!cleanUpAndOutputRowGroup(bs, dlp)) + if (fRowGroupOut.getColumnCount() != fRowGroupDelivered.getColumnCount()) + pruneAuxColumns(); + + if (dlp) + { + rgData = fRowGroupDelivered.duplicate(); + dlp->insert(rgData); + } + else + { + bs.restart(); + fRowGroupDelivered.serializeRGData(bs); break; + } } + done = true; } @@ -5900,14 +5879,7 @@ uint64_t TupleAggregateStep::doThreadedAggregate(ByteStream& bs, RowGroupDL* dlp fEndOfResult = true; } } - else - { - throw logic_error( - "TupleAggregateStep::doThreadedAggregate: No DISTINCT columns nested into aggregation function " - "or " - "GROUP BY columns found. Should not reach here."); - } - } + } // try catch (...) 
{ handleException(std::current_exception(), logging::tupleAggregateStepErr, @@ -5947,23 +5919,6 @@ uint64_t TupleAggregateStep::doThreadedAggregate(ByteStream& bs, RowGroupDL* dlp return rowCount; } -bool TupleAggregateStep::cleanUpAndOutputRowGroup(ByteStream& bs, RowGroupDL* dlp) -{ - if (fRowGroupOut.getColumnCount() != fRowGroupDelivered.getColumnCount()) - pruneAuxColumns(); - - if (dlp) - { - RGData rgData = fRowGroupDelivered.duplicate(); - dlp->insert(rgData); - return true; - } - - bs.restart(); - fRowGroupDelivered.serializeRGData(bs); - return false; -} - void TupleAggregateStep::pruneAuxColumns() { uint64_t rowCount = fRowGroupOut.getRowCount(); diff --git a/dbcon/joblist/tupleaggregatestep.h b/dbcon/joblist/tupleaggregatestep.h index 18dc22a91..a0ddcfdfb 100644 --- a/dbcon/joblist/tupleaggregatestep.h +++ b/dbcon/joblist/tupleaggregatestep.h @@ -161,7 +161,6 @@ class TupleAggregateStep : public JobStep, public TupleDeliveryStep void doThreadedSecondPhaseAggregate(uint32_t threadID); bool nextDeliveredRowGroup(); void pruneAuxColumns(); - bool cleanUpAndOutputRowGroup(messageqcpp::ByteStream& bs, RowGroupDL* dlp); void formatMiniStats(); void printCalTrace(); template diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 861e0974a..874e2fb70 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -474,6 +474,22 @@ bool sortItemIsInGrouping(Item* sort_item, ORDER* groupcol) if (sort_item->type() == Item::SUM_FUNC_ITEM) { found = true; + return found; + } + + { + // as we now can wrap ORDER BY or SELECT expression into + // an aggregate, we can pass FIELD_ITEM as "found" as well. 
+ Item* item = sort_item; + while (item->type() == Item::REF_ITEM) + { + const Item_ref* ref_item = static_cast(item); + item = (Item*)*ref_item->ref; + } + if (item->type() == Item::FIELD_ITEM || item->type() == Item::CONST_ITEM || item->type() == Item::NULL_ITEM) + { + return true; + } } // A function that contains an aggregate function @@ -498,9 +514,18 @@ bool sortItemIsInGrouping(Item* sort_item, ORDER* groupcol) // is either Field or Func // Consider nonConstFunc() check here if (!found && sort_item->type() == Item::FUNC_ITEM && - (group_item->type() == Item::FUNC_ITEM || group_item->type() == Item::FIELD_ITEM)) + (group_item->type() == Item::FUNC_ITEM || group_item->type() == Item::FIELD_ITEM || + group_item->type() == Item::REF_ITEM)) { - found = sortItemIsInGroupRec(sort_item, group_item); + // MCOL-5236: see @bug5993 and @bug5916. + Item* item = group_item; + while (item->type() == Item::REF_ITEM) + { + Item_ref* item_ref = static_cast(item); + item = *item_ref->ref; + } + + found = sortItemIsInGroupRec(sort_item, item); } } @@ -3180,6 +3205,9 @@ CalpontSystemCatalog::ColType colType_MysqlToIDB(const Item* item) if (item->field_type() == MYSQL_TYPE_BLOB) { + // We default to BLOB, but then try to correct type, + // because many textual types in server have type_handler_blob + // (and variants) as their type. 
ct.colDataType = CalpontSystemCatalog::BLOB; } } @@ -4395,7 +4423,7 @@ ReturnedColumn* buildFunctionColumn(Item_func* ifp, gp_walk_info& gwi, bool& non // A few functions use a different collation than that found in // the base ifp class - if (funcName == "locate" || funcName == "find_in_set" || funcName == "strcmp") + if (funcName == "locate" || funcName == "find_in_set" || funcName == "strcmp" || funcName == "regexp_instr") { DTCollation dt; ifp->Type_std_attributes::agg_arg_charsets_for_comparison(dt, ifp->func_name_cstring(), @@ -4893,6 +4921,70 @@ static void processAggregateColumnConstArg(gp_walk_info& gwi, SRCP& parm, Aggreg } } +void analyzeForImplicitGroupBy(Item* item, gp_walk_info& gwi) +{ + if (gwi.implicitExplicitGroupBy) + { + return; + } + while (item->type() == Item::REF_ITEM) + { + Item_ref* ref = static_cast(item); + item = *ref->ref; + } + if (item->type() == Item::SUM_FUNC_ITEM) + { + // definitely an aggregate and thus needs an implicit group by. + gwi.implicitExplicitGroupBy = true; + return; + } + if (item->type() == Item::FUNC_ITEM) + { + Item_func* ifp = static_cast(item); + for(uint32_t i = 0;iargument_count() && !gwi.implicitExplicitGroupBy;i++) + { + analyzeForImplicitGroupBy(ifp->arguments()[i], gwi); + } + } +} + +ReturnedColumn* wrapIntoAggregate(ReturnedColumn* rc, gp_walk_info& gwi, SELECT_LEX& select_lex, Item* baseItem) +{ + if (!gwi.implicitExplicitGroupBy) + { + return rc; + } + + if (dynamic_cast(rc) != nullptr || dynamic_cast(rc) != nullptr) + { + return rc; + } + + ORDER* groupcol = static_cast(select_lex.group_list.first); + + while (groupcol) + { + if (baseItem->eq(*groupcol->item, false)) + { + return rc; + } + groupcol = groupcol->next; + } + + cal_connection_info* ci = static_cast(get_fe_conn_info_ptr()); + + AggregateColumn* ac = new AggregateColumn(gwi.sessionid); + ac->timeZone(gwi.timeZone); + ac->alias(rc->alias()); + ac->aggOp(AggregateColumn::SELECT_SOME); + ac->asc(rc->asc()); + 
ac->charsetNumber(rc->charsetNumber()); + ac->expressionId(ci->expressionId++); + + ac->aggParms().push_back(SRCP(rc)); + return ac; +} + ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) { // MCOL-1201 For UDAnF multiple parameters @@ -5000,8 +5092,9 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) if (ord_col->type() == Item::CONST_ITEM && ord_col->cmp_type() == INT_RESULT) { Item_int* id = (Item_int*)ord_col; + int64_t index = id->val_int(); - if (id->val_int() > (int)selCols.size()) + if (index > (int)selCols.size() || index < 1) { gwi.fatalParseError = true; @@ -5011,8 +5104,8 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi) return NULL; } - rc = selCols[id->val_int() - 1]->clone(); - rc->orderPos(id->val_int() - 1); + rc = selCols[index - 1]->clone(); + rc->orderPos(index - 1); } else { @@ -6830,6 +6923,26 @@ int processFrom(bool& isUnion, SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& // Existed pushdown handlers won't get in this scope // MDEV-25080 Union pushdown would enter this scope // is_unit_op() give a segv for derived_handler's SELECT_LEX + + // check INTERSECT or EXCEPT, that are not implemented + if (select_lex.master_unit() && select_lex.master_unit()->first_select()) + { + for (auto nextSelect = select_lex.master_unit()->first_select()->next_select(); nextSelect; + nextSelect = nextSelect->next_select()) + { + if (nextSelect->get_linkage() == INTERSECT_TYPE) + { + setError(gwi.thd, ER_INTERNAL_ERROR, "INTERSECT is not supported by Columnstore engine", gwi); + return ER_INTERNAL_ERROR; + } + else if (nextSelect->get_linkage() == EXCEPT_TYPE) + { + setError(gwi.thd, ER_INTERNAL_ERROR, "EXCEPT is not supported by Columnstore engine", gwi); + return ER_INTERNAL_ERROR; + } + } + } + if (!isUnion && (!isSelectHandlerTop || isSelectLexUnit) && select_lex.master_unit()->is_unit_op()) { // MCOL-2178 isUnion member only assigned, never used @@ -7454,6 +7567,32 @@ int 
getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i } #endif + // analyze SELECT and ORDER BY parts - do they have implicit GROUP BY induced by aggregates? + { + if (select_lex.group_list.first) + { + // we have an explicit GROUP BY. + gwi.implicitExplicitGroupBy = true; + } + else + { + // do we have an implicit GROUP BY? + List_iterator_fast it(select_lex.item_list); + Item* item; + + while ((item = it++)) + { + analyzeForImplicitGroupBy(item, gwi); + } + SQL_I_List order_list = select_lex.order_list; + ORDER* ordercol = static_cast(order_list.first); + + for (; ordercol; ordercol = ordercol->next) + { + analyzeForImplicitGroupBy(*(ordercol->item), gwi); + } + } + } // populate returnedcolumnlist and columnmap List_iterator_fast it(select_lex.item_list); Item* item; @@ -7480,6 +7619,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i // @bug 5916. Need to keep checking until getting concret item in case // of nested view. + Item* baseItem = item; while (item->type() == Item::REF_ITEM) { Item_ref* ref = (Item_ref*)item; @@ -7504,8 +7644,6 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i if (sc) { - boost::shared_ptr spsc(sc); - string fullname; String str; ifp->print(&str, QT_ORDINARY); @@ -7521,10 +7659,14 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i sc->alias(itemAlias); } - gwi.returnedCols.push_back(spsc); + // We need to look into GROUP BY columns to decide if we need to wrap a column. 
+ ReturnedColumn* rc = wrapIntoAggregate(sc, gwi, select_lex, baseItem); + + SRCP sprc(rc); + gwi.returnedCols.push_back(sprc); gwi.columnMap.insert( - CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), spsc)); + CalpontSelectExecutionPlan::ColumnMap::value_type(string(ifp->field_name.str), sprc)); TABLE_LIST* tmp = 0; if (ifp->cached_table) @@ -8192,18 +8334,18 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i ReturnedColumn* rc = buildSimpleColumn(ifp, gwi); SimpleColumn* sc = dynamic_cast(rc); - if (sc) - { - bool found = false; + if (sc) + { + bool found = false; for (uint32_t j = 0; j < gwi.returnedCols.size(); j++) { if (sc->sameColumn(gwi.returnedCols[j].get())) { sc->orderPos(j); - found = true; + found = true; break; } - } + } for (uint32_t j = 0; !found && j < gwi.returnedCols.size(); j++) { if (strcasecmp(sc->alias().c_str(), gwi.returnedCols[j]->alias().c_str()) == 0) @@ -8213,9 +8355,9 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i break; } } - } - else - { + } + else + { for (uint32_t j = 0; j < gwi.returnedCols.size(); j++) { if (ifp->name.length && string(ifp->name.str) == gwi.returnedCols[j].get()->alias()) @@ -8225,7 +8367,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i break; } } - } + } if (!rc) { @@ -8371,6 +8513,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i { if ((*(ordercol->item))->type() == Item::WINDOW_FUNC_ITEM) gwi.hasWindowFunc = true; + // XXX: TODO: implement a proper analysis of what we support. // MCOL-2166 Looking for this sorting item in GROUP_BY items list. 
// Shouldn't look into this if query doesn't have GROUP BY or // aggregations @@ -8382,10 +8525,10 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i getColNameFromItem(osr, *ordercol->item); Message::Args args; args.add(ostream.str()); - string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_GROUPBY_EXPRESSION, args); + string emsg = IDBErrorInfo::instance()->errorMsg(ERR_NOT_SUPPORTED_GROUPBY_ORDERBY_EXPRESSION, args); gwi.parseErrorText = emsg; setError(gwi.thd, ER_INTERNAL_ERROR, emsg, gwi); - return ERR_NOT_GROUPBY_EXPRESSION; + return ERR_NOT_SUPPORTED_GROUPBY_ORDERBY_EXPRESSION; } } @@ -8435,6 +8578,8 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i else { rc = buildReturnedColumn(ord_item, gwi, gwi.fatalParseError); + + rc = wrapIntoAggregate(rc, gwi, select_lex, ord_item); } // @bug5501 try item_ptr if item can not be fixed. For some // weird dml statement state, item can not be fixed but the @@ -8466,6 +8611,7 @@ int getSelectPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, bool i gwi.orderByCols.push_back(SRCP(rc)); } } + // make sure columnmap, returnedcols and count(*) arg_list are not empty TableMap::iterator tb_iter = gwi.tableMap.begin(); @@ -9947,7 +10093,7 @@ int getGroupPlan(gp_walk_info& gwi, SELECT_LEX& select_lex, SCSEP& csep, cal_gro } } - srcp->orderPos(groupcol->counter - 1); + srcp->orderPos(groupcol->counter - 1); gwi.groupByCols.push_back(srcp); continue; } diff --git a/dbcon/mysql/ha_mcs_impl.cpp b/dbcon/mysql/ha_mcs_impl.cpp index 9eeac01c6..1f6a0031d 100644 --- a/dbcon/mysql/ha_mcs_impl.cpp +++ b/dbcon/mysql/ha_mcs_impl.cpp @@ -401,6 +401,7 @@ int fetchNextRow(uchar* buf, cal_table_info& ti, cal_connection_info* ci, long t // @2835. Handle empty string and null confusion. 
store empty string for string column if (colType.colDataType == CalpontSystemCatalog::CHAR || colType.colDataType == CalpontSystemCatalog::VARCHAR || + colType.colDataType == CalpontSystemCatalog::TEXT || colType.colDataType == CalpontSystemCatalog::VARBINARY) { (*f)->reset(); diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index b3a9917c1..0acc8c5cd 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -141,6 +141,9 @@ struct gp_walk_info std::vector correlatedTbNameVec; ClauseType clauseType; execplan::CalpontSystemCatalog::TableAliasName viewName; + // we can have explicit GROUP BY and implicit one, triggered by aggregate in pojection or ORDER BY. + // this flag tells us whether we have either case. + bool implicitExplicitGroupBy; bool aggOnSelect; bool hasWindowFunc; bool hasSubSelect; @@ -186,6 +189,7 @@ struct gp_walk_info , subSelectType(uint64_t(-1)) , subQuery(0) , clauseType(INIT) + , implicitExplicitGroupBy(false) , aggOnSelect(false) , hasWindowFunc(false) , hasSubSelect(false) diff --git a/dbcon/mysql/ha_pseudocolumn.cpp b/dbcon/mysql/ha_pseudocolumn.cpp index 04afbd39c..7bd75b6a7 100644 --- a/dbcon/mysql/ha_pseudocolumn.cpp +++ b/dbcon/mysql/ha_pseudocolumn.cpp @@ -428,6 +428,7 @@ execplan::ReturnedColumn* buildPseudoColumn(Item* item, gp_walk_info& gwi, bool& if ((pseudoType == PSEUDO_EXTENTMIN || pseudoType == PSEUDO_EXTENTMAX) && (sc->colType().colDataType == CalpontSystemCatalog::VARBINARY || + (sc->colType().colDataType == CalpontSystemCatalog::TEXT) || (sc->colType().colDataType == CalpontSystemCatalog::VARCHAR && sc->colType().colWidth > 7) || (sc->colType().colDataType == CalpontSystemCatalog::CHAR && sc->colType().colWidth > 8))) return nullOnError(gwi, funcName); diff --git a/ddlproc/ddlprocessor.cpp b/ddlproc/ddlprocessor.cpp index 292e7e87a..330c537f3 100644 --- a/ddlproc/ddlprocessor.cpp +++ b/ddlproc/ddlprocessor.cpp @@ -504,7 +504,7 @@ class PackageHandler qts.schema_name = 
createTableStmt.schemaName(); fQtc.postQueryTele(qts); - result = processor->processPackage(createTableStmt); + result = processor->processPackage(&createTableStmt); systemCatalogPtr->removeCalpontSystemCatalog(createTableStmt.fSessionID); systemCatalogPtr->removeCalpontSystemCatalog(createTableStmt.fSessionID | 0x80000000); @@ -539,7 +539,7 @@ class PackageHandler processor->fTimeZone = alterTableStmt.getTimeZone(); - result = processor->processPackage(alterTableStmt); + result = processor->processPackage(&alterTableStmt); systemCatalogPtr->removeCalpontSystemCatalog(alterTableStmt.fSessionID); systemCatalogPtr->removeCalpontSystemCatalog(alterTableStmt.fSessionID | 0x80000000); @@ -574,7 +574,7 @@ class PackageHandler fQtc.postQueryTele(qts); // cout << "Drop table using txnid " << fTxnid.id << endl; - result = processor->processPackage(dropTableStmt); + result = processor->processPackage(&dropTableStmt); systemCatalogPtr->removeCalpontSystemCatalog(dropTableStmt.fSessionID); systemCatalogPtr->removeCalpontSystemCatalog(dropTableStmt.fSessionID | 0x80000000); @@ -608,7 +608,7 @@ class PackageHandler qts.schema_name = truncTableStmt.schemaName(); fQtc.postQueryTele(qts); - result = processor->processPackage(truncTableStmt); + result = processor->processPackage(&truncTableStmt); systemCatalogPtr->removeCalpontSystemCatalog(truncTableStmt.fSessionID); systemCatalogPtr->removeCalpontSystemCatalog(truncTableStmt.fSessionID | 0x80000000); @@ -628,7 +628,7 @@ class PackageHandler boost::scoped_ptr processor(new MarkPartitionProcessor(fDbrm)); (processor->fTxnid).id = fTxnid.id; (processor->fTxnid).valid = true; - result = processor->processPackage(markPartitionStmt); + result = processor->processPackage(&markPartitionStmt); systemCatalogPtr->removeCalpontSystemCatalog(markPartitionStmt.fSessionID); systemCatalogPtr->removeCalpontSystemCatalog(markPartitionStmt.fSessionID | 0x80000000); } @@ -643,7 +643,7 @@ class PackageHandler boost::scoped_ptr processor(new 
RestorePartitionProcessor(fDbrm)); (processor->fTxnid).id = fTxnid.id; (processor->fTxnid).valid = true; - result = processor->processPackage(restorePartitionStmt); + result = processor->processPackage(&restorePartitionStmt); systemCatalogPtr->removeCalpontSystemCatalog(restorePartitionStmt.fSessionID); systemCatalogPtr->removeCalpontSystemCatalog(restorePartitionStmt.fSessionID | 0x80000000); } @@ -658,7 +658,7 @@ class PackageHandler boost::scoped_ptr processor(new DropPartitionProcessor(fDbrm)); (processor->fTxnid).id = fTxnid.id; (processor->fTxnid).valid = true; - result = processor->processPackage(dropPartitionStmt); + result = processor->processPackage(&dropPartitionStmt); systemCatalogPtr->removeCalpontSystemCatalog(dropPartitionStmt.fSessionID); systemCatalogPtr->removeCalpontSystemCatalog(dropPartitionStmt.fSessionID | 0x80000000); } diff --git a/dmlproc/dmlprocessor.cpp b/dmlproc/dmlprocessor.cpp index c2e989b06..34529212f 100644 --- a/dmlproc/dmlprocessor.cpp +++ b/dmlproc/dmlprocessor.cpp @@ -67,7 +67,7 @@ extern boost::condition_variable cond; namespace { -const std::string myname = "DMLProc"; +[[maybe_unused]] const std::string myname = "DMLProc"; } namespace dmlprocessor @@ -541,6 +541,25 @@ int PackageHandler::clearTableAccess() return 1; } +CalpontSystemCatalog::ROPair PackageHandler::getTableRID( + boost::shared_ptr fcsc, + execplan::CalpontSystemCatalog::TableName& tableName) +{ + execplan::CalpontSystemCatalog::ROPair roPair; + try + { + roPair = fcsc->tableRID(tableName); + } + catch (...) 
+ { + if (setupDec()) + throw; + roPair = fcsc->tableRID(tableName); + } + + return roPair; +} + void PackageHandler::run() { ResourceManager* frm = ResourceManager::instance(); @@ -577,7 +596,7 @@ void PackageHandler::run() CalpontSystemCatalog::TableName tableName; tableName.schema = insertPkg.get_Table()->get_SchemaName(); tableName.table = insertPkg.get_Table()->get_TableName(); - CalpontSystemCatalog::ROPair roPair = fcsc->tableRID(tableName); + CalpontSystemCatalog::ROPair roPair = getTableRID(fcsc, tableName); fTableOid = roPair.objnum; } synchTable.setPackage(this, &insertPkg); // Blocks if another DML thread is using this fTableOid @@ -976,7 +995,7 @@ void PackageHandler::run() CalpontSystemCatalog::TableName tableName; tableName.schema = updatePkg->get_Table()->get_SchemaName(); tableName.table = updatePkg->get_Table()->get_TableName(); - CalpontSystemCatalog::ROPair roPair = fcsc->tableRID(tableName); + CalpontSystemCatalog::ROPair roPair = getTableRID(fcsc, tableName); fTableOid = roPair.objnum; } synchTable.setPackage(this, @@ -1036,7 +1055,7 @@ void PackageHandler::run() CalpontSystemCatalog::TableName tableName; tableName.schema = deletePkg->get_Table()->get_SchemaName(); tableName.table = deletePkg->get_Table()->get_TableName(); - CalpontSystemCatalog::ROPair roPair = fcsc->tableRID(tableName); + CalpontSystemCatalog::ROPair roPair = getTableRID(fcsc, tableName); fTableOid = roPair.objnum; } synchTable.setPackage(this, diff --git a/dmlproc/dmlprocessor.h b/dmlproc/dmlprocessor.h index fbc7c32d6..6f21d8d9a 100644 --- a/dmlproc/dmlprocessor.h +++ b/dmlproc/dmlprocessor.h @@ -199,6 +199,15 @@ class PackageHandler } private: + int32_t setupDec() + { + joblist::ResourceManager* rm = joblist::ResourceManager::instance(true); + joblist::DistributedEngineComm* fEc = joblist::DistributedEngineComm::instance(rm); + return fEc->Setup(); + } + execplan::CalpontSystemCatalog::ROPair getTableRID(boost::shared_ptr fcsc, + execplan::CalpontSystemCatalog::TableName& 
tableName); + messageqcpp::IOSocket fIos; boost::shared_ptr fByteStream; boost::scoped_ptr fProcessor; @@ -311,22 +320,21 @@ class RollbackTransactionProcessor : public dmlpackageprocessor::DMLPackageProce RollbackTransactionProcessor(BRM::DBRM* aDbrm) : DMLPackageProcessor(aDbrm, 1) { } - /** @brief process an Rollback transactions - * - * @param cpackage the UpdateDMLPackage to process - */ - inline DMLResult processPackage(dmlpackage::CalpontDMLPackage& cpackage) - { - DMLResult result; - result.result = NO_ERROR; - return result; - } void processBulkRollback(BRM::TableLockInfo lockInfo, BRM::DBRM* dbrm, uint64_t uniqueId, oam::OamCache::dbRootPMMap_t& dbRootPMMap, bool& lockReleased); protected: + private: + /** @brief process an Rollback transactions + * + * @param cpackage the UpdateDMLPackage to process + */ + DMLResult processPackageInternal(dmlpackage::CalpontDMLPackage& cpackage) + { + DMLResult result; + result.result = NO_ERROR; + return result; + } }; - } // namespace dmlprocessor - diff --git a/extra/cs_package_manager.sh b/extra/cs_package_manager.sh deleted file mode 100644 index 6346fb789..000000000 --- a/extra/cs_package_manager.sh +++ /dev/null @@ -1,839 +0,0 @@ -#!/bin/bash -# Documentation -# ./ cs_package_manager.sh help - -# Variables -enterprise_token="" -dev_drone_key="" - -if [ ! 
-f /var/lib/columnstore/local/module ]; then pm="pm1"; else pm=$(cat /var/lib/columnstore/local/module); fi; -pm_number=$(echo "$pm" | tr -dc '0-9') -action=$1 - -print_help_text() { - echo "Version 1.0 - -Example Remove: - bash cs_package_manager.sh remove - bash cs_package_manager.sh remove all - -Example Install: - bash cs_package_manager.sh install [enterprise|community|dev] [version|branch] [build num] - bash cs_package_manager.sh install enterprise 10.6.12-8 - bash cs_package_manager.sh install community 11.1 - bash cs_package_manager.sh install dev develop cron/8629 - bash cs_package_manager.sh install dev develop-23.02 pull_request/7256 - -Example Check: - bash cs_package_manager.sh check community - bash cs_package_manager.sh check enterprise -" -} - -wait_cs_down() { - retries=$1; - - if [ $retries -gt 6 ]; then - printf "\n[!!!] Columnstore is still online ... exiting \n\n" - exit 2; - fi; - - # if columnstore still online stop - if [ -z $(pidof PrimProc) ]; then - printf "[+] Confirmation: columnstore OFFLINE \n"; - mcs_offine=true - return 1; - else - printf "\n[+] Columnstore is ONLINE - waiting 5s to retry, attempt: $retries...\n"; - sleep 5 - ((retries++)) - wait_cs_down $retries - fi; -} - -print_and_delete() { - printf " - %-25s ..." 
"$1" - rm -rf $1 - printf " Done\n" -} - -init_cs_down() { - mcs_offine=false - if [ "$pm_number" == "1" ]; then - if [ -z $(pidof PrimProc) ]; then - # printf "\n[+] Columnstore offline already"; - mcs_offine=true - else - - # Adjust for package manager - cmapi_installed_command="" - case $package_manager in - yum ) - cmapi_installed_command="yum list installed MariaDB-columnstore-cmapi &> /dev/null;"; - ;; - apt ) - cmapi_installed_command="dpkg-query -s mariadb-columnstore-cmapi &> /dev/null;"; - ;; - *) # unknown option - echo "\npackage manager not implemented: $package_manager\n" - exit 2; - esac - - # Check cmapi installed - if eval $cmapi_installed_command ; then - - # Check for edge case - if [ "$(mcs cluster status | jq -r '.num_nodes')" == "0" ]; then - printf "[!!] Noticed cmapi installed but no nodes configured...\n" - add_primary_node_cmapi - sleep 1; - fi - - # Stop columnstore - printf "\n[+] Stopping columnstore ... \n"; - if command -v mcs &> /dev/null; then - if ! mcs cluster stop; then - echo "[!] Failed stopping via mcs ... trying cmapi curl" - stop_cs_cmapi_via_curl - fi - else - stop_cs_cmapi_via_curl - fi - else - stop_cs_via_systemctl - fi - fi - fi; -} - -do_yum_remove() { - - if ! command -v yum &> /dev/null ; then - printf "[!!] Cant access yum\n" - exit 1; - fi - - init_cs_down - - wait_cs_down 0 - - printf "[+] Removing packages - $(date) ... 
\n" - - if yum list installed MariaDB-server &>/dev/null; then - systemctl stop mariadb - fi - - if yum list installed MariaDB-columnstore-cmapi &>/dev/null; then - systemctl stop mariadb-columnstore-cmapi - fi - - # remove any mdb rpms on disk - if ls MariaDB-*.rpm &>/dev/null; then - print_and_delete "MariaDB-*.rpm" - fi - - # remove all current MDB packages - if yum list installed MariaDB-* &>/dev/null; then - yum remove MariaDB-* -y - fi - - # remove offical & custom yum repos - print_and_delete "/etc/yum.repos.d/mariadb.repo" - print_and_delete "/etc/yum.repos.d/drone.repo" -} - -do_apt_remove() { - - if ! command -v apt &> /dev/null ; then - printf "[!!] Cant access apt\n" - exit 1; - fi - - if ! command -v dpkg-query &> /dev/null ; then - printf "[!!] Cant access dpkg-query\n" - exit 1; - fi - - init_cs_down - - wait_cs_down 0 - - printf "[+] Removing packages - $(date) ... \n" - - if dpkg-query -s mariadb-server &>/dev/null; then - systemctl stop mariadb - fi - - if dpkg-query -s mariadb-columnstore-cmapi &>/dev/null; then - systemctl stop mariadb-columnstore-cmapi - fi - - # remove any mdb rpms on disk - if ls mariadb*.deb &>/dev/null; then - print_and_delete "mariadb*.deb" - fi - - # remove all current MDB packages - if [ "$(apt list --installed mariadb-* 2>/dev/null | wc -l)" -gt 1 ]; then - apt remove mariadb-* -y - fi - - # remove offical & custom yum repos - print_and_delete "/lib/systemd/system/mariadb.service" - print_and_delete "/lib/systemd/system/mariadb.service.d" - print_and_delete "/etc/apt/sources.list.d/mariadb.list" - print_and_delete "/etc/apt/sources.list.d/drone.list" - systemctl daemon-reload -} - -do_remove() { - - check_operating_system - check_package_managers - - case $distro_info in - centos | rocky ) - do_yum_remove - ;; - # debian ) - - # ;; - ubuntu ) - do_apt_remove - ;; - *) # unknown option - echo "\nos & version not implemented: $distro_info\n" - exit 2; - esac - - if [ "$2" == "all" ]; then - printf "\n[+] Removing all 
columnstore files & dirs\n" - print_and_delete "/var/lib/mysql/" - print_and_delete "/var/lib/columnstore/" - print_and_delete "/etc/my.cnf.d/*" - print_and_delete "/etc/columnstore/*" - fi; - - printf "\n[+] Done\n\n" -} - -check_package_managers() { - - package_manager=''; - if command -v apt &> /dev/null ; then - if ! command -v dpkg-query &> /dev/null ; then - printf "[!!] Cant access dpkg-query\n" - exit 1; - fi - package_manager="apt"; - fi - - if command -v yum &> /dev/null ; then - package_manager="yum"; - fi - - if [ $package_manager == '' ]; then - echo "[!!] No package manager found: yum or apt must be installed" - exit 1; - fi; -} - -check_operating_system() { - - distro_info=$(awk -F= '/^ID=/{gsub(/"/, "", $2); print $2}' /etc/os-release) - version_id=$(grep 'VERSION_ID=' /etc/os-release | awk -F= '{gsub(/"/, "", $2); print $2}' | awk -F. '{print $1}') - - echo "Distro: $distro_info" - echo "Version: $version_id" - - # distros=(centos7 debian11 debian12 rockylinux8 rockylinux9 ubuntu20.04 ubuntu22.04) - case $distro_info in - centos ) - distro="${distro_info}${version_id}" - ;; - debian ) - distro="${distro_info}${version_id}" - ;; - rocky ) - distro="rockylinux${version_id}" - ;; - ubuntu ) - distro="${distro_info}${version_id}" - ;; - *) # unknown option - printf "\ncheck_operating_system: unknown os & version: $distro_info\n" - exit 2; - esac -} - -check_cpu_architecture() { - - architecture=$(uname -m) - echo "CPU: $architecture" - - # arch=(amd64 arm64) - case $architecture in - x86_64 ) - arch="amd64" - ;; - aarch64 ) - arch="arm64" - ;; - *) # unknown option - echo "Error: Unsupported architecture ($architecture)" - esac -} - -check_no_mdb_installed() { - - packages="" - case $distro_info in - centos ) - packages=$(yum list installed | grep -i mariadb) - ;; - # debian ) - - # ;; - rocky ) - packages=$(yum list installed | grep -i mariadb) - ;; - ubuntu ) - packages=$(apt list --installed mariadb-* 2>/dev/null | grep -i mariadb); - ;; - *) # 
unknown option - printf "\ncheck_no_mdb_installed: os & version not implemented: $distro_info\n" - exit 2; - esac - - if [ -n "$packages" ]; then - printf "\nMariaDB packages are installed. Please uninstall them before continuing.\n" - echo $packages; - printf "Example: bash $0 remove\n\n" - exit 2; - fi; -} - -check_aws_cli_installed() { - - if ! command -v aws &> /dev/null ; then - echo "[!] aws cli - binary could not be found" - echo "[+] Installing aws cli ..." - rm -rf aws awscliv2.zip - yum install unzip -y; - curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"; - unzip awscliv2.zip; - sudo ./aws/install; - mv /usr/local/bin/aws /usr/bin/aws; - aws configure set default.s3.max_concurrent_requests 700 - fi -} - -check_dev_build_exists() { - - if ! aws s3 ls $s3_path --no-sign-request &> /dev/null; then - printf "Defined dev build doesnt exist in aws\n\n" - exit 2; - fi; -} - -do_enterprise_apt_install() { - - # Install MariaDB - apt-get clean - apt install mariadb-server -y - sleep 2 - systemctl daemon-reload - systemctl enable mariadb - systemctl start mariadb - - # Install Columnstore - apt install mariadb-plugin-columnstore mariadb-columnstore-cmapi jq -y - systemctl daemon-reload - systemctl enable mariadb-columnstore-cmapi - systemctl start mariadb-columnstore-cmapi - mariadb -e "show status like '%Columnstore%';" - sleep 2 - - # Startup cmapi - if defined - add_primary_node_cmapi -} - -do_enterprise_yum_install() { - - # Install MariaDB - yum clean all - yum install MariaDB-server -y - sleep 2 - systemctl enable mariadb - systemctl start mariadb - - # Install Columnstore - yum install MariaDB-columnstore-engine MariaDB-columnstore-cmapi -y - systemctl enable mariadb-columnstore-cmapi - systemctl start mariadb-columnstore-cmapi - mariadb -e "show status like '%Columnstore%';" - sleep 1; - - # Startup cmapi - if defined - add_primary_node_cmapi -} - -enterprise_install() { - - version=$3 - if [ -z $enterprise_token ]; then - 
printf "Enterprise token empty: $enterprise_token\n" - printf "edit $0 to add token\n\n" - - exit 1; - fi; - - if [ -z $version ]; then - printf "Version empty: $version\n" - exit 1; - fi; - - echo "Token: $enterprise_token" - echo "MariaDB Version: $version" - - # Download Repo setup - rm -rf mariadb_es_repo_setup - wget https://dlm.mariadb.com/enterprise-release-helpers/mariadb_es_repo_setup ;chmod +x mariadb_es_repo_setup; - if ! bash mariadb_es_repo_setup --token="$enterprise_token" --apply --mariadb-server-version="$version"; then - echo "exiting ..." - exit 2; - fi; - - case $distro_info in - centos | rocky ) - do_enterprise_yum_install "$@" - ;; - # debian ) - - # ;; - ubuntu ) - do_enterprise_apt_install "$@" - ;; - *) # unknown option - printf "\nos & version not implemented: $distro_info\n" - exit 2; - esac -} - -community_install() { - - version=$3 - if [ -z $version ]; then - printf "Version empty: $version\n" - - exit 1; - fi; - - echo "MariaDB Community Version: $version" - - # Download Repo setup - rm -rf mariadb_repo_setup - - if ! curl -sS https://downloads.mariadb.com/MariaDB/mariadb_repo_setup | bash -s -- --mariadb-server-version=mariadb-$version ; then - echo "version bad. exiting ..." - exit 2; - fi; - yum clean all - - # Install MariaDB then Columnstore - yum install MariaDB-server -y - sleep 2; - systemctl enable mariadb - systemctl start mariadb - yum install MariaDB-columnstore-engine MariaDB-columnstore-cmapi -y - systemctl enable mariadb-columnstore-cmapi - systemctl start mariadb-columnstore-cmapi - mariadb -e "show status like '%Columnstore%';" - - add_primary_node_cmapi -} - -get_set_cmapi_key() { - - CMAPI_CNF="/etc/columnstore/cmapi_server.conf" - - if [ ! -f $CMAPI_CNF ]; then - echo "[!!] No cmapi config file found" - exit 1; - fi; - - # Add API Key if missing - if [ -z "$(grep ^x-api-key $CMAPI_CNF)" ]; then - - if ! 
command -v openssl &> /dev/null ; then - api_key="19bb89d77cb8edfe0864e05228318e3dfa58e8f45435fbd9bd12c462a522a1e9" - else - api_key=$(openssl rand -hex 32) - fi - # mcs cluster set api-key --key $primary_ip - echo "[+] Setting API Key:" - if curl -s https://127.0.0.1:8640/cmapi/0.4.0/cluster/status \ - --header 'Content-Type:application/json' \ - --header "x-api-key:$api_key" -k ; then - sleep 2; - else - echo "Failed to set API key" - exit 1; - fi - else - api_key=$(grep ^x-api-key $CMAPI_CNF | cut -d "=" -f 2 | tr -d " ") - fi -} - -add_node_cmapi_via_curl() { - - node_ip=$1 - if [ -z $api_key ]; then get_set_cmapi_key; fi; - - # Add Node - printf "\n[+] Adding primary node: \n" - if curl -k -s -X PUT https://127.0.0.1:8640/cmapi/0.4.0/cluster/node \ - --header 'Content-Type:application/json' \ - --header "x-api-key:$api_key" \ - --data "{\"timeout\": 120, \"node\": \"$node_ip\"}"; then - printf "\n[+] Success adding $node_ip\n" - else - echo "Failed adding node" - exit 1; - fi - -} - -start_cs_via_systemctl() { - if systemctl start mariadb-columnstore ; then - echo "[+] Started Columnstore" - else - echo "[!!] Failed to start columnstore" - exit 1; - fi; -} - -start_cs_cmapi_via_curl() { - - if [ -z $api_key ]; then get_set_cmapi_key; fi; - - if curl -k -s -X PUT https://127.0.0.1:8640/cmapi/0.4.0/cluster/start \ - --header 'Content-Type:application/json' \ - --header "x-api-key:$api_key" \ - --data '{"timeout":20}'; then - echo "[+] Started Columnstore" - else - echo "[!] Failed to start columnstore" - echo "Trying via systemctl ..." - start_cs_via_systemctl - fi; -} - -stop_cs_via_systemctl() { - if systemctl stop mariadb-columnstore ; then - echo "[+] Stopped Columnstore" - else - echo "[!!] 
Failed to stop columnstore" - exit 1; - fi; -} - -stop_cs_cmapi_via_curl() { - - if [ -z $api_key ]; then get_set_cmapi_key; fi; - - if curl -k -s -X PUT https://127.0.0.1:8640/cmapi/0.4.0/cluster/shutdown \ - --header 'Content-Type:application/json' \ - --header "x-api-key:$api_key" \ - --data '{"timeout":20}'; then - echo "[+] Stopped Columnstore" - else - echo "[!] Failed to stop columnstore via cmapi" - echo "Trying via systemctl ..." - stop_cs_via_systemctl - fi; -} - -add_primary_node_cmapi() { - - primary_ip="127.0.0.1" - get_set_cmapi_key - - if ! command -v mcs &> /dev/null ; then - echo "mcs - binary could not be found" - add_node_cmapi_via_curl $primary_ip - echo "[+] Starting Columnstore ..." - start_cs_cmapi_via_curl - - else - if [ "$(mcs cluster status | jq -r '.num_nodes')" == "0" ]; then - - echo "[+] Adding primary node ..." - if timeout 30s mcs cluster node add --node $primary_ip; then - echo "[+] Success adding $primary_ip" - else - echo "[!] Failed ... trying cmapi curl" - add_node_cmapi_via_curl $primary_ip - fi; - fi; - echo "[+] Starting Columnstore ..." 
- mcs cluster start - fi -} - -dev_install() { - - if [ -z $dev_drone_key ]; then printf "Missing dev_drone_key: \n"; exit; fi; - check_aws_cli_installed - - echo "Branch: $3" - echo "Build: $4" - dronePath="s3://$dev_drone_key" - branch="$3" - build="$4" - product="10.6-enterprise" - - # Construct URLs - s3_path="$dronePath/$branch/$build/$product/$arch/$distro" - replace="https://$dev_drone_key.s3.amazonaws.com/" - # Use sed to replace the s3 path to create https path - yum_http=$(echo "$s3_path" | sed "s|s3://$dev_drone_key/|$replace|") - echo "Locations:" - echo "RPM: $s3_path" - echo "Yum: $yum_http" - echo "###################################" - - check_dev_build_exists - - echo "[drone] -name=Drone Repository -baseurl="$yum_http" -gpgcheck=0 -enabled=1 - " > /etc/yum.repos.d/drone.repo - yum clean all - # yum makecache - # yum list --disablerepo="*" --enablerepo="drone" - - # ALL RPMS: aws s3 cp $s3_path/ . --recursive --exclude "debuginfo" --include "*.rpm" - aws s3 cp $s3_path/ . --recursive --exclude "*" --include "MariaDB-server*" --exclude "*debug*" --no-sign-request - - # Confirm Downloaded server rpm - if ! ls MariaDB-server-*.rpm 1> /dev/null 2>&1; then - echo "Error: No MariaDB-server RPMs were found." 
- exit 1 - fi - - yum install MariaDB-server-*.rpm -y - yum install MariaDB-columnstore-engine MariaDB-columnstore-cmapi -y - systemctl start mariadb - systemctl start mariadb-columnstore-cmapi - mariadb -e "show status like '%Columnstore%';" - - add_primary_node_cmapi -} - -do_install() { - - check_operating_system - check_cpu_architecture - check_no_mdb_installed - - repo=$2 - echo "Repository: $repo" - case $repo in - enterprise ) - # pull from public enterprise repo - enterprise_install "$@" ; - ;; - community ) - # pull from public community repo - community_install "$@" ; - ;; - dev ) - dev_install "$@" ; - ;; - *) # unknown option - echo "Unknown repo: $repo\n" - exit 2; - esac - - printf "\nDone - $(date)\n" -} - -do_check() { - - check_operating_system - check_cpu_architecture - - repo=$2 - echo "Repository: $repo" - case $repo in - enterprise ) - - if [ -z $enterprise_token ]; then - printf "Enterprise token empty: $enterprise_token\n" - printf "edit $0 to add token\n\n" - exit 1; - fi; - - url_base="https://dlm.mariadb.com" - url_page="/browse/$enterprise_token/mariadb_enterprise_server/" - ignore="/login" - curl -s "$url_base$url_page" > mdb-tmp.html - major_version_links=$(grep -oP 'href="\K[^"]+' mdb-tmp.html | grep $url_page | grep -v $ignore ) - #echo $major_version_links - for major_link in ${major_version_links[@]} - do - #echo "Major: $major_link" - curl -s "$url_base$major_link" > mdb-tmp.html - minor_version_links=$(grep -oP 'href="\K[^"]+' mdb-tmp.html | grep $url_page | grep -v $ignore ) - for minor_link in ${minor_version_links[@]} - do - if [ "$minor_link" != "$url_page" ]; then - #echo " Minor: $minor_link" - case $distro_info in - centos | rocky ) - path="rpm/rhel/$version_id/$architecture/rpms/" - curl -s "$url_base$minor_link$path" > mdb-tmp.html - package_links=$(grep -oP 'href="\K[^"]+' mdb-tmp.html | grep "$path" | grep "columnstore-engine" | grep -v debug | tail -1 ) - if [ ! 
-z "$package_links" ]; then - #echo "----------" - #echo "$package_links" - - mariadb_version="${package_links#*mariadb-enterprise-server/}" - columnstore_version="${mariadb_version#*columnstore-engine-}" - mariadb_version="$( echo $mariadb_version | awk -F/ '{print $1}' )" - - # unqiue to enterprise - standard_mariadb_version="${mariadb_version//-/_}" - columnstore_version="$( echo $columnstore_version | awk -F"${standard_mariadb_version}_" '{print $2}' | awk -F".el" '{print $1}' )" - printf "%-8s %-12s %-12s %-12s\n" "MariaDB:" "$mariadb_version" "Columnstore:" "$columnstore_version"; - fi; - ;; - ubuntu ) - path="deb/pool/main/m/" - curl -s "$url_base$minor_link$path" > mdb-tmp.html - - # unqiue - this link/path can change - mariadb_version_links=$(grep -oP 'href="\K[^"]+' mdb-tmp.html | grep -v $ignore | grep -v cmapi | grep ^mariadb ) - #echo "$url_base$minor_link$path" - for mariadb_link in ${mariadb_version_links[@]} - do - #echo $mariadb_link - path="deb/pool/main/m/$mariadb_link" - curl -s "$url_base$minor_link$path" > mdb-tmp.html - package_links=$(grep -oP 'href="\K[^"]+' mdb-tmp.html | grep "$path" | grep "columnstore_" | grep -v debug | tail -1 ) - if [ ! 
-z "$package_links" ]; then - # echo "$package_links" - # echo "----------" - mariadb_version="${package_links#*mariadb-enterprise-server/}" - columnstore_version="${mariadb_version#*columnstore-engine-}" - mariadb_version="$( echo $mariadb_version | awk -F/ '{print $1}' )" - columnstore_version="$( echo $columnstore_version | awk -F"columnstore_" '{print $2}' | awk -F"-" '{print $2}' | awk -F"+maria" '{print $1}' )" - # echo "MariaDB: $mariadb_version Columnstore: $columnstore_version" - printf "%-8s %-12s %-12s %-12s\n" "MariaDB:" "$mariadb_version" "Columnstore:" "$columnstore_version"; - fi; - done - - - ;; - *) # unknown option - printf "Not implemented for: $distro_info\n" - exit 2; - esac - - - fi; - done - done - ;; - community ) - - # pull from public community repo - url_base="https://dlm.mariadb.com" - url_page="/browse/mariadb_server/" - ignore="/login" - curl -s "$url_base$url_page" > mdb-tmp.html - major_version_links=$(grep -oP 'href="\K[^"]+' mdb-tmp.html | grep $url_page | grep -v $ignore ) - - for major_link in ${major_version_links[@]} - do - #echo "Major: $major_link" - curl -s "$url_base$major_link" > mdb-tmp.html - minor_version_links=$(grep -oP 'href="\K[^"]+' mdb-tmp.html | grep $url_page | grep -v $ignore ) - for minor_link in ${minor_version_links[@]} - do - if [ "$minor_link" != "$url_page" ]; then - #echo " Minor: $minor_link" - case $distro_info in - centos | rocky ) - path="yum/centos/$version_id/$architecture/rpms/" - curl -s "$url_base$minor_link$path" > mdb-tmp.html - package_links=$(grep -oP 'href="\K[^"]+' mdb-tmp.html | grep "$path" | grep "columnstore-engine" | grep -v debug | tail -1 ) - if [ ! 
-z "$package_links" ]; then - # echo "$package_links" - # echo "----------" - mariadb_version="${package_links#*mariadb-}" - columnstore_version="${mariadb_version#*columnstore-engine-}" - mariadb_version="$( echo $mariadb_version | awk -F/ '{print $1}' )" - columnstore_version="$( echo $columnstore_version | awk -F_ '{print $2}' | awk -F".el" '{print $1}' )" - # echo "MariaDB: $mariadb_version Columnstore: $columnstore_version" - printf "%-8s %-12s %-12s %-12s\n" "MariaDB:" "$mariadb_version" "Columnstore:" "$columnstore_version"; - fi; - ;; - ubuntu ) - path="repo/$distro_info/pool/main/m/mariadb/" - curl -s "$url_base$minor_link$path" > mdb-tmp.html - package_links=$(grep -oP 'href="\K[^"]+' mdb-tmp.html | grep "$path" | grep "columnstore_" | grep -v debug | tail -1 ) - if [ ! -z "$package_links" ]; then - # echo "$package_links" - # echo "----------" - mariadb_version="${package_links#*mariadb-}" - columnstore_version="${mariadb_version#*columnstore-engine-}" - mariadb_version="$( echo $mariadb_version | awk -F/ '{print $1}' )" - columnstore_version="$( echo $columnstore_version | awk -F"columnstore_" '{print $2}' | awk -F"-" '{print $2}' | awk -F"+maria" '{print $1}' )" - # echo "MariaDB: $mariadb_version Columnstore: $columnstore_version" - printf "%-8s %-12s %-12s %-12s\n" "MariaDB:" "$mariadb_version" "Columnstore:" "$columnstore_version"; - fi; - ;; - *) # unknown option - printf "Not implemented for: $distro_info\n" - exit 2; - esac - - - fi; - done - done - ;; - dev ) - printf "Not implemented for: $repo\n" - exit 1; - ;; - *) # unknown option - printf "Unknown repo: $repo\n" - exit 2; - esac -} - - -case $action in - remove ) - do_remove "$@" ; - ;; - install ) - do_install "$@"; - ;; - check ) - do_check "$@" - ;; - help | -h | --help | -help) - print_help_text; - exit 1; - ;; - *) # unknown option - printf "Unknown Action: $1\n" - print_help_text - exit 2; -esac diff --git a/mysql-test/columnstore/basic/disabled.def 
b/mysql-test/columnstore/basic/disabled.def index 171b471ff..aa0cfc49c 100644 --- a/mysql-test/columnstore/basic/disabled.def +++ b/mysql-test/columnstore/basic/disabled.def @@ -6,3 +6,4 @@ mcs118_charset_negative : mcs error code has changed 2022-07-07 roman.navrotskiy mcs16_functions_define_call_drop : 2022-07-08 roman.navrotskiy@mariadb.com udf_calshowpartitions : unstable values for min/max in the output(sometimes N/A, sometimes numbers) 2022-07-26 roman.nozdrin@mariadb.com mcs3_create_table_charset_collations : 10.6 vs 10.9 show create table difference +pron : pron is not threadsfe and doesn't work with PrimProc TDB: leonid.fedorov@mariadb.com diff --git a/mysql-test/columnstore/basic/r/MCOL-5035-update-insert-same-behavior.result b/mysql-test/columnstore/basic/r/MCOL-5035-update-insert-same-behavior.result new file mode 100644 index 000000000..413f18e45 --- /dev/null +++ b/mysql-test/columnstore/basic/r/MCOL-5035-update-insert-same-behavior.result @@ -0,0 +1,17 @@ +DROP DATABASE IF EXISTS MCOL5535; +CREATE DATABASE MCOL5535; +USE MCOL5535; +CREATE TABLE t1(i INTEGER, x DATETIME) ENGINE=COLUMNSTORE; +INSERT INTO t1 (i, x) VALUES (1, '0000-00-00 00:00:00'), (2, NULL), (3, '2024-01-01 01:01:01'); +SELECT * FROM t1; +i x +1 0000-00-00 00:00:00 +2 NULL +3 2024-01-01 01:01:01 +UPDATE t1 SET x='0000-00-00 00:00:00' WHERE i = 3; +SELECT * FROM t1; +i x +1 0000-00-00 00:00:00 +2 NULL +3 0000-00-00 00:00:00 +DROP DATABASE MCOL5535; diff --git a/mysql-test/columnstore/basic/r/MCOL-5772-hidden-order-by.result b/mysql-test/columnstore/basic/r/MCOL-5772-hidden-order-by.result new file mode 100644 index 000000000..8a4fcdf73 --- /dev/null +++ b/mysql-test/columnstore/basic/r/MCOL-5772-hidden-order-by.result @@ -0,0 +1,23 @@ +DROP DATABASE IF EXISTS MCOL5772; +CREATE DATABASE MCOL5772; +USE MCOL5772; +CREATE TABLE products ( +product_id INT, +product_name VARCHAR(100), +category VARCHAR(50), +unit_price DECIMAL(10, 2), +stock_quantity INT +) ENGINE=Columnstore; +INSERT INTO 
products VALUES +(1, 'Laptop', 'Electronics', 1200.00, 50), +(2, 'Smartphone', 'Electronics', 800.00, 100), +(3, 'Coffee Maker', 'Appliances', 50.00, 30), +(4, 'Backpack', 'Fashion', 40.00, 80), +(5, 'Desk Chair', 'Furniture', 150.00, 20); +SELECT product_name, SUM(stock_quantity) AS total_stock FROM products GROUP BY category ORDER BY stock_quantity; +product_name total_stock +Desk Chair 20 +Coffee Maker 30 +Backpack 80 +Smartphone 150 +DROP DATABASE MCOL5772; diff --git a/mysql-test/columnstore/basic/r/mcol-4525.result b/mysql-test/columnstore/basic/r/mcol-4525.result index 10dd190b6..8c3d0151a 100644 --- a/mysql-test/columnstore/basic/r/mcol-4525.result +++ b/mysql-test/columnstore/basic/r/mcol-4525.result @@ -80,7 +80,8 @@ SET columnstore_select_handler=ON; SELECT SUM(col1) AS col2 FROM t1 GROUP BY col2; ERROR 42000: The storage engine for the table doesn't support MCS-2016: Non supported item 'col2' on the GROUP BY list. SELECT col1 c FROM t1 ORDER BY AVG(col1); -ERROR HY000: Internal error: MCS-2021: 'c' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause. 
+c +10 SET columnstore_select_handler=AUTO; SELECT SUM(col1) AS col2 FROM t1 GROUP BY col2; col2 diff --git a/mysql-test/columnstore/basic/r/mcs228_regexp_operator.result b/mysql-test/columnstore/basic/r/mcs228_regexp_operator.result index 86d1cb985..2597132c2 100644 --- a/mysql-test/columnstore/basic/r/mcs228_regexp_operator.result +++ b/mysql-test/columnstore/basic/r/mcs228_regexp_operator.result @@ -42,6 +42,96 @@ t1_INT t1_INT REGEXP '-26' -7299 0 103 0 9913 0 +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '99$') FROM t1 ORDER BY 1; +t1_INT REGEXP_SUBSTR(t1_INT, '99$') +-7299 99 +103 +9913 +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '\.99$') FROM t1 ORDER BY 1; +t1_INT REGEXP_SUBSTR(t1_INT, '\.99$') +-7299 299 +103 +9913 +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '03$') FROM t1 ORDER BY 1; +t1_INT REGEXP_SUBSTR(t1_INT, '03$') +-7299 +103 03 +9913 +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '\.+') FROM t1 ORDER BY 1; +t1_INT REGEXP_SUBSTR(t1_INT, '\.+') +-7299 -7299 +103 103 +9913 9913 +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '0?') FROM t1 ORDER BY 1; +t1_INT REGEXP_SUBSTR(t1_INT, '0?') +-7299 +103 +9913 +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '-26') FROM t1 ORDER BY 1; +t1_INT REGEXP_SUBSTR(t1_INT, '-26') +-7299 +103 +9913 +SELECT t1_INT, REGEXP_INSTR(t1_INT, '99$') FROM t1 ORDER BY 1; +t1_INT REGEXP_INSTR(t1_INT, '99$') +-7299 4 +103 0 +9913 0 +SELECT t1_INT, REGEXP_INSTR(t1_INT, '\.99$') FROM t1 ORDER BY 1; +t1_INT REGEXP_INSTR(t1_INT, '\.99$') +-7299 3 +103 0 +9913 0 +SELECT t1_INT, REGEXP_INSTR(t1_INT, '03$') FROM t1 ORDER BY 1; +t1_INT REGEXP_INSTR(t1_INT, '03$') +-7299 0 +103 2 +9913 0 +SELECT t1_INT, REGEXP_INSTR(t1_INT, '\.+') FROM t1 ORDER BY 1; +t1_INT REGEXP_INSTR(t1_INT, '\.+') +-7299 1 +103 1 +9913 1 +SELECT t1_INT, REGEXP_INSTR(t1_INT, '0?') FROM t1 ORDER BY 1; +t1_INT REGEXP_INSTR(t1_INT, '0?') +-7299 1 +103 1 +9913 1 +SELECT t1_INT, REGEXP_INSTR(t1_INT, '-26') FROM t1 ORDER BY 1; +t1_INT REGEXP_INSTR(t1_INT, '-26') +-7299 0 +103 0 +9913 0 +SELECT t1_INT, 
REGEXP_REPLACE(t1_INT, '99$', 'TeddyBear') FROM t1 ORDER BY 1; +t1_INT REGEXP_REPLACE(t1_INT, '99$', 'TeddyBear') +-7299 -72TeddyBear +103 103 +9913 9913 +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '\.99$', 'TeddyBear') FROM t1 ORDER BY 1; +t1_INT REGEXP_REPLACE(t1_INT, '\.99$', 'TeddyBear') +-7299 -7TeddyBear +103 103 +9913 9913 +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '03$', 'TeddyBear') FROM t1 ORDER BY 1; +t1_INT REGEXP_REPLACE(t1_INT, '03$', 'TeddyBear') +-7299 -7299 +103 1TeddyBear +9913 9913 +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '\.+', 'TeddyBear') FROM t1 ORDER BY 1; +t1_INT REGEXP_REPLACE(t1_INT, '\.+', 'TeddyBear') +-7299 TeddyBear +103 TeddyBear +9913 TeddyBear +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '0?', 'TeddyBear') FROM t1 ORDER BY 1; +t1_INT REGEXP_REPLACE(t1_INT, '0?', 'TeddyBear') +-7299 TeddyBear-TeddyBear7TeddyBear2TeddyBear9TeddyBear9TeddyBear +103 TeddyBear1TeddyBearTeddyBear3TeddyBear +9913 TeddyBear9TeddyBear9TeddyBear1TeddyBear3TeddyBear +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '-26', 'TeddyBear') FROM t1 ORDER BY 1; +t1_INT REGEXP_REPLACE(t1_INT, '-26', 'TeddyBear') +-7299 -7299 +103 103 +9913 9913 SELECT t1_DECIMAL, t1_DECIMAL REGEXP '99$' FROM t1 ORDER BY 1; t1_DECIMAL t1_DECIMAL REGEXP '99$' 111.99000 0 @@ -72,6 +162,96 @@ t1_DECIMAL t1_DECIMAL REGEXP '-26' 111.99000 0 1234.56990 0 98765.43210 0 +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '99$') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '99$') +111.99000 +1234.56990 +98765.43210 +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '\.99$') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '\.99$') +111.99000 +1234.56990 +98765.43210 +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '03$') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '03$') +111.99000 +1234.56990 +98765.43210 +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '\.+') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '\.+') +111.99000 111.99000 +1234.56990 1234.56990 +98765.43210 98765.43210 +SELECT 
t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '0?') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '0?') +111.99000 +1234.56990 +98765.43210 +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '-26') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_SUBSTR(t1_DECIMAL, '-26') +111.99000 +1234.56990 +98765.43210 +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '99$') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '99$') +111.99000 0 +1234.56990 0 +98765.43210 0 +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '\.99$') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '\.99$') +111.99000 0 +1234.56990 0 +98765.43210 0 +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '03$') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '03$') +111.99000 0 +1234.56990 0 +98765.43210 0 +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '\.+') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '\.+') +111.99000 1 +1234.56990 1 +98765.43210 1 +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '0?') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '0?') +111.99000 1 +1234.56990 1 +98765.43210 1 +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '-26') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_INSTR(t1_DECIMAL, '-26') +111.99000 0 +1234.56990 0 +98765.43210 0 +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '99$', 'TeddyBear') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '99$', 'TeddyBear') +111.99000 111.99000 +1234.56990 1234.56990 +98765.43210 98765.43210 +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '\.99$', 'TeddyBear') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '\.99$', 'TeddyBear') +111.99000 111.99000 +1234.56990 1234.56990 +98765.43210 98765.43210 +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '03$', 'TeddyBear') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '03$', 'TeddyBear') +111.99000 111.99000 +1234.56990 1234.56990 +98765.43210 98765.43210 +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '\.+', 'TeddyBear') FROM t1 ORDER BY 1; +t1_DECIMAL 
REGEXP_REPLACE(t1_DECIMAL, '\.+', 'TeddyBear') +111.99000 TeddyBear +1234.56990 TeddyBear +98765.43210 TeddyBear +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '0?', 'TeddyBear') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '0?', 'TeddyBear') +111.99000 TeddyBear1TeddyBear1TeddyBear1TeddyBear.TeddyBear9TeddyBear9TeddyBearTeddyBearTeddyBearTeddyBear +1234.56990 TeddyBear1TeddyBear2TeddyBear3TeddyBear4TeddyBear.TeddyBear5TeddyBear6TeddyBear9TeddyBear9TeddyBearTeddyBear +98765.43210 TeddyBear9TeddyBear8TeddyBear7TeddyBear6TeddyBear5TeddyBear.TeddyBear4TeddyBear3TeddyBear2TeddyBear1TeddyBearTeddyBear +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '-26', 'TeddyBear') FROM t1 ORDER BY 1; +t1_DECIMAL REGEXP_REPLACE(t1_DECIMAL, '-26', 'TeddyBear') +111.99000 111.99000 +1234.56990 1234.56990 +98765.43210 98765.43210 SELECT t1_TEXT, t1_TEXT REGEXP 'oooo$' FROM t1 ORDER BY 1; t1_TEXT t1_TEXT REGEXP 'oooo$' ooooo 1 @@ -112,6 +292,131 @@ t1_TEXT t1_TEXT REGEXP 'o?o' ooooo 1 ppppp 0 qqqqq 0 +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'oooo$') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'oooo$') +ooooo oooo +ppppp +qqqqq +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'q$') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'q$') +ooooo +ppppp +qqqqq q +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'ppppp$') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'ppppp$') +ooooo +ppppp ppppp +qqqqq +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, '(ooo)+') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_SUBSTR(t1_TEXT, '(ooo)+') +ooooo ooo +ppppp +qqqqq +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, '(qqqqq)+') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_SUBSTR(t1_TEXT, '(qqqqq)+') +ooooo +ppppp +qqqqq qqqqq +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'p*') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'p*') +ooooo +ppppp ppppp +qqqqq +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'qq+q') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'qq+q') +ooooo +ppppp +qqqqq qqqqq +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 
'o?o') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_SUBSTR(t1_TEXT, 'o?o') +ooooo oo +ppppp +qqqqq +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'oooo$') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_INSTR(t1_TEXT, 'oooo$') +ooooo 2 +ppppp 0 +qqqqq 0 +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'q$') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_INSTR(t1_TEXT, 'q$') +ooooo 0 +ppppp 0 +qqqqq 5 +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'ppppp$') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_INSTR(t1_TEXT, 'ppppp$') +ooooo 0 +ppppp 1 +qqqqq 0 +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, '(ooo)+') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_INSTR(t1_TEXT, '(ooo)+') +ooooo 1 +ppppp 0 +qqqqq 0 +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, '(qqqqq)+') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_INSTR(t1_TEXT, '(qqqqq)+') +ooooo 0 +ppppp 0 +qqqqq 1 +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'p*') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_INSTR(t1_TEXT, 'p*') +ooooo 1 +ppppp 1 +qqqqq 1 +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'qq+q') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_INSTR(t1_TEXT, 'qq+q') +ooooo 0 +ppppp 0 +qqqqq 1 +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'o?o') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_INSTR(t1_TEXT, 'o?o') +ooooo 1 +ppppp 0 +qqqqq 0 +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'oooo$', 'KittyCat') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_REPLACE(t1_TEXT, 'oooo$', 'KittyCat') +ooooo oKittyCat +ppppp ppppp +qqqqq qqqqq +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'q$', 'KittyCat') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_REPLACE(t1_TEXT, 'q$', 'KittyCat') +ooooo ooooo +ppppp ppppp +qqqqq qqqqKittyCat +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'ppppp$', 'KittyCat') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_REPLACE(t1_TEXT, 'ppppp$', 'KittyCat') +ooooo ooooo +ppppp KittyCat +qqqqq qqqqq +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, '(ooo)+', 'KittyCat') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_REPLACE(t1_TEXT, '(ooo)+', 'KittyCat') +ooooo KittyCatoo +ppppp ppppp +qqqqq qqqqq +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, '(qqqqq)+', 'KittyCat') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_REPLACE(t1_TEXT, 
'(qqqqq)+', 'KittyCat') +ooooo ooooo +ppppp ppppp +qqqqq KittyCat +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'p*', 'KittyCat') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_REPLACE(t1_TEXT, 'p*', 'KittyCat') +ooooo KittyCatoKittyCatoKittyCatoKittyCatoKittyCatoKittyCat +ppppp KittyCatKittyCat +qqqqq KittyCatqKittyCatqKittyCatqKittyCatqKittyCatqKittyCat +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'qq+q', 'KittyCat') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_REPLACE(t1_TEXT, 'qq+q', 'KittyCat') +ooooo ooooo +ppppp ppppp +qqqqq KittyCat +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'o?o', 'KittyCat') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_REPLACE(t1_TEXT, 'o?o', 'KittyCat') +ooooo KittyCatKittyCatKittyCat +ppppp ppppp +qqqqq qqqqq +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'q', 'KittyCat') FROM t1 ORDER BY 1; +t1_TEXT REGEXP_REPLACE(t1_TEXT, 'q', 'KittyCat') +ooooo ooooo +ppppp ppppp +qqqqq KittyCatKittyCatKittyCatKittyCatKittyCat SELECT t1_DATE, t1_DATE REGEXP '(1997)+' FROM t1 ORDER BY 1; t1_DATE t1_DATE REGEXP '(1997)+' 1997-12-12 1 @@ -132,6 +437,66 @@ t1_DATE t1_DATE REGEXP '(09-12-)+' 1997-12-12 0 2001-01-01 0 2009-12-11 1 +SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(1997)+') FROM t1 ORDER BY 1; +t1_DATE REGEXP_SUBSTR(t1_DATE, '(1997)+') +1997-12-12 1997 +2001-01-01 +2009-12-11 +SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '01$') FROM t1 ORDER BY 1; +t1_DATE REGEXP_SUBSTR(t1_DATE, '01$') +1997-12-12 +2001-01-01 01 +2009-12-11 +SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(59)+') FROM t1 ORDER BY 1; +t1_DATE REGEXP_SUBSTR(t1_DATE, '(59)+') +1997-12-12 +2001-01-01 +2009-12-11 +SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(09-12-)+') FROM t1 ORDER BY 1; +t1_DATE REGEXP_SUBSTR(t1_DATE, '(09-12-)+') +1997-12-12 +2001-01-01 +2009-12-11 09-12- +SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(1997)+') FROM t1 ORDER BY 1; +t1_DATE REGEXP_INSTR(t1_DATE, '(1997)+') +1997-12-12 1 +2001-01-01 0 +2009-12-11 0 +SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '01$') FROM t1 ORDER BY 1; +t1_DATE REGEXP_INSTR(t1_DATE, '01$') +1997-12-12 0 
+2001-01-01 9 +2009-12-11 0 +SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(59)+') FROM t1 ORDER BY 1; +t1_DATE REGEXP_INSTR(t1_DATE, '(59)+') +1997-12-12 0 +2001-01-01 0 +2009-12-11 0 +SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(09-12-)+') FROM t1 ORDER BY 1; +t1_DATE REGEXP_INSTR(t1_DATE, '(09-12-)+') +1997-12-12 0 +2001-01-01 0 +2009-12-11 3 +SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(1997)+', 'TeddyBear') FROM t1 ORDER BY 1; +t1_DATE REGEXP_REPLACE(t1_DATE, '(1997)+', 'TeddyBear') +1997-12-12 TeddyBear-12-12 +2001-01-01 2001-01-01 +2009-12-11 2009-12-11 +SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '01$', 'TeddyBear') FROM t1 ORDER BY 1; +t1_DATE REGEXP_REPLACE(t1_DATE, '01$', 'TeddyBear') +1997-12-12 1997-12-12 +2001-01-01 2001-01-TeddyBear +2009-12-11 2009-12-11 +SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(59)+', 'TeddyBear') FROM t1 ORDER BY 1; +t1_DATE REGEXP_REPLACE(t1_DATE, '(59)+', 'TeddyBear') +1997-12-12 1997-12-12 +2001-01-01 2001-01-01 +2009-12-11 2009-12-11 +SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(09-12-)+', 'TeddyBear') FROM t1 ORDER BY 1; +t1_DATE REGEXP_REPLACE(t1_DATE, '(09-12-)+', 'TeddyBear') +1997-12-12 1997-12-12 +2001-01-01 2001-01-01 +2009-12-11 20TeddyBear11 SELECT t1_TIME, t1_TIME REGEXP '(59)+' FROM t1 ORDER BY 1; t1_TIME t1_TIME REGEXP '(59)+' 01:08:59 1 @@ -142,4 +507,161 @@ t1_TIME t1_TIME REGEXP '22$' 01:08:59 0 22:12:02 0 23:59:59 0 +SELECT t1_TIME, REGEXP_SUBSTR(t1_TIME, '(59)+') FROM t1 ORDER BY 1; +t1_TIME REGEXP_SUBSTR(t1_TIME, '(59)+') +01:08:59 59 +22:12:02 +23:59:59 59 +SELECT t1_TIME, REGEXP_SUBSTR(t1_TIME, '22$') FROM t1 ORDER BY 1; +t1_TIME REGEXP_SUBSTR(t1_TIME, '22$') +01:08:59 +22:12:02 +23:59:59 +SELECT t1_TIME, REGEXP_INSTR(t1_TIME, '(59)+') FROM t1 ORDER BY 1; +t1_TIME REGEXP_INSTR(t1_TIME, '(59)+') +01:08:59 7 +22:12:02 0 +23:59:59 4 +SELECT t1_TIME, REGEXP_INSTR(t1_TIME, '22$') FROM t1 ORDER BY 1; +t1_TIME REGEXP_INSTR(t1_TIME, '22$') +01:08:59 0 +22:12:02 0 +23:59:59 0 +SELECT t1_TIME, REGEXP_REPLACE(t1_TIME, '(59)+', 'KittyCat') 
FROM t1 ORDER BY 1; +t1_TIME REGEXP_REPLACE(t1_TIME, '(59)+', 'KittyCat') +01:08:59 01:08:KittyCat +22:12:02 22:12:02 +23:59:59 23:KittyCat:KittyCat +SELECT t1_TIME, REGEXP_REPLACE(t1_TIME, '22$', 'KittyCat') FROM t1 ORDER BY 1; +t1_TIME REGEXP_REPLACE(t1_TIME, '22$', 'KittyCat') +01:08:59 01:08:59 +22:12:02 22:12:02 +23:59:59 23:59:59 +SET character_set_connection = 'utf8'; +SET NAMES utf8mb3; +CREATE TABLE t2 (hello text) DEFAULT CHARSET=utf8 engine columnstore; +INSERT INTO t2 values('こんにちは'); +INSERT INTO t2 values('привет'); +INSERT INTO t2 values('Γεια'); +INSERT INTO t2 values('სალამი'); +SELECT hello, hello regexp 'ん.ち' FROM t2; +hello hello regexp 'ん.ち' +こんにちは 0 +привет 0 +Γεια 0 +სალამი 0 +SELECT hello, hello regexp 'и.е' FROM t2; +hello hello regexp 'и.е' +こんにちは 0 +привет 0 +Γεια 0 +სალამი 0 +SELECT hello, hello regexp 'ε.α' FROM t2; +hello hello regexp 'ε.α' +こんにちは 0 +привет 0 +Γεια 0 +სალამი 0 +SELECT hello, hello regexp 'ა.ა' FROM t2; +hello hello regexp 'ა.ა' +こんにちは 0 +привет 0 +Γεια 0 +სალამი 0 +SELECT hello, regexp_substr(hello, 'ん.ち') FROM t2; +hello regexp_substr(hello, 'ん.ち') +こんにちは んにち +привет +Γεια +სალამი +SELECT hello, regexp_substr(hello, 'и.е') FROM t2; +hello regexp_substr(hello, 'и.е') +こんにちは +привет иве +Γεια +სალამი +SELECT hello, regexp_substr(hello, 'ε.α') FROM t2; +hello regexp_substr(hello, 'ε.α') +こんにちは +привет +Γεια εια +სალამი +SELECT hello, regexp_substr(hello, 'ა.ა') FROM t2; +hello regexp_substr(hello, 'ა.ა') +こんにちは +привет +Γεια +სალამი ალა +SELECT hello, regexp_instr(hello, 'ん.ち') FROM t2; +hello regexp_instr(hello, 'ん.ち') +こんにちは 2 +привет 0 +Γεια 0 +სალამი 0 +SELECT hello, regexp_instr(hello, 'и.е') FROM t2; +hello regexp_instr(hello, 'и.е') +こんにちは 0 +привет 3 +Γεια 0 +სალამი 0 +SELECT hello, regexp_instr(hello, 'ε.α') FROM t2; +hello regexp_instr(hello, 'ε.α') +こんにちは 0 +привет 0 +Γεια 2 +სალამი 0 +SELECT hello, regexp_instr(hello, 'ა.ა') FROM t2; +hello regexp_instr(hello, 'ა.ა') +こんにちは 0 +привет 0 +Γεια 0 +სალამი 2 
+SELECT hello, regexp_replace(hello, 'ん.ち', 'Достоевский') FROM t2; +hello regexp_replace(hello, 'ん.ち', 'Достоевский') +こんにちは こДостоевскийは +привет привет +Γεια Γεια +სალამი სალამი +SELECT hello, regexp_replace(hello, 'и.е', 'Достоевский') FROM t2; +hello regexp_replace(hello, 'и.е', 'Достоевский') +こんにちは こんにちは +привет прДостоевскийт +Γεια Γεια +სალამი სალამი +SELECT hello, regexp_replace(hello, 'ε.α', 'Достоевский') FROM t2; +hello regexp_replace(hello, 'ε.α', 'Достоевский') +こんにちは こんにちは +привет привет +Γεια ΓДостоевский +სალამი სალამი +SELECT hello, regexp_replace(hello, 'ა.ა', 'Достоевский') FROM t2; +hello regexp_replace(hello, 'ა.ა', 'Достоевский') +こんにちは こんにちは +привет привет +Γεια Γεια +სალამი სДостоевскийმი +SHOW VARIABLES LIKE 'character_set%'; +Variable_name Value +character_set_client utf8mb3 +character_set_connection utf8mb3 +character_set_database utf8mb3 +character_set_filesystem binary +character_set_results utf8mb3 +character_set_server utf8mb3 +character_set_system utf8mb3 +character_sets_dir /usr/share/mysql/charsets/ +CREATE TABLE tw(hello text) DEFAULT CHARSET=cp1251 ENGINE COLUMNSTORE; +INSERT INTO tw values(convert('привет' USING cp1251)); +SELECT hello, regexp_instr(hello, convert('и.е' USING cp1251)) FROM tw; +hello regexp_instr(hello, convert('и.е' USING cp1251)) +привет 3 +SELECT hello, convert(regexp_substr(hello, convert('и.е' USING cp1251)) using utf8) FROM tw; +hello convert(regexp_substr(hello, convert('и.е' USING cp1251)) using utf8) +привет иве +SELECT hello, convert(regexp_replace(hello, convert('и.е' USING cp1251), convert('Достоевкий' USING cp1251)) using utf8) FROM tw; +hello convert(regexp_replace(hello, convert('и.е' USING cp1251), convert('Достоевкий' USING cp1251)) using utf8) +привет привет +SELECT hello, hello regexp convert('и.е' USING cp1251) FROM tw; +hello hello regexp convert('и.е' USING cp1251) +привет 1 DROP DATABASE mcs228_db; diff --git a/mysql-test/columnstore/basic/r/mcs244_regexp_instr_function.result 
b/mysql-test/columnstore/basic/r/mcs244_regexp_instr_function.result index 5f48b7d13..3658dfeed 100644 --- a/mysql-test/columnstore/basic/r/mcs244_regexp_instr_function.result +++ b/mysql-test/columnstore/basic/r/mcs244_regexp_instr_function.result @@ -13,5 +13,8 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02'); INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59'); INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59'); SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'o') FROM t1 ORDER BY 1; -ERROR 42000: The storage engine for the table doesn't support MCS-1001: Function 'regexp_instr' isn't supported. +t1_TEXT REGEXP_INSTR(t1_TEXT, 'o') +ooooo 1 +ppppp 0 +qqqqq 0 DROP DATABASE mcs244_db; diff --git a/mysql-test/columnstore/basic/r/mcs245_regexp_replace_function.result b/mysql-test/columnstore/basic/r/mcs245_regexp_replace_function.result index d1b65185a..dba276857 100644 --- a/mysql-test/columnstore/basic/r/mcs245_regexp_replace_function.result +++ b/mysql-test/columnstore/basic/r/mcs245_regexp_replace_function.result @@ -13,5 +13,8 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02'); INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59'); INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59'); SELECT REGEXP_REPLACE(t1_TEXT, 'o', 'X') FROM t1 ORDER BY 1; -ERROR 42000: The storage engine for the table doesn't support MCS-1001: Function 'regexp_replace' isn't supported. 
+REGEXP_REPLACE(t1_TEXT, 'o', 'X') +ppppp +qqqqq +XXXXX DROP DATABASE mcs245_db; diff --git a/mysql-test/columnstore/basic/r/mcs246_regexp_substr_function.result b/mysql-test/columnstore/basic/r/mcs246_regexp_substr_function.result index 965f01577..2d44aec16 100644 --- a/mysql-test/columnstore/basic/r/mcs246_regexp_substr_function.result +++ b/mysql-test/columnstore/basic/r/mcs246_regexp_substr_function.result @@ -13,5 +13,8 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02'); INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59'); INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59'); SELECT REGEXP_SUBSTR(t1_TEXT, '[a-z]+') FROM t1 ORDER BY 1; -ERROR 42000: The storage engine for the table doesn't support MCS-1001: Function 'regexp_substr' isn't supported. +REGEXP_SUBSTR(t1_TEXT, '[a-z]+') +ooooo +ppppp +qqqqq DROP DATABASE mcs246_db; diff --git a/mysql-test/columnstore/basic/r/mcs35_select_group_by.result b/mysql-test/columnstore/basic/r/mcs35_select_group_by.result index eb8f4e42a..eb7dbc28b 100644 --- a/mysql-test/columnstore/basic/r/mcs35_select_group_by.result +++ b/mysql-test/columnstore/basic/r/mcs35_select_group_by.result @@ -56,7 +56,9 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL SELECT * FROM t1 GROUP BY t1_tinyint; -ERROR HY000: Internal error: MCS-2021: '`mcs35_db1`.`t1`.`t1_int`' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause. +t1_tinyint t1_int t1_bigint t1_double t1_float t1_blob t1_text t1_char t1_varchar t1_datetime +0 NULL 403685477580676 54.797693231 8.40287 222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd d hello world! 
4387-11-08 11:22:30 +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL SELECT COUNT(*) FROM t1 GROUP BY t1_tinyint; COUNT(*) 11 diff --git a/mysql-test/columnstore/basic/r/mcs36_select_order_by_group_by.result b/mysql-test/columnstore/basic/r/mcs36_select_order_by_group_by.result index 94fe58383..a0f55dff5 100644 --- a/mysql-test/columnstore/basic/r/mcs36_select_order_by_group_by.result +++ b/mysql-test/columnstore/basic/r/mcs36_select_order_by_group_by.result @@ -69,7 +69,11 @@ spID userid MIN(t1.score) 3 3 3 SELECT t1.spID, t2.userid, MIN(t1.score) FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid, t1.spID ORDER BY t1.date; -ERROR HY000: Internal error: MCS-2021: 'mcs36_db1.t1.date' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause. +spID userid MIN(t1.score) +1 1 1 +2 1 1 +2 2 2 +3 3 3 SELECT t2.userid, MIN(t1.score) FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY unknown ORDER BY t2.userid; ERROR 42S22: Unknown column 'unknown' in 'group statement' @@ -78,5 +82,20 @@ FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid ORDER BY unknown; ERROR 42S22: Unknown column 'unknown' in 'order clause' SELECT t2.userid, MIN(t1.score) FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid ORDER BY NULL; -ERROR HY000: Internal error: MCS-2021: 'unknown db.unknown table.unknown field' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause. 
+userid MIN(t1.score) +1 1 +2 2 +3 3 +SELECT * FROM t1 GROUP BY spID, userID ORDER BY score ASC, spID, userID; +spID userID score lsg date +1 1 1 0000-00-00 +2 1 1 0000-00-00 +2 2 2 0000-00-00 +3 3 3 0000-00-00 +SELECT * FROM t1 GROUP BY spID, userID ORDER BY score DESC, spID, userID; +spID userID score lsg date +3 3 3 0000-00-00 +2 2 2 0000-00-00 +1 1 1 0000-00-00 +2 1 1 0000-00-00 DROP DATABASE IF EXISTS mcs36_db1; diff --git a/mysql-test/columnstore/basic/r/mcs76_having.result b/mysql-test/columnstore/basic/r/mcs76_having.result index 27f9188f1..3b68e51be 100644 --- a/mysql-test/columnstore/basic/r/mcs76_having.result +++ b/mysql-test/columnstore/basic/r/mcs76_having.result @@ -23,7 +23,9 @@ col1 col2 3 sss 4 ooo SELECT col1, col2, SUM(LENGTH(col2)) FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1; -ERROR HY000: Internal error: MCS-2021: '`mcs76_db`.`t1`.`col2`' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause. 
+col1 col2 SUM(LENGTH(col2)) +2 oooooooooooooooooooo 40 +4 ooo 6 CREATE TABLE t2(col1 INT, col2 DATETIME)ENGINE=Columnstore; INSERT INTO t2 VALUES(1, '2020-2-2'),(2, '2020-3-3'),(5,'2020-6-6'),(6, '2020-7-7'); SELECT t1.col1, SUM(t1.col1*t2.col1) AS a FROM t1 JOIN t2 ON t1.col1 = t2.col1 GROUP BY t1.col1 HAVING a>1 ORDER BY t1.col1; diff --git a/mysql-test/columnstore/basic/t/MCOL-5035-update-insert-same-behavior.test b/mysql-test/columnstore/basic/t/MCOL-5035-update-insert-same-behavior.test new file mode 100644 index 000000000..7083c5091 --- /dev/null +++ b/mysql-test/columnstore/basic/t/MCOL-5035-update-insert-same-behavior.test @@ -0,0 +1,19 @@ +--disable_warnings +DROP DATABASE IF EXISTS MCOL5535; +--enable_warnings + +CREATE DATABASE MCOL5535; + +USE MCOL5535; + +CREATE TABLE t1(i INTEGER, x DATETIME) ENGINE=COLUMNSTORE; + +INSERT INTO t1 (i, x) VALUES (1, '0000-00-00 00:00:00'), (2, NULL), (3, '2024-01-01 01:01:01'); + +SELECT * FROM t1; + +UPDATE t1 SET x='0000-00-00 00:00:00' WHERE i = 3; + +SELECT * FROM t1; + +DROP DATABASE MCOL5535; diff --git a/mysql-test/columnstore/basic/t/MCOL-5772-hidden-order-by.test b/mysql-test/columnstore/basic/t/MCOL-5772-hidden-order-by.test new file mode 100644 index 000000000..ff83153b4 --- /dev/null +++ b/mysql-test/columnstore/basic/t/MCOL-5772-hidden-order-by.test @@ -0,0 +1,28 @@ +# Order by a column that is not in GROUP BY and SELECT parts +# should be correct. 
+--disable_warnings +DROP DATABASE IF EXISTS MCOL5772; +--enable_warnings + +CREATE DATABASE MCOL5772; + +USE MCOL5772; + +CREATE TABLE products ( +product_id INT, +product_name VARCHAR(100), +category VARCHAR(50), +unit_price DECIMAL(10, 2), +stock_quantity INT +) ENGINE=Columnstore; + +INSERT INTO products VALUES +(1, 'Laptop', 'Electronics', 1200.00, 50), +(2, 'Smartphone', 'Electronics', 800.00, 100), +(3, 'Coffee Maker', 'Appliances', 50.00, 30), +(4, 'Backpack', 'Fashion', 40.00, 80), +(5, 'Desk Chair', 'Furniture', 150.00, 20); + +SELECT product_name, SUM(stock_quantity) AS total_stock FROM products GROUP BY category ORDER BY stock_quantity; + +DROP DATABASE MCOL5772; diff --git a/mysql-test/columnstore/basic/t/mcol-4525.test b/mysql-test/columnstore/basic/t/mcol-4525.test index ba424bca7..cbdaf495f 100644 --- a/mysql-test/columnstore/basic/t/mcol-4525.test +++ b/mysql-test/columnstore/basic/t/mcol-4525.test @@ -85,7 +85,6 @@ INSERT INTO t1 VALUES(10,'bob',10); SET columnstore_select_handler=ON; --error 1178 SELECT SUM(col1) AS col2 FROM t1 GROUP BY col2; ---error 1815 SELECT col1 c FROM t1 ORDER BY AVG(col1); SET columnstore_select_handler=AUTO; SELECT SUM(col1) AS col2 FROM t1 GROUP BY col2; diff --git a/mysql-test/columnstore/basic/t/mcs228_regexp_operator.test b/mysql-test/columnstore/basic/t/mcs228_regexp_operator.test index 6406f17e6..faad4228c 100644 --- a/mysql-test/columnstore/basic/t/mcs228_regexp_operator.test +++ b/mysql-test/columnstore/basic/t/mcs228_regexp_operator.test @@ -30,6 +30,27 @@ SELECT t1_INT, t1_INT REGEXP '\.+' FROM t1 ORDER BY 1; SELECT t1_INT, t1_INT REGEXP '0?' 
FROM t1 ORDER BY 1; SELECT t1_INT, t1_INT REGEXP '-26' FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '99$') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '\.99$') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '03$') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '\.+') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '0?') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_SUBSTR(t1_INT, '-26') FROM t1 ORDER BY 1; + +SELECT t1_INT, REGEXP_INSTR(t1_INT, '99$') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_INSTR(t1_INT, '\.99$') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_INSTR(t1_INT, '03$') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_INSTR(t1_INT, '\.+') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_INSTR(t1_INT, '0?') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_INSTR(t1_INT, '-26') FROM t1 ORDER BY 1; + +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '99$', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '\.99$', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '03$', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '\.+', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '0?', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_INT, REGEXP_REPLACE(t1_INT, '-26', 'TeddyBear') FROM t1 ORDER BY 1; + SELECT t1_DECIMAL, t1_DECIMAL REGEXP '99$' FROM t1 ORDER BY 1; SELECT t1_DECIMAL, t1_DECIMAL REGEXP '\.99$' FROM t1 ORDER BY 1; SELECT t1_DECIMAL, t1_DECIMAL REGEXP '03$' FROM t1 ORDER BY 1; @@ -37,6 +58,28 @@ SELECT t1_DECIMAL, t1_DECIMAL REGEXP '\.+' FROM t1 ORDER BY 1; SELECT t1_DECIMAL, t1_DECIMAL REGEXP '0?' 
FROM t1 ORDER BY 1; SELECT t1_DECIMAL, t1_DECIMAL REGEXP '-26' FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '99$') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '\.99$') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '03$') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '\.+') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '0?') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_SUBSTR(t1_DECIMAL, '-26') FROM t1 ORDER BY 1; + +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '99$') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '\.99$') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '03$') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '\.+') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '0?') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_INSTR(t1_DECIMAL, '-26') FROM t1 ORDER BY 1; + +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '99$', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '\.99$', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '03$', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '\.+', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '0?', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_DECIMAL, REGEXP_REPLACE(t1_DECIMAL, '-26', 'TeddyBear') FROM t1 ORDER BY 1; + + SELECT t1_TEXT, t1_TEXT REGEXP 'oooo$' FROM t1 ORDER BY 1; SELECT t1_TEXT, t1_TEXT REGEXP 'q$' FROM t1 ORDER BY 1; SELECT t1_TEXT, t1_TEXT REGEXP 'ppppp$' FROM t1 ORDER BY 1; @@ -46,13 +89,104 @@ SELECT t1_TEXT, t1_TEXT REGEXP 'p*' FROM t1 ORDER BY 1; SELECT t1_TEXT, t1_TEXT REGEXP 'qq+q' FROM t1 ORDER BY 1; SELECT t1_TEXT, t1_TEXT REGEXP 'o?o' FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'oooo$') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'q$') FROM t1 ORDER BY 1; +SELECT 
t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'ppppp$') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, '(ooo)+') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, '(qqqqq)+') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'p*') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'qq+q') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_SUBSTR(t1_TEXT, 'o?o') FROM t1 ORDER BY 1; + +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'oooo$') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'q$') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'ppppp$') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, '(ooo)+') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, '(qqqqq)+') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'p*') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'qq+q') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'o?o') FROM t1 ORDER BY 1; + +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'oooo$', 'KittyCat') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'q$', 'KittyCat') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'ppppp$', 'KittyCat') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, '(ooo)+', 'KittyCat') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, '(qqqqq)+', 'KittyCat') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'p*', 'KittyCat') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'qq+q', 'KittyCat') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'o?o', 'KittyCat') FROM t1 ORDER BY 1; +SELECT t1_TEXT, REGEXP_REPLACE(t1_TEXT, 'q', 'KittyCat') FROM t1 ORDER BY 1; + SELECT t1_DATE, t1_DATE REGEXP '(1997)+' FROM t1 ORDER BY 1; SELECT t1_DATE, t1_DATE REGEXP '01$' FROM t1 ORDER BY 1; SELECT t1_DATE, t1_DATE REGEXP '(59)+' FROM t1 ORDER BY 1; SELECT t1_DATE, t1_DATE REGEXP '(09-12-)+' FROM t1 ORDER BY 1; +SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(1997)+') FROM t1 ORDER BY 1; +SELECT 
t1_DATE, REGEXP_SUBSTR(t1_DATE, '01$') FROM t1 ORDER BY 1; +SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(59)+') FROM t1 ORDER BY 1; +SELECT t1_DATE, REGEXP_SUBSTR(t1_DATE, '(09-12-)+') FROM t1 ORDER BY 1; + +SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(1997)+') FROM t1 ORDER BY 1; +SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '01$') FROM t1 ORDER BY 1; +SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(59)+') FROM t1 ORDER BY 1; +SELECT t1_DATE, REGEXP_INSTR(t1_DATE, '(09-12-)+') FROM t1 ORDER BY 1; + +SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(1997)+', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '01$', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(59)+', 'TeddyBear') FROM t1 ORDER BY 1; +SELECT t1_DATE, REGEXP_REPLACE(t1_DATE, '(09-12-)+', 'TeddyBear') FROM t1 ORDER BY 1; + SELECT t1_TIME, t1_TIME REGEXP '(59)+' FROM t1 ORDER BY 1; SELECT t1_TIME, t1_TIME REGEXP '22$' FROM t1 ORDER BY 1; +SELECT t1_TIME, REGEXP_SUBSTR(t1_TIME, '(59)+') FROM t1 ORDER BY 1; +SELECT t1_TIME, REGEXP_SUBSTR(t1_TIME, '22$') FROM t1 ORDER BY 1; + +SELECT t1_TIME, REGEXP_INSTR(t1_TIME, '(59)+') FROM t1 ORDER BY 1; +SELECT t1_TIME, REGEXP_INSTR(t1_TIME, '22$') FROM t1 ORDER BY 1; + + +SELECT t1_TIME, REGEXP_REPLACE(t1_TIME, '(59)+', 'KittyCat') FROM t1 ORDER BY 1; +SELECT t1_TIME, REGEXP_REPLACE(t1_TIME, '22$', 'KittyCat') FROM t1 ORDER BY 1; + +SET character_set_connection = 'utf8'; +SET NAMES utf8mb3; + +CREATE TABLE t2 (hello text) DEFAULT CHARSET=utf8 engine columnstore; +INSERT INTO t2 values('こんにちは'); +INSERT INTO t2 values('привет'); +INSERT INTO t2 values('Γεια'); +INSERT INTO t2 values('სალამი'); + +SELECT hello, hello regexp 'ん.ち' FROM t2; +SELECT hello, hello regexp 'и.е' FROM t2; +SELECT hello, hello regexp 'ε.α' FROM t2; +SELECT hello, hello regexp 'ა.ა' FROM t2; + +SELECT hello, regexp_substr(hello, 'ん.ち') FROM t2; +SELECT hello, regexp_substr(hello, 'и.е') FROM t2; +SELECT hello, regexp_substr(hello, 'ε.α') FROM t2; +SELECT hello, 
regexp_substr(hello, 'ა.ა') FROM t2; + +SELECT hello, regexp_instr(hello, 'ん.ち') FROM t2; +SELECT hello, regexp_instr(hello, 'и.е') FROM t2; +SELECT hello, regexp_instr(hello, 'ε.α') FROM t2; +SELECT hello, regexp_instr(hello, 'ა.ა') FROM t2; + +SELECT hello, regexp_replace(hello, 'ん.ち', 'Достоевский') FROM t2; +SELECT hello, regexp_replace(hello, 'и.е', 'Достоевский') FROM t2; +SELECT hello, regexp_replace(hello, 'ε.α', 'Достоевский') FROM t2; +SELECT hello, regexp_replace(hello, 'ა.ა', 'Достоевский') FROM t2; + +SHOW VARIABLES LIKE 'character_set%'; +CREATE TABLE tw(hello text) DEFAULT CHARSET=cp1251 ENGINE COLUMNSTORE; +INSERT INTO tw values(convert('привет' USING cp1251)); +SELECT hello, regexp_instr(hello, convert('и.е' USING cp1251)) FROM tw; +SELECT hello, convert(regexp_substr(hello, convert('и.е' USING cp1251)) using utf8) FROM tw; +SELECT hello, convert(regexp_replace(hello, convert('и.е' USING cp1251), convert('Достоевкий' USING cp1251)) using utf8) FROM tw; + +SELECT hello, hello regexp convert('и.е' USING cp1251) FROM tw; + # Clean UP DROP DATABASE mcs228_db; diff --git a/mysql-test/columnstore/basic/t/mcs244_regexp_instr_function.test b/mysql-test/columnstore/basic/t/mcs244_regexp_instr_function.test index fca3ddecb..2c0e35d07 100644 --- a/mysql-test/columnstore/basic/t/mcs244_regexp_instr_function.test +++ b/mysql-test/columnstore/basic/t/mcs244_regexp_instr_function.test @@ -23,7 +23,6 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02'); INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59'); INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59'); ---error ER_CHECK_NOT_IMPLEMENTED SELECT t1_TEXT, REGEXP_INSTR(t1_TEXT, 'o') FROM t1 ORDER BY 1; # Clean UP diff --git a/mysql-test/columnstore/basic/t/mcs245_regexp_replace_function.test b/mysql-test/columnstore/basic/t/mcs245_regexp_replace_function.test index ed27b9365..d6f22fdef 100644 --- 
a/mysql-test/columnstore/basic/t/mcs245_regexp_replace_function.test +++ b/mysql-test/columnstore/basic/t/mcs245_regexp_replace_function.test @@ -23,7 +23,6 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02'); INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59'); INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59'); ---error ER_CHECK_NOT_IMPLEMENTED SELECT REGEXP_REPLACE(t1_TEXT, 'o', 'X') FROM t1 ORDER BY 1; # Clean UP diff --git a/mysql-test/columnstore/basic/t/mcs246_regexp_substr_function.test b/mysql-test/columnstore/basic/t/mcs246_regexp_substr_function.test index 29e8eba5b..cadab930d 100644 --- a/mysql-test/columnstore/basic/t/mcs246_regexp_substr_function.test +++ b/mysql-test/columnstore/basic/t/mcs246_regexp_substr_function.test @@ -23,7 +23,6 @@ INSERT INTO t1 VALUES(103, 1234.5699, repeat('o', 5), '1997-12-12', '22:12:02'); INSERT INTO t1 VALUES(-7299, 111.99, repeat('p', 5), '2001-1-1', '23:59:59'); INSERT INTO t1 VALUES(9913, 98765.4321, repeat('q', 5), '09-12-11', '01:08:59'); ---error ER_CHECK_NOT_IMPLEMENTED SELECT REGEXP_SUBSTR(t1_TEXT, '[a-z]+') FROM t1 ORDER BY 1; # Clean UP diff --git a/mysql-test/columnstore/basic/t/mcs35_select_group_by.test b/mysql-test/columnstore/basic/t/mcs35_select_group_by.test index 07db340a4..80a26aea2 100644 --- a/mysql-test/columnstore/basic/t/mcs35_select_group_by.test +++ b/mysql-test/columnstore/basic/t/mcs35_select_group_by.test @@ -52,7 +52,7 @@ INSERT INTO t1 VALUES(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NUL --sorted_result SELECT * FROM t1; #Without aggregate function ---error 1815 +--sorted_result SELECT * FROM t1 GROUP BY t1_tinyint; --sorted_result diff --git a/mysql-test/columnstore/basic/t/mcs36_select_order_by_group_by.test b/mysql-test/columnstore/basic/t/mcs36_select_order_by_group_by.test index b2b95dcf2..5411ae23e 100644 --- a/mysql-test/columnstore/basic/t/mcs36_select_order_by_group_by.test +++ 
b/mysql-test/columnstore/basic/t/mcs36_select_order_by_group_by.test @@ -60,8 +60,7 @@ SELECT t1.spID, t2.userid, MIN(t1.score) FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid, t1.spID ORDER BY t1.spID, t2.userid; # columns in ORDER BY clause not included in the GROUP BY clause. -#Suspected bug. Innodb succeeds. ---error 1815 +--sorted_result SELECT t1.spID, t2.userid, MIN(t1.score) FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid, t1.spID ORDER BY t1.date; @@ -76,10 +75,13 @@ SELECT t2.userid, MIN(t1.score) FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid ORDER BY unknown; # order by null -#Suspected bug. Innodb succeeds. ---error 1815 +# Innodb succeeds - it is a workaround to speed up GROUP BY. +--sorted_result SELECT t2.userid, MIN(t1.score) FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid ORDER BY NULL; +SELECT * FROM t1 GROUP BY spID, userID ORDER BY score ASC, spID, userID; +SELECT * FROM t1 GROUP BY spID, userID ORDER BY score DESC, spID, userID; + #Clean up DROP DATABASE IF EXISTS mcs36_db1; diff --git a/mysql-test/columnstore/basic/t/mcs76_having.test b/mysql-test/columnstore/basic/t/mcs76_having.test index c185db8f3..5286cee4e 100644 --- a/mysql-test/columnstore/basic/t/mcs76_having.test +++ b/mysql-test/columnstore/basic/t/mcs76_having.test @@ -18,7 +18,7 @@ SELECT col2 FROM t1 GROUP BY col2 HAVING col2 LIKE '%o%' ORDER BY col2; SELECT col1 FROM t1 GROUP BY col1 HAVING col1 > 1 ORDER BY col1; SELECT col1, col2 FROM t1 GROUP BY col1, col2 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1; SELECT col1, col2 FROM t1 GROUP BY col1, col2 HAVING col1 > 1 OR col2 LIKE '%o%' ORDER BY col1; ---error ER_INTERNAL_ERROR +--sorted_result SELECT col1, col2, SUM(LENGTH(col2)) FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1; CREATE TABLE t2(col1 INT, col2 DATETIME)ENGINE=Columnstore; diff --git a/mysql-test/columnstore/bugfixes/MCOL-5699.result b/mysql-test/columnstore/bugfixes/MCOL-5699.result new file mode 
100644 index 000000000..a90b3560a --- /dev/null +++ b/mysql-test/columnstore/bugfixes/MCOL-5699.result @@ -0,0 +1,43 @@ +DROP DATABASE IF EXISTS mcol_5699 ; +CREATE DATABASE mcol_5699 ; +USE mcol_5699 ; +CREATE TABLE cs1(a text) ENGINE=columnstore; +INSERT INTO cs1 VALUES('a'),('b'),('c'),('x'); +CREATE TABLE cs2(a text) ENGINE=columnstore; +INSERT INTO cs2 VALUES('y'),('z'),('x'); +CREATE TABLE cs3(a text) ENGINE=columnstore; +INSERT INTO cs3 VALUES('r'),('t'),('z'); +SELECT a from cs1 UNION ALL select a from cs2; +a +a +b +c +x +x +y +z +SELECT a from cs1 UNION DISTINCT select a from cs2; +a +a +b +c +x +y +z +SELECT a from cs1 INTERSECT ALL select a from cs2; +ERROR HY000: Internal error: INTERSECT is not supported by Columnstore engine +SELECT a from cs1 INTERSECT DISTINCT select a from cs2; +ERROR HY000: Internal error: INTERSECT is not supported by Columnstore engine +SELECT a from cs1 EXCEPT ALL select a from cs2; +ERROR HY000: Internal error: EXCEPT is not supported by Columnstore engine +SELECT a from cs1 EXCEPT DISTINCT select a from cs2; +ERROR HY000: Internal error: EXCEPT is not supported by Columnstore engine +SELECT a from cs1 UNION select a from cs2 INTERSECT ALL select a from cs3; +ERROR HY000: Internal error: INTERSECT is not supported by Columnstore engine +SELECT a from cs1 UNION select a from cs2 INTERSECT DISTINCT select a from cs3; +ERROR HY000: Internal error: INTERSECT is not supported by Columnstore engine +SELECT a from cs1 UNION select a from cs2 EXCEPT ALL select a from cs3; +ERROR HY000: Internal error: EXCEPT is not supported by Columnstore engine +SELECT a from cs1 UNION select a from cs2 EXCEPT DISTINCT select a from cs3; +ERROR HY000: Internal error: EXCEPT is not supported by Columnstore engine +DROP DATABASE mcol_5699 ; diff --git a/mysql-test/columnstore/bugfixes/MCOL-5699.test b/mysql-test/columnstore/bugfixes/MCOL-5699.test new file mode 100644 index 000000000..c5614a2c2 --- /dev/null +++ 
b/mysql-test/columnstore/bugfixes/MCOL-5699.test @@ -0,0 +1,40 @@ +--source ../include/have_columnstore.inc +--disable_warnings +DROP DATABASE IF EXISTS mcol_5699 ; +--enable_warnings +CREATE DATABASE mcol_5699 ; +USE mcol_5699 ; +CREATE TABLE cs1(a text) ENGINE=columnstore; +INSERT INTO cs1 VALUES('a'),('b'),('c'),('x'); + +CREATE TABLE cs2(a text) ENGINE=columnstore; +INSERT INTO cs2 VALUES('y'),('z'),('x'); + +CREATE TABLE cs3(a text) ENGINE=columnstore; +INSERT INTO cs3 VALUES('r'),('t'),('z'); + +--sorted_result +SELECT a from cs1 UNION ALL select a from cs2; +--sorted_result +SELECT a from cs1 UNION DISTINCT select a from cs2; + +--ERROR 1815 +SELECT a from cs1 INTERSECT ALL select a from cs2; +--ERROR 1815 +SELECT a from cs1 INTERSECT DISTINCT select a from cs2; +--ERROR 1815 +SELECT a from cs1 EXCEPT ALL select a from cs2; +--ERROR 1815 +SELECT a from cs1 EXCEPT DISTINCT select a from cs2; + +--ERROR 1815 +SELECT a from cs1 UNION select a from cs2 INTERSECT ALL select a from cs3; +--ERROR 1815 +SELECT a from cs1 UNION select a from cs2 INTERSECT DISTINCT select a from cs3; +--ERROR 1815 +SELECT a from cs1 UNION select a from cs2 EXCEPT ALL select a from cs3; +--ERROR 1815 +SELECT a from cs1 UNION select a from cs2 EXCEPT DISTINCT select a from cs3; + + +DROP DATABASE mcol_5699 ; diff --git a/mysql-test/columnstore/bugfixes/MCOL_5175.result b/mysql-test/columnstore/bugfixes/MCOL_5175.result new file mode 100644 index 000000000..938d6a93e --- /dev/null +++ b/mysql-test/columnstore/bugfixes/MCOL_5175.result @@ -0,0 +1,21 @@ +DROP DATABASE IF EXISTS MCOL_5175; +CREATE DATABASE MCOL_5175; +USE MCOL_5175; +create table testtext2 ( myvalue varchar(100) )engine=Columnstore CHARSET=utf8; +show create table testtext2; +Table Create Table +testtext2 CREATE TABLE `testtext2` ( + `myvalue` varchar(100) DEFAULT NULL +) ENGINE=Columnstore DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +alter table testtext2 add column myvalue2 text; +show create table testtext2; 
+Table Create Table +testtext2 CREATE TABLE `testtext2` ( + `myvalue` varchar(100) DEFAULT NULL, + `myvalue2` text DEFAULT NULL +) ENGINE=Columnstore DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +insert into testtext2 (myvalue2) VALUES ('myvalue'); +select * from testtext2; +myvalue myvalue2 +NULL myvalue +DROP DATABASE MCOL_5175; diff --git a/mysql-test/columnstore/bugfixes/MCOL_5175.test b/mysql-test/columnstore/bugfixes/MCOL_5175.test new file mode 100644 index 000000000..9a895fc72 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/MCOL_5175.test @@ -0,0 +1,22 @@ +# +# Alter table add column +# Author: Bharath, bharath.bokka@mariadb.com +# +-- source include/have_innodb.inc +-- source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS MCOL_5175; +--enable_warnings + +CREATE DATABASE MCOL_5175; +USE MCOL_5175; + +create table testtext2 ( myvalue varchar(100) )engine=Columnstore CHARSET=utf8; +show create table testtext2; +alter table testtext2 add column myvalue2 text; +show create table testtext2; +insert into testtext2 (myvalue2) VALUES ('myvalue'); +select * from testtext2; + +DROP DATABASE MCOL_5175; \ No newline at end of file diff --git a/mysql-test/columnstore/bugfixes/mcol-4758.result b/mysql-test/columnstore/bugfixes/mcol-4758.result index b3172e3af..d9eb3637b 100644 --- a/mysql-test/columnstore/bugfixes/mcol-4758.result +++ b/mysql-test/columnstore/bugfixes/mcol-4758.result @@ -11,4 +11,9 @@ INSERT INTO src VALUES (1, "Pretty Bloby Thing", "This is some text"); select * from src where c0=1 and substr(cLT, 1, 4)="This"; c0 cLB cLT 1 Pretty Bloby Thing This is some text +ALTER TABLE src ADD COLUMN (cLT2 LONGTEXT); +UPDATE src SET cLT2="My Friday Night" where c0=1; +select * from src where c0=1 and substr(cLT, 1, 4)="This"; +c0 cLB cLT cLT2 +1 Pretty Bloby Thing This is some text My Friday Night DROP DATABASE `mcol_4758`; diff --git a/mysql-test/columnstore/bugfixes/mcol-4758.test 
b/mysql-test/columnstore/bugfixes/mcol-4758.test index a9117a96f..b5a1991ca 100644 --- a/mysql-test/columnstore/bugfixes/mcol-4758.test +++ b/mysql-test/columnstore/bugfixes/mcol-4758.test @@ -11,9 +11,9 @@ INSERT INTO src VALUES (1, "Pretty Bloby Thing", "This is some text"); select * from src where c0=1 and substr(cLT, 1, 4)="This"; # To be uncommented when MCOL-4480 is fixed -#ALTER TABLE src ADD COLUMN (cLT2 LONGTEXT); -#UPDATE src SET cLT2="My Friday Night" where c0=1; -#select * from src where c0=1 and substr(cLT, 1, 4)="This"; +ALTER TABLE src ADD COLUMN (cLT2 LONGTEXT); +UPDATE src SET cLT2="My Friday Night" where c0=1; +select * from src where c0=1 and substr(cLT, 1, 4)="This"; # cleanup DROP DATABASE `mcol_4758`; diff --git a/mysql-test/columnstore/bugfixes/mcol-5236.result b/mysql-test/columnstore/bugfixes/mcol-5236.result new file mode 100644 index 000000000..bb97aa01c --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5236.result @@ -0,0 +1,21 @@ +DROP DATABASE IF EXISTS mcol_5236; +CREATE DATABASE mcol_5236; +USE mcol_5236; +create table test_having_columnstore (someString varchar(1000), someInt int, FK int) ENGINE=Columnstore DEFAULT CHARSET=utf8; +insert into test_having_columnstore values ('bla', 1, 17), ('xyz', 2, 17); +create table dim_having_columnstore (PK int, someString varchar(1000)) ENGINE=Columnstore DEFAULT CHARSET=utf8; +insert into dim_having_columnstore values(17, 'test'); +create view test_having_columnstore_view as +select someString as someString, someInt as someInt, FK as FK from test_having_columnstore; +create view dim_having_columnstore_view as +select * from dim_having_columnstore; +select `dim`.`someString` as `c0` +from `dim_having_columnstore_view` as `dim`, `test_having_columnstore_view` as `fact` +where `fact`.`FK` = `dim`.`PK` +group by `dim`.`someString` +having NOT((sum(`fact`.`someInt`) is null)) +order by ISNULL(`dim`.`someString`) ASC, +`dim`.`someString` ASC; +c0 +test +DROP DATABASE mcol_5236; diff --git 
a/mysql-test/columnstore/bugfixes/mcol-5236.test b/mysql-test/columnstore/bugfixes/mcol-5236.test new file mode 100644 index 000000000..e1f79d4ee --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5236.test @@ -0,0 +1,34 @@ +# +# MCOL-5236 +# + +--source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS mcol_5236; +--enable_warnings +CREATE DATABASE mcol_5236; +USE mcol_5236; + +create table test_having_columnstore (someString varchar(1000), someInt int, FK int) ENGINE=Columnstore DEFAULT CHARSET=utf8; +insert into test_having_columnstore values ('bla', 1, 17), ('xyz', 2, 17); +create table dim_having_columnstore (PK int, someString varchar(1000)) ENGINE=Columnstore DEFAULT CHARSET=utf8; +insert into dim_having_columnstore values(17, 'test'); + +create view test_having_columnstore_view as +select someString as someString, someInt as someInt, FK as FK from test_having_columnstore; + +create view dim_having_columnstore_view as +select * from dim_having_columnstore; + +select `dim`.`someString` as `c0` +from `dim_having_columnstore_view` as `dim`, `test_having_columnstore_view` as `fact` +where `fact`.`FK` = `dim`.`PK` +group by `dim`.`someString` +having NOT((sum(`fact`.`someInt`) is null)) +order by ISNULL(`dim`.`someString`) ASC, +`dim`.`someString` ASC; + +--disable_warnings +DROP DATABASE mcol_5236; +--enable_warnings diff --git a/mysql-test/columnstore/bugfixes/mcol-5237.result b/mysql-test/columnstore/bugfixes/mcol-5237.result new file mode 100644 index 000000000..89741efac --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5237.result @@ -0,0 +1,16 @@ +DROP DATABASE IF EXISTS mcol_5237; +CREATE DATABASE mcol_5237; +USE mcol_5237; +create table t1 (a varchar(1000), b datetime, c int) ENGINE=Columnstore DEFAULT CHARSET=utf8; +insert into t1 values +('abc', null, 1), +('xyz', str_to_date('2022-09-22 00:00:00', '%Y-%m-%d %H:%i:%s'), 1); +create view v1 as +select a, NVL(b, str_to_date('1970-01-01 00:00:00', '%Y-%m-%d 
%H:%i:%s')) as b, c from t1; +select count(*) from v1 where YEAR(b) = 2022; +count(*) +1 +select count(*) from v1 where YEAR(b) = 1970; +count(*) +1 +DROP DATABASE mcol_5237; diff --git a/mysql-test/columnstore/bugfixes/mcol-5237.test b/mysql-test/columnstore/bugfixes/mcol-5237.test new file mode 100644 index 000000000..05e3458d4 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5237.test @@ -0,0 +1,26 @@ +# +# MCOL-5237 +# + +--source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS mcol_5237; +--enable_warnings +CREATE DATABASE mcol_5237; +USE mcol_5237; +create table t1 (a varchar(1000), b datetime, c int) ENGINE=Columnstore DEFAULT CHARSET=utf8; + +insert into t1 values +('abc', null, 1), +('xyz', str_to_date('2022-09-22 00:00:00', '%Y-%m-%d %H:%i:%s'), 1); + +create view v1 as +select a, NVL(b, str_to_date('1970-01-01 00:00:00', '%Y-%m-%d %H:%i:%s')) as b, c from t1; + +select count(*) from v1 where YEAR(b) = 2022; +select count(*) from v1 where YEAR(b) = 1970; + +--disable_warnings +DROP DATABASE mcol_5237; +--enable_warnings diff --git a/mysql-test/columnstore/bugfixes/mcol-5328.result b/mysql-test/columnstore/bugfixes/mcol-5328.result new file mode 100644 index 000000000..e4da9bc87 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5328.result @@ -0,0 +1,27 @@ +DROP DATABASE IF EXISTS mcol5328; +CREATE DATABASE mcol5328; +USE mcol5328; +CREATE TABLE cst1(a INT NOT NULL, b VARCHAR(100)) ENGINE=columnstore; +INSERT INTO cst1 VALUES(1, 'My mouse'),(2,'Breakfast Food'); +SELECT a, b FROM cst1 WHERE b REGEXP '^My'; +a b +1 My mouse +SELECT a, b FROM cst1 WHERE b REGEXP '[a|e|i|o|u]$'; +a b +1 My mouse +SELECT a, b FROM cst1 WHERE b REGEXP '^Br[aeiou]{2}.*[aeiou]{2}d$'; +a b +2 Breakfast Food +SELECT a, REGEXP_SUBSTR(b, 'a|e|i|o|u') AS "vowel" FROM cst1; +a vowel +1 o +2 e +select a, REGEXP_INSTR(b, 'a|e|i|o|u') AS "vowel" FROM cst1; +a vowel +1 5 +2 3 +select a, REGEXP_REPLACE(b, 'a|e|i|o|u', 'x') AS "vowel" FROM cst1; 
+a vowel +1 My mxxsx +2 Brxxkfxst Fxxd +DROP DATABASE mcol5328; diff --git a/mysql-test/columnstore/bugfixes/mcol-5328.test b/mysql-test/columnstore/bugfixes/mcol-5328.test new file mode 100644 index 000000000..b602e8e71 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5328.test @@ -0,0 +1,20 @@ +--source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS mcol5328; +--enable_warnings + +CREATE DATABASE mcol5328; +USE mcol5328; +CREATE TABLE cst1(a INT NOT NULL, b VARCHAR(100)) ENGINE=columnstore; +INSERT INTO cst1 VALUES(1, 'My mouse'),(2,'Breakfast Food'); +SELECT a, b FROM cst1 WHERE b REGEXP '^My'; +SELECT a, b FROM cst1 WHERE b REGEXP '[a|e|i|o|u]$'; +SELECT a, b FROM cst1 WHERE b REGEXP '^Br[aeiou]{2}.*[aeiou]{2}d$'; +SELECT a, REGEXP_SUBSTR(b, 'a|e|i|o|u') AS "vowel" FROM cst1; +select a, REGEXP_INSTR(b, 'a|e|i|o|u') AS "vowel" FROM cst1; +select a, REGEXP_REPLACE(b, 'a|e|i|o|u', 'x') AS "vowel" FROM cst1; + +--disable_warnings +DROP DATABASE mcol5328; +--enable_warnings diff --git a/mysql-test/columnstore/bugfixes/mcol-5708.result b/mysql-test/columnstore/bugfixes/mcol-5708.result new file mode 100644 index 000000000..65b215812 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5708.result @@ -0,0 +1,17 @@ +DROP DATABASE IF EXISTS mcol_5708; +CREATE DATABASE mcol_5708; +USE mcol_5708; +CREATE TABLE test ( +`f_int` int(10) unsigned NOT NULL DEFAULT 0, +`f_dec14x2` decimal(14,2) NOT NULL DEFAULT 0.00, +`f_dec14x4` decimal(14,4) NOT NULL DEFAULT 0.0000 +) ENGINE=columnstore DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci; +insert into test (f_int,f_dec14x4, f_dec14x2) values (1, 0.1, 0.1); +select +f_int, sum(f_dec14x2), sum(f_dec14x4), +sum(-200.000001), sum(0), sum(0.0), sum(11.000011), sum(12e-4), sum(1e+4), +sum(-0.0), sum(-1 - 1.1), sum(--12), sum(+20), sum(13) +from test group by 1; +f_int sum(f_dec14x2) sum(f_dec14x4) sum(-200.000001) sum(0) sum(0.0) sum(11.000011) sum(12e-4) sum(1e+4) sum(-0.0) sum(-1 - 1.1) 
sum(--12) sum(+20) sum(13) +1 0.10 0.1000 -200.000001 0 0.0 11.000011 0.0012 10000 0.0 -2.1 12 20 13 +DROP DATABASE mcol_5708; diff --git a/mysql-test/columnstore/bugfixes/mcol-5708.test b/mysql-test/columnstore/bugfixes/mcol-5708.test new file mode 100644 index 000000000..7b04d8274 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5708.test @@ -0,0 +1,31 @@ +# +# MCOL-5708 +# + +--source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS mcol_5708; +--enable_warnings +CREATE DATABASE mcol_5708; +USE mcol_5708; + +CREATE TABLE test ( + `f_int` int(10) unsigned NOT NULL DEFAULT 0, + `f_dec14x2` decimal(14,2) NOT NULL DEFAULT 0.00, + `f_dec14x4` decimal(14,4) NOT NULL DEFAULT 0.0000 +) ENGINE=columnstore DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci; + + +insert into test (f_int,f_dec14x4, f_dec14x2) values (1, 0.1, 0.1); + +select +f_int, sum(f_dec14x2), sum(f_dec14x4), +sum(-200.000001), sum(0), sum(0.0), sum(11.000011), sum(12e-4), sum(1e+4), +sum(-0.0), sum(-1 - 1.1), sum(--12), sum(+20), sum(13) +from test group by 1; + + +--disable_warnings +DROP DATABASE mcol_5708; +--enable_warnings diff --git a/oam/install_scripts/CMakeLists.txt b/oam/install_scripts/CMakeLists.txt index 9bd40053f..0a5dcd7dc 100644 --- a/oam/install_scripts/CMakeLists.txt +++ b/oam/install_scripts/CMakeLists.txt @@ -89,6 +89,7 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcs-storagemanager.service.in" "${CM configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcs-stop-controllernode.sh.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcs-stop-controllernode.sh" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcs-loadbrm.py.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcs-loadbrm.py" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcs-savebrm.py.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcs-savebrm.py" @ONLY) +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/mcs-savebrm.py.in" "${CMAKE_CURRENT_SOURCE_DIR}/mcssavebrm.py" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/columnstoreSyslog.in" 
"${CMAKE_CURRENT_SOURCE_DIR}/columnstoreSyslog" @ONLY) install(PROGRAMS columnstore-post-install @@ -124,3 +125,6 @@ install(FILES mariadb-columnstore.service DESTINATION ${ENGINE_SUPPORTDIR} COMPONENT columnstore-engine) install(FILES module DESTINATION ${ENGINE_DATADIR}/local COMPONENT columnstore-engine) + +find_package (Python3 COMPONENTS Interpreter REQUIRED) +add_test(NAME PythonUnitTests COMMAND ${Python3_EXECUTABLE} -m unittest test_mcs-savebrm.py) \ No newline at end of file diff --git a/oam/install_scripts/mcs-loadbrm.py.in b/oam/install_scripts/mcs-loadbrm.py.in index 2449c0a2e..2781fae3e 100755 --- a/oam/install_scripts/mcs-loadbrm.py.in +++ b/oam/install_scripts/mcs-loadbrm.py.in @@ -172,16 +172,13 @@ def is_node_primary(conf_root): success = True except HTTPError as exc: logging.warning( - 'Something goes wrong while requesting primary status ', - 'through api.', - 'Got response code "{}" with reason "{}".'.format( + 'Something goes wrong while requesting primary status through api. Got response code "{}" with reason "{}".'.format( exc.code, exc.reason ) ) except URLError: logging.warning( - 'CMAPI became unavailable while trying', - 'to request primary status.' + 'CMAPI became unavailable while trying to request primary status.' ) except Exception: logging.error( @@ -238,8 +235,7 @@ def get_meta(conf_root, meta_type): raise except URLError: logging.warning( - 'CMAPI on primary became unavailable while trying', - 'to request {} from it.'.format(meta_type) + 'CMAPI on primary became unavailable while trying to request {} from it.'.format(meta_type) ) raise except Exception: @@ -313,13 +309,12 @@ if __name__ == '__main__': if s3_enabled: # start SM using systemd if use_systemd: - cmd = 'systemctl start mcs-storagemanager' + CMD = 'systemctl start mcs-storagemanager' try: - subprocess.check_call(cmd, shell=True) + subprocess.check_call(CMD, shell=True) except subprocess.CalledProcessError as exc: logging.error( - 'Failed to start storagemanager. 
{}'.format(cmd), - 'exits with {}.'.format(exc.returncode) + 'Failed to start storagemanager. {} exits with {}.'.format(CMD, exc.returncode) ) sys.exit(1) time.sleep(1) # allow SM time to init @@ -397,13 +392,11 @@ if __name__ == '__main__': shutil.chown(current_name, USER, GROUP) else: logging.info( - 'Cmapi is not running on primary node. ' - 'Skip loading metafiles.' + 'Cmapi is not running on primary node. Skip loading metafiles.' ) except Exception as exc: logging.error( - 'Failed to detect primary or load BRM data from', - 'the primary node.', + 'Failed to detect primary or load BRM data from the primary node.', exc_info=True ) sys.exit(1) @@ -450,8 +443,7 @@ if __name__ == '__main__': else: if s3_enabled: logging.info( - 'brm_saves_currenty returned empty string from', - 'read_from_sm_with_retry' + 'brm_saves_currenty returned empty string from read_from_sm_with_retry' ) else: logging.info( diff --git a/oam/install_scripts/mcs-savebrm.py.in b/oam/install_scripts/mcs-savebrm.py.in index 8c78cd140..28a2e46b5 100755 --- a/oam/install_scripts/mcs-savebrm.py.in +++ b/oam/install_scripts/mcs-savebrm.py.in @@ -2,6 +2,7 @@ import configparser import fcntl import json +import glob import logging import os import socket @@ -9,6 +10,7 @@ import ssl import struct import subprocess import sys +import time import xml.etree.ElementTree as ET from urllib.request import Request, urlopen from urllib.error import HTTPError, URLError @@ -21,7 +23,11 @@ MCS_CONFIG_PATH = os.path.join(MCS_ETC_PATH, 'Columnstore.xml') SM_CONFIG_PATH = os.path.join(MCS_ETC_PATH, 'storagemanager.cnf') MCS_BIN_DIR = '@ENGINE_BINDIR@' SAVEBRM = os.path.join(MCS_BIN_DIR, 'save_brm') +EM_FILE_SUFFIX = '_em' +EM_FILE_SIZE_THRESHOLD = 1000 HALF_A_MINUTE = 30 +NUMBER_OF_FILES_TO_KEEP = 40 +DEFAULT_EM_LOCAL_PATH_PREFIX = '' LOCALHOST = '127.0.0.1' # according to https://www.ibm.com/docs/en/storage-sentinel/1.1.2?topic=installation-map-your-local-host-loopback-address LOCALHOSTS = ( @@ -33,6 +39,8 @@ 
LOCALHOSTS = ( ) API_VERSION = '0.4.0' API_PORT = '8640' +BRM_BACKUP_PATH = '/tmp/columnstore_tmp_files/rdwrscratch/' +BRM_BACKUP_PATH_PART = '{}_BRM_saves' def get_api_key(): @@ -60,7 +68,7 @@ def cmapi_available(): :return: is CMAPI running or not :rtype: bool """ - logging.debug('Detecting CMAPI is up and running.') + logging.error('Detecting CMAPI is up and running.') url = 'https://{}:{}/notfound'.format(LOCALHOST, API_PORT) request = Request(method='POST', url=url) ctx = get_unverified_context() @@ -101,7 +109,7 @@ def get_ip_address_by_nic(ifname): )[20:24] ) except Exception as exc: - logging.debug( + logging.error( 'Exception while getting IP address of an "{}" interface'.format( ifname ), @@ -120,7 +128,7 @@ def is_primary_fallback(current_hostname): :return: is node primary :rtype: bool """ - logging.debug( + logging.error( 'Current DBRM_Controller/IPAddr is {}'.format(current_hostname) ) hostnames = set() @@ -131,7 +139,7 @@ def is_primary_fallback(current_hostname): hostnames.update([hostnames_3tuple[0], *hostnames_3tuple[1]]) except: pass - logging.debug('Found hostnames {}.'.format(hostnames)) + logging.error('Found hostnames {}.'.format(hostnames)) return current_hostname in LOCALHOSTS or current_hostname in hostnames @@ -161,16 +169,13 @@ def is_node_primary(conf_root): success = True except HTTPError as exc: logging.warning( - 'Something goes wrong while requesting primary status ', - 'through api.', - 'Got response code "{}" with reason "{}".'.format( + 'Something goes wrong while requesting primary status through api. Got response code "{}" with reason "{}".'.format( exc.code, exc.reason ) ) except URLError: logging.warning( - 'CMAPI became unavailable while trying', - 'to request primary status.' + 'CMAPI became unavailable while trying to request primary status.' 
) except Exception: logging.error( @@ -192,42 +197,193 @@ def is_node_primary(conf_root): return is_primary_fallback(conf_root.find('./DBRM_Controller/IPAddr').text) -if __name__ == '__main__': - master_addr = '' - pm_count = 0 - logging.basicConfig( - format='%(levelname)s: %(message)s', level=logging.DEBUG - ) - logging.debug('Loading Columnstore.xml.') +def get_file_size(file_path): + """ Returns the size of the file in bytes. """ try: - cs_config = ET.parse(MCS_CONFIG_PATH) - config_root = cs_config.getroot() - master_addr = config_root.find('./DBRM_Controller/IPAddr').text - pm_count = int( - config_root.find('./SystemModuleConfig/ModuleCount3').text + size = os.path.getsize(file_path) + return size + except OSError as e: + logging.error('OSError in get_file_size(): {}.'.format(e)) + return None + + +def em_is_empty(file_path_prefix): + """Returns True if EM file size is less than EM_FILE_SIZE_THRESHOLD + or its argument is None. + + :rtype: Bool + """ + # Add error message if EM is empty + if file_path_prefix is None: + is_em_empty = True + else: + filesize = get_file_size(file_path_prefix + EM_FILE_SUFFIX) + is_em_empty = filesize < EM_FILE_SIZE_THRESHOLD + if is_em_empty: + logging.error('EM file is none or its size {} is less than {} bytes.'.format(filesize, EM_FILE_SIZE_THRESHOLD)) + return is_em_empty + + +def clean_up_backup_brm_files(save_brm_dir_path): + """ Removes all but 5 last usable sets of BRM files in the specified directory. + Usable in the context means having non-empty EM. 
+ """ + filenames = os.listdir(save_brm_dir_path) + filenames.sort(reverse=True) + files_to_remove = filenames[NUMBER_OF_FILES_TO_KEEP:] + for filename in files_to_remove: + file_path = os.path.join(save_brm_dir_path, filename) + logging.error('Clean up {}.'.format(file_path)) + try: + os.remove(file_path) + except OSError as e: + logging.error('OSError exception happens removing {}: {}.'.format(file_path, e)) + + +def remove_files_by_prefix_if_exist(file_path_prefix): + """ Removes files with the given prefix if they exist. """ + if file_path_prefix is None: + logging.error( + 'file_path_prefix is None. Cannot remove files.', + exc_info=True ) - logging.debug('Succesfully loaded Columnstore.xml.') - except (FileNotFoundError, AttributeError, ValueError) as e: - # is it correct case? + return + try: + + files_paths = glob.glob(file_path_prefix + '*') + for file_path in files_paths: + os.remove(file_path) + except OSError as e: + logging.error( + 'Error removing file: {} - {}'.format(file_path, e.strerror), + exc_info=True + ) + + +def get_config_root_from_file(file_path): + """Returns XML root element from file. + + :param file_path: xml config path + :return: XML root element or None + :rtype: Element or None + """ + try: + cs_config = ET.parse(file_path) + return cs_config.getroot() + except (FileNotFoundError, AttributeError, ValueError): logging.error( 'Exception while loading Columnstore.xml. 
Continue anyway.', exc_info=True ) + return None - logging.debug('Reading SM config.') - sm_config = configparser.ConfigParser() - files_read = len(sm_config.read(SM_CONFIG_PATH)) - storage = sm_config.get( - 'ObjectStorage', 'service', fallback='LocalStorage' - ) +def get_epoch_prefix(): + """Returns a prefix with epoch time - if is_node_primary(config_root): + :rtype: String + """ + epoch_time = int(time.time()) + + return 'backup_{}'.format(epoch_time) + + +def get_save_brm_dir_path(a_mcs_config_root): + """Returns a path that SM treats as local + + :param file_path: xml config XML root + :rtype + """ + save_brm_dir_path = BRM_BACKUP_PATH + if a_mcs_config_root is not None: try: - retcode = subprocess.check_call(SAVEBRM, shell=True) - except subprocess.CalledProcessError as exc: - logging.error('{} exits with {}.'.format(exc.cmd, exc.returncode)) - sys.exit(1) + system_temp_file_dir = a_mcs_config_root.find('./SystemConfig/SystemTempFileDir').text + hdfs_rdwr_scratch = a_mcs_config_root.find('./SystemConfig/hdfsRdwrScratch').text + # There is a danger to have no '/' in the end of system_temp_file_dir + # or have two of them there. In both cases save_brm will fail to store + # files locally. + save_brm_dir_path = system_temp_file_dir + hdfs_rdwr_scratch + except AttributeError: + logging.error('Exception while getting SystemTempFileDir and hdfsRdwrScratch from Columnstore.xml', exc_info=True) + + return save_brm_dir_path + + +def get_save_brm_path_prefix(a_mcs_config_root): + """Returns a path that SM treats as local + + :param file_path: xml config XML root + :rtype: String + """ + epoch_prefix = get_epoch_prefix() + return get_save_brm_dir_path(a_mcs_config_root) + '/' + BRM_BACKUP_PATH_PART.format(epoch_prefix) + + +def call_save_brm(path): + """Calls save_brm first and then tries to call it with local path. 
+ + :param file_path: xml config XML root + :rtype: None + """ + savebrm_cmd = SAVEBRM + ' ' + path + try: + subprocess.check_call(savebrm_cmd, shell=True) + except subprocess.CalledProcessError as exc: + logging.error('The call to {} exits with {}.'.format(savebrm_cmd, exc.returncode)) + return None + except OSError: + logging.error('Os error while calling savebrm', exc_info=True) + return None + return path + + +def call_save_brm_locally(a_mcs_config_root): + """Calls save_brm first and then tries to call it with local path. + + :param file_path: xml config XML root + :rtype: None + """ + local_path = get_save_brm_path_prefix(a_mcs_config_root) + return call_save_brm(local_path) + + +def call_save_brm_with_local_fallback(a_mcs_config_root): + """Calls save_brm first and then tries to call it with local path. + + :param file_path: xml config XML root + :rtype: None + """ + try: + subprocess.check_call(SAVEBRM, shell=True) + except subprocess.CalledProcessError as exc: + logging.error('The primary call to {} exits with {}.'.format(exc.cmd, exc.returncode)) + backup_path = get_save_brm_path_prefix(a_mcs_config_root) + logging.error('Back up BRM files locally to {}.'.format(backup_path)) + backup_cmd = SAVEBRM + ' ' + backup_path + try: + subprocess.check_call(backup_cmd, shell=True) + except subprocess.CalledProcessError: + logging.error('The backup call to {} exits with {}.'.format(exc.cmd, exc.returncode)) except OSError: - logging.error('Os error while calling savebrm', exc_info=True) - sys.exit(0) + logging.error('Os error while calling savebrm during the backup', exc_info=True) + + sys.exit(1) + except OSError: + logging.error('Os error while calling savebrm', exc_info=True) + sys.exit(1) + + +if __name__ == '__main__': + mcs_config_root = get_config_root_from_file(MCS_CONFIG_PATH) + # config_root can be None + if is_node_primary(mcs_config_root): + em_local_path_prefix = call_save_brm_locally(mcs_config_root) + if not em_local_path_prefix or 
em_is_empty(em_local_path_prefix): + # remove_files_by_prefix_if_exist(em_local_path_prefix) + logging.error('Exiting with error.') + sys.exit(1) + + clean_up_backup_brm_files(get_save_brm_dir_path(mcs_config_root)) + + call_save_brm(DEFAULT_EM_LOCAL_PATH_PREFIX) + sys.exit(0) diff --git a/oam/install_scripts/test_mcs-savebrm.py b/oam/install_scripts/test_mcs-savebrm.py new file mode 100644 index 000000000..aa758b496 --- /dev/null +++ b/oam/install_scripts/test_mcs-savebrm.py @@ -0,0 +1,117 @@ +import unittest +from unittest.mock import patch, mock_open, MagicMock +import socket +import ssl +import json +import os +import subprocess +from urllib.error import HTTPError, URLError +from urllib.request import Request +from xml.etree.ElementTree import Element +import importlib.util + +import mcssavebrm + +class TestMcsSavebrmFunctions(unittest.TestCase): + @patch('mcssavebrm.configparser.ConfigParser.get', return_value='test_api_key') + @patch('mcssavebrm.configparser.ConfigParser.read') + def test_get_api_key(self, mock_read, mock_get): + self.assertEqual(mcssavebrm.get_api_key(), 'test_api_key') + mock_read.assert_called_once_with(mcssavebrm.CMAPI_CONFIG_PATH) + + def test_get_unverified_context(self): + ctx = mcssavebrm.get_unverified_context() + self.assertFalse(ctx.check_hostname) + self.assertEqual(ctx.verify_mode, ssl.CERT_NONE) + + @patch('mcssavebrm.urlopen') + @patch('mcssavebrm.get_unverified_context') + def test_cmapi_available(self, mock_get_unverified_context, mock_urlopen): + mock_get_unverified_context.return_value = ssl._create_unverified_context() + mock_urlopen.side_effect = HTTPError(None, 404, 'Not Found', None, None) + self.assertTrue(mcssavebrm.cmapi_available()) + + @patch('mcssavebrm.fcntl.ioctl') + @patch('mcssavebrm.socket.socket') + def test_get_ip_address_by_nic(self, mock_socket, mock_ioctl): + mock_socket_inst = MagicMock() + mock_socket.return_value = mock_socket_inst + mock_ioctl.return_value = b'\x00' * 20 + b'\x7f\x00\x00\x01' + 
self.assertEqual(mcssavebrm.get_ip_address_by_nic('lo'), '127.0.0.1') + + @patch('mcssavebrm.get_ip_address_by_nic', return_value='127.0.0.1') + @patch('mcssavebrm.socket.gethostbyaddr', return_value=('localhost', [], [])) + @patch('mcssavebrm.socket.if_nameindex', return_value=[(1, 'lo')]) + def test_is_primary_fallback(self, mock_if_nameindex, mock_gethostbyaddr, mock_get_ip_address_by_nic): + self.assertTrue(mcssavebrm.is_primary_fallback('localhost')) + + @patch('mcssavebrm.cmapi_available', return_value=True) + @patch('mcssavebrm.urlopen') + @patch('mcssavebrm.get_unverified_context') + @patch('mcssavebrm.get_api_key', return_value='test_api_key') + def test_is_node_primary(self, mock_get_api_key, mock_get_unverified_context, mock_urlopen, mock_cmapi_available): + mock_get_unverified_context.return_value = ssl._create_unverified_context() + mock_response = MagicMock() + mock_response.read.return_value = json.dumps({'is_primary': 'True'}).encode('utf-8') + mock_urlopen.return_value.__enter__.return_value = mock_response + root = MagicMock() + self.assertTrue(mcssavebrm.is_node_primary(root)) + + @patch('os.path.getsize', return_value=1024) + def test_get_file_size(self, mock_getsize): + self.assertEqual(mcssavebrm.get_file_size('test_file'), 1024) + + @patch('mcssavebrm.get_file_size', return_value=500) + def test_em_is_empty(self, mock_get_file_size): + self.assertTrue(mcssavebrm.em_is_empty('test_prefix')) + + @patch('os.remove') + @patch('os.listdir', return_value=[f'test_file{i}' for i in range(50)]) + def test_clean_up_backup_brm_files(self, mock_listdir, mock_remove): + mcssavebrm.clean_up_backup_brm_files('/dummy/path') + self.assertEqual(mock_remove.call_count, 10) + + @patch('os.remove') + @patch('mcssavebrm.glob.glob', return_value=['test_file1', 'test_file2']) + def test_remove_files_by_prefix_if_exist(self, mock_glob, mock_remove): + mcssavebrm.remove_files_by_prefix_if_exist('test_prefix') + mock_remove.assert_any_call('test_file1') + 
mock_remove.assert_any_call('test_file2') + + @patch('xml.etree.ElementTree.parse') + def test_get_config_root_from_file(self, mock_parse): + mock_tree = MagicMock() + mock_parse.return_value = mock_tree + self.assertEqual(mcssavebrm.get_config_root_from_file('test_file'), mock_tree.getroot()) + + @patch('time.time', return_value=1624478400) + def test_get_epoch_prefix(self, mock_time): + self.assertEqual(mcssavebrm.get_epoch_prefix(), 'backup_1624478400') + + @patch('mcssavebrm.get_epoch_prefix', return_value='backup_1624478400') + @patch('mcssavebrm.get_save_brm_dir_path', return_value='/tmp/columnstore_tmp_files/rdwrscratch/') + def test_get_save_brm_path_prefix(self, mock_get_epoch_prefix, mock_get_save_brm_dir_path): + root = MagicMock() + self.assertIn('backup_1624478400_BRM_saves', mcssavebrm.get_save_brm_path_prefix(root)) + + @patch('subprocess.check_call') + def test_call_save_brm(self, mock_check_call): + self.assertEqual(mcssavebrm.call_save_brm('test_path'), 'test_path') + + @patch('mcssavebrm.call_save_brm', return_value='test_path') + @patch('mcssavebrm.get_save_brm_path_prefix', return_value='test_path') + def test_call_save_brm_locally(self, mock_get_save_brm_path_prefix, mock_call_save_brm): + root = MagicMock() + self.assertEqual(mcssavebrm.call_save_brm_locally(root), 'test_path') + + @patch('subprocess.check_call') + @patch('mcssavebrm.get_save_brm_path_prefix', return_value='test_path') + def test_call_save_brm_with_local_fallback(self, mock_get_save_brm_path_prefix, mock_check_call): + root = MagicMock() + mock_check_call.side_effect = [subprocess.CalledProcessError(1, 'test'), None] + with self.assertRaises(SystemExit): + mcssavebrm.call_save_brm_with_local_fallback(root) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/utils/common/cgroupconfigurator.cpp b/utils/common/cgroupconfigurator.cpp index c950c99ae..d01c9e2bb 100644 --- a/utils/common/cgroupconfigurator.cpp +++ 
b/utils/common/cgroupconfigurator.cpp @@ -79,11 +79,11 @@ CGroupConfigurator::CGroupConfigurator() else cGroupDefined = true; + if (cGroupName == "just_no_group_use_local") + cGroupName = std::string{}; ifstream v2Check("/sys/fs/cgroup/cgroup.controllers"); cGroupVersion_ = (v2Check) ? v2 : v1; - - } CGroupConfigurator::~CGroupConfigurator() diff --git a/utils/common/collation.h b/utils/common/collation.h index 82e8e30b0..e5781b8d9 100644 --- a/utils/common/collation.h +++ b/utils/common/collation.h @@ -146,6 +146,28 @@ class Charset Charset(CHARSET_INFO* cs = nullptr) : mCharset(cs ? cs : &my_charset_bin) { } + + bool operator==(const Charset& rhs) const + { + return rhs.getCharset().cs_name.str == getCharset().cs_name.str; + } + + bool operator!=(const Charset& rhs) const + { + return !(*this == rhs); + } + + std::string convert(const std::string& from, const datatypes::Charset& fromCs) const + { + std::string result; + uint dummy_errors; + result.resize(from.size() * getCharset().mbmaxlen); + size_t resultingSize = my_convert(const_cast(result.c_str()), result.size(), &getCharset(), + from.c_str(), from.size(), &fromCs.getCharset(), &dummy_errors); + result.resize(resultingSize); + return result; + } + Charset(uint32_t charsetNumber); void setCharset(uint32_t charsetNumber); CHARSET_INFO& getCharset() const @@ -170,7 +192,7 @@ class Charset // please note that ConstString has an assertion so that nullptr data has zero length. const char* s1 = str1.str(); const char* s2 = str2.str(); - return mCharset->strnncollsp(s1 ? s1 : "", str1.length(), s2 ? s2 : "" , str2.length()); + return mCharset->strnncollsp(s1 ? s1 : "", str1.length(), s2 ? 
s2 : "", str2.length()); } int strnncollsp(const char* str1, size_t length1, const char* str2, size_t length2) const { diff --git a/utils/funcexp/CMakeLists.txt b/utils/funcexp/CMakeLists.txt index 3c94dea39..59a33bd67 100644 --- a/utils/funcexp/CMakeLists.txt +++ b/utils/funcexp/CMakeLists.txt @@ -150,7 +150,7 @@ add_library(funcexp SHARED ${funcexp_LIB_SRCS}) add_dependencies(funcexp loggingcpp) -target_link_libraries(funcexp ${NETSNMP_LIBRARIES} pron) +target_link_libraries(funcexp ${NETSNMP_LIBRARIES} pron pcre2-8) install(TARGETS funcexp DESTINATION ${ENGINE_LIBDIR} COMPONENT columnstore-engine) diff --git a/utils/funcexp/func_ifnull.cpp b/utils/funcexp/func_ifnull.cpp index 7cb5ebebc..2dcb67736 100644 --- a/utils/funcexp/func_ifnull.cpp +++ b/utils/funcexp/func_ifnull.cpp @@ -25,6 +25,7 @@ #include using namespace std; +#include "constantcolumn.h" #include "functor_all.h" #include "functioncolumn.h" using namespace execplan; @@ -60,7 +61,14 @@ int64_t Func_ifnull::getIntVal(Row& row, FunctionParm& parm, bool& isNull, Calpo if (isNull) { isNull = false; - return parm[1]->data()->getIntVal(row, isNull); + r = parm[1]->data()->getIntVal(row, isNull); + // MCOL-5237 In case we substitude a null value for `DATETIME` column with a value from `ConstantColumn` + // we have to convert the value into the `DATETIME` format for `int64_t` type. 
+ if (parm[0]->data()->resultType().colDataType == CalpontSystemCatalog::DATETIME && + dynamic_cast(parm[1]->data())) + { + r = r << 48; + } } return r; diff --git a/utils/funcexp/func_regexp.cpp b/utils/funcexp/func_regexp.cpp index 342cfcf0c..4404d7650 100644 --- a/utils/funcexp/func_regexp.cpp +++ b/utils/funcexp/func_regexp.cpp @@ -25,14 +25,10 @@ #include using namespace std; -#ifdef __linux__ -#include -#else -#include -using namespace boost; -#endif +#include "utils/pcre2/jpcre2.hpp" #include "functor_bool.h" +#include "functor_str.h" #include "functioncolumn.h" #include "predicateoperator.h" #include "constantcolumn.h" @@ -43,12 +39,59 @@ using namespace execplan; #include "errorcodes.h" #include "idberrorinfo.h" #include "errorids.h" + using namespace logging; namespace { -inline bool getBool(rowgroup::Row& row, funcexp::FunctionParm& pm, bool& isNull, - CalpontSystemCatalog::ColType& ct, long timeZone) + +using jp = jpcre2::select; + +struct PCREOptions +{ + PCREOptions(execplan::CalpontSystemCatalog::ColType& ct); + + datatypes::Charset dataCharset = my_charset_utf8mb3_general_ci; + datatypes::Charset libraryCharset = my_charset_utf8mb3_general_ci; + jpcre2::Uint flags = 0; + bool conversionIsNeeded = false; +}; + +PCREOptions::PCREOptions(execplan::CalpontSystemCatalog::ColType& ct) +{ + datatypes::Charset cs = ct.getCharset(); + datatypes::Charset myCharsetBin = my_charset_bin; + + // TODO use system variable instead if hardcode default_regex_flags_pcre(_current_thd()); + // PCRE2_DOTALL | PCRE2_DUPNAMES | PCRE2_EXTENDED | PCRE2_EXTENDED_MORE | PCRE2_MULTILINE | PCRE2_UNGREEDY; + + jpcre2::Uint defaultFlags = 0; + + flags = (cs != myCharsetBin ? (PCRE2_UTF | PCRE2_UCP) : 0) | + ((cs.getCharset().state & (MY_CS_BINSORT | MY_CS_CSSORT)) ? 0 : PCRE2_CASELESS) | defaultFlags; + + // Convert text data to utf-8. + dataCharset = cs; + libraryCharset = cs == myCharsetBin ? 
my_charset_bin : my_charset_utf8mb3_general_ci; +} + +struct RegExpParams +{ + std::string expression; + std::string pattern; + RegExpParams& CharsetFix(const PCREOptions options) + { + if (options.conversionIsNeeded) + return *this; + + expression = options.libraryCharset.convert(expression, options.dataCharset); + pattern = options.libraryCharset.convert(pattern, options.dataCharset); + return *this; + } +}; + +inline RegExpParams getEpressionAndPattern(rowgroup::Row& row, funcexp::FunctionParm& pm, bool& isNull, + CalpontSystemCatalog::ColType& ct, long timeZone) { string expr; string pattern; @@ -212,39 +255,148 @@ inline bool getBool(rowgroup::Row& row, funcexp::FunctionParm& pm, bool& isNull, } } -#ifdef __linux__ - regex_t re; - - regcomp(&re, pattern.c_str(), REG_EXTENDED | REG_NOSUB); - - int res = regexec(&re, expr.c_str(), 0, NULL, 0); - regfree(&re); - - if (res == 0) - return true; - else - return false; - -#else - std::regex pat(pattern.c_str()); - return std::regex_search(expr.c_str(), pat); -#endif + return RegExpParams{expr, pattern}; } - } // namespace namespace funcexp { +CalpontSystemCatalog::ColType Func_regexp_replace::operationType(FunctionParm& fp, + CalpontSystemCatalog::ColType& resultType) +{ + // operation type is not used by this functor + return fp[0]->data()->resultType(); +} + +CalpontSystemCatalog::ColType Func_regexp_substr::operationType(FunctionParm& fp, + CalpontSystemCatalog::ColType& resultType) +{ + // operation type is not used by this functor + return fp[0]->data()->resultType(); +} + +CalpontSystemCatalog::ColType Func_regexp_instr::operationType(FunctionParm& fp, + CalpontSystemCatalog::ColType& resultType) +{ + // operation type is not used by this functor + return fp[0]->data()->resultType(); +} + CalpontSystemCatalog::ColType Func_regexp::operationType(FunctionParm& fp, CalpontSystemCatalog::ColType& resultType) { return resultType; } -bool Func_regexp::getBoolVal(rowgroup::Row& row, FunctionParm& pm, bool& isNull, +/* 
+ returns the string subject with all occurrences of the regular expression pattern replaced by + the string replace. If no occurrences are found, then subject is returned as is. + https://mariadb.com/kb/en/regexp_replace/ +*/ +std::string Func_regexp_replace::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& ct) + +{ + RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone()); + + if (isNull) + return std::string{}; + + const auto& replaceWith = fp[2]->data()->getStrVal(row, isNull); + + if (replaceWith.isNull()) + return param.expression; + + PCREOptions options(ct); + param.CharsetFix(options); + jp::Regex re(param.pattern, options.flags); + + const auto& replaceWithStr = replaceWith.unsafeStringRef(); + if (options.conversionIsNeeded) + { + const auto& convertedReplaceToken = options.libraryCharset.convert(replaceWithStr, options.dataCharset); + return re.replace(param.expression, convertedReplaceToken, "g"); + } + + return re.replace(param.expression, replaceWithStr, "g"); +} + +/* + Returns the part of the string subject that matches the regular expression pattern, or an empty string if + pattern was not found. https://mariadb.com/kb/en/regexp_substr/ +*/ +std::string Func_regexp_substr::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& ct) + +{ + RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone()); + + if (isNull) + return std::string{}; + + PCREOptions options(ct); + param.CharsetFix(options); + + jp::Regex re(param.pattern, options.flags); + jp::RegexMatch rm(&re); + jp::VecNum vec_num; + + size_t count = rm.setSubject(param.expression).setNumberedSubstringVector(&vec_num).match(); + + if (count == 0) + return std::string{}; + + return vec_num[0][0]; +} + +/* + Returns the position of the first occurrence of the regular expression pattern in the string subject, or 0 + if pattern was not found. 
https://mariadb.com/kb/en/regexp_instr/ +*/ +std::string Func_regexp_instr::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& ct) + +{ + RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone()); + + if (isNull) + return std::string{}; + + PCREOptions options(ct); + param.CharsetFix(options); + + jp::Regex re(param.pattern, options.flags); + jp::RegexMatch rm(&re); + jpcre2::VecOff vec_soff; + + size_t count = rm.setSubject(param.expression).setMatchStartOffsetVector(&vec_soff).match(); + + if (count == 0) + return "0"; + + size_t offset = vec_soff[0]; + size_t charNumber = options.libraryCharset.getCharset().numchars(param.expression.c_str(), + param.expression.c_str() + offset); + + return std::to_string(charNumber + 1); +} + +/* + https://mariadb.com/kb/en/regexp/ +*/ +bool Func_regexp::getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, CalpontSystemCatalog::ColType& ct) { - return getBool(row, pm, isNull, ct, ct.getTimeZone()) && !isNull; + RegExpParams param = getEpressionAndPattern(row, fp, isNull, ct, ct.getTimeZone()); + + if (isNull) + return false; + + PCREOptions options(ct); + param.CharsetFix(options); + + jp::Regex re(param.pattern, options.flags); + return re.match(param.expression); } } // namespace funcexp diff --git a/utils/funcexp/funcexp.cpp b/utils/funcexp/funcexp.cpp index b07fdf27c..82f6003d8 100644 --- a/utils/funcexp/funcexp.cpp +++ b/utils/funcexp/funcexp.cpp @@ -215,7 +215,10 @@ FuncExp::FuncExp() fFuncMap["quarter"] = new Func_quarter(); fFuncMap["radians"] = new Func_radians(); // dlh fFuncMap["rand"] = new Func_rand(); - fFuncMap["regexp"] = new Func_regexp(); // dlh + fFuncMap["regexp"] = new Func_regexp(); + fFuncMap["regexp_instr"] = new Func_regexp_instr(); + fFuncMap["regexp_replace"] = new Func_regexp_replace(); + fFuncMap["regexp_substr"] = new Func_regexp_substr(); // dlh fFuncMap["repeat"] = new Func_repeat(); // dlh fFuncMap["replace"] 
= new Func_replace(); // dlh fFuncMap["replace_oracle"] = new Func_replace_oracle(); // dlh diff --git a/utils/funcexp/functor_str.h b/utils/funcexp/functor_str.h index b2fbe0933..17b8adea8 100644 --- a/utils/funcexp/functor_str.h +++ b/utils/funcexp/functor_str.h @@ -470,6 +470,61 @@ class Func_replace : public Func_Str execplan::CalpontSystemCatalog::ColType& op_ct); }; +class Func_regexp_replace : public Func_Str +{ + public: + Func_regexp_replace() : Func_Str("regexp_replace") + { + } + virtual ~Func_regexp_replace() + { + } + + execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp, + execplan::CalpontSystemCatalog::ColType& resultType); + + std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& op_ct); +}; + + +class Func_regexp_instr : public Func_Str +{ + public: + Func_regexp_instr() : Func_Str("regexp_instr") + { + } + virtual ~Func_regexp_instr() + { + } + + execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp, + execplan::CalpontSystemCatalog::ColType& resultType); + + std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& op_ct); +}; + + +class Func_regexp_substr : public Func_Str +{ + public: + Func_regexp_substr() : Func_Str("regexp_substr") + { + } + virtual ~Func_regexp_substr() + { + } + + execplan::CalpontSystemCatalog::ColType operationType(FunctionParm& fp, + execplan::CalpontSystemCatalog::ColType& resultType); + + std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& op_ct); +}; + + + class Func_replace_oracle : public Func_Str { public: diff --git a/utils/joiner/joinpartition.cpp b/utils/joiner/joinpartition.cpp index 6db3ade42..bccf89b38 100644 --- a/utils/joiner/joinpartition.cpp +++ b/utils/joiner/joinpartition.cpp @@ -31,7 +31,6 @@ using namespace rowgroup; using namespace messageqcpp; using namespace logging; - 
namespace joiner { // FIXME: Possible overflow, we have to null it after clearing files. @@ -165,6 +164,13 @@ JoinPartition::JoinPartition(const JoinPartition& jp, bool splitMode, uint32_t c smallFilename = filenamePrefix + "-small"; largeFilename = filenamePrefix + "-large"; + // FIXME(MCOL-5597):Tuning issue: with the defaults, each 100MB bucket would split s.t. + // the children could store another 4GB total. + // Given a good hash and evenly distributed data, + // the first level of expansion would happen for all JPs at once, giving a total + // capacity of (4GB * 40) = 160GB, when actual usage at that point is a little over 4GB. + // Instead, each will double in size, giving a capacity of 8GB -> 16 -> 32, and so on. + bucketCount = 2; smallSizeOnDisk = largeSizeOnDisk = 0; buffer.reinit(smallRG); @@ -442,7 +448,6 @@ int64_t JoinPartition::convertToSplitMode() } } - boost::filesystem::remove(smallFilename); smallFilename.clear(); @@ -481,8 +486,8 @@ int64_t JoinPartition::processSmallBuffer(RGData& rgData) ret = writeByteStream(0, bs); - if (rg.getRowCount()) - htSizeEstimate += rg.getDataSize(); + // FIXME(MCOL-5597): Properly calculate the size of the bucket. + htSizeEstimate += rg.getRowCount() * rg.getColumnCount(); // Check whether this partition is now too big -> convert to split mode. if (htTargetSize < htSizeEstimate && canConvertToSplitMode()) ret += convertToSplitMode(); @@ -849,7 +854,15 @@ uint64_t JoinPartition::writeByteStream(int which, ByteStream& bs) bs.advance(len); offset = fs.tellp(); + fs.close(); + + if (fs.fail()) + { + ostringstream os; + os << "Disk join file " << filename << ": close() failure, probable exhaustion of disk space." 
<< endl; + throw IDBExcept(os.str().c_str(), ERR_DBJ_FILE_IO_ERROR); + } return ret; } diff --git a/utils/loggingcpp/ErrorMessage.txt b/utils/loggingcpp/ErrorMessage.txt index 8b18aa32f..c6f67b803 100755 --- a/utils/loggingcpp/ErrorMessage.txt +++ b/utils/loggingcpp/ErrorMessage.txt @@ -108,6 +108,8 @@ 2060 ERR_UNION_DECIMAL_OVERFLOW Union operation exceeds maximum DECIMAL precision of 38. +2061 ERR_NOT_SUPPORTED_GROUPBY_ORDERBY_EXPRESSION %1% is not in GROUP BY clause, not a column or an expression that contains function. + # Sub-query errors 3001 ERR_NON_SUPPORT_SUB_QUERY_TYPE This subquery type is not supported yet. 3002 ERR_MORE_THAN_1_ROW Subquery returns more than 1 row. diff --git a/utils/pcre2/jpcre2.hpp b/utils/pcre2/jpcre2.hpp new file mode 100644 index 000000000..a3188fffb --- /dev/null +++ b/utils/pcre2/jpcre2.hpp @@ -0,0 +1,5153 @@ +/* ***************************************************************************** + * ******************* C++ wrapper for PCRE2 Library **************************** + * ***************************************************************************** + * Copyright (c) Md. Jahidul Hamid + * + * ----------------------------------------------------------------------------- + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * The names of its contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. 
+ * + * Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * */ + +/** @file jpcre2.hpp + * @brief Main header file for JPCRE2 library to be included by programs that uses its functionalities. + * It includes the `pcre2.h` header, therefore you shouldn't include `pcre2.h`, neither should you define `PCRE2_CODE_UNIT_WIDTH` before including + * `jpcre2.hpp`. + * If your `pcre2.h` header is not in standard include paths, you may include `pcre2.h` with correct path before including `jpcre2.hpp` + * manually. In this case you will have to define `PCRE2_CODE_UNIT_WIDTH` before including `pcre2.h`. + * Make sure to link required PCRE2 libraries when compiling. + * + * @author [Md Jahidul Hamid](https://github.com/neurobin) + */ + +#ifndef JPCRE2_HPP +#define JPCRE2_HPP + +#ifndef PCRE2_CODE_UNIT_WIDTH + +///@def PCRE2_CODE_UNIT_WIDTH +///This macro does not have any significance in JPCRE2 context. +///It is defined as 0 by default. Defining it before including jpcre2.hpp +///will override the default (discouraged as it will make it harder for you to detect problems), +///but still it will have no effect in a JPCRE2 perspective. +///Defining it with an invalid value will yield to compile error. 
+#define PCRE2_CODE_UNIT_WIDTH 0 +#endif + +//previous inclusion of pcre2.h will be respected and we won't try to include it twice. +//Thus one can pre-include pcre2.h from an arbitrary/non-standard path. +#ifndef PCRE2_MAJOR + #include // pcre2 header +#endif +#include // std::string, std::wstring +#include // std::vector +#include // std::map +#include // std::fprintf +#include // CHAR_BIT +#include // std::abort() + +#if __cplusplus >= 201103L || _MSVC_LANG >= 201103L + #define JPCRE2_USE_MINIMUM_CXX_11 1 + #include + #ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK + #include // std::function + #endif +#endif +#if __cplusplus >= 201703L || _MSVC_LANG >= 201703L + #define JPCRE2_USE_MINIMUM_CXX_17 1 + #include +#else + #ifdef JPCRE2_UNSET_CAPTURES_NULL + #error JPCRE2_UNSET_CAPTURES_NULL requires C++17 + #endif +#endif + +#define JPCRE2_UNUSED(x) ((void)(x)) +#if defined(NDEBUG) || defined(JPCRE2_NDEBUG) + #define JPCRE2_ASSERT(cond, msg) ((void)0) + #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) ((void)0) +#else + #define JPCRE2_ASSERT(cond, msg) jpcre2::jassert(cond, msg, __FILE__, __LINE__) + #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) jpcre2::_jvassert(cond, name, __FILE__, __LINE__) +#endif + +// In Windows, Windows.h defines ERROR macro +// It conflicts with our jpcre2::ERROR namespace +#ifdef ERROR +#undef ERROR +#endif + + +/** @namespace jpcre2 + * Top level namespace of JPCRE2. + * + * All functions, classes/structs, constants, enums that are provided by JPCRE2 belong to this namespace while + * **PCRE2** structs, functions, constants remain outside of its scope. + * + * If you want to use any PCRE2 functions or constants, + * remember that they are in the global scope and should be used as such. + */ +namespace jpcre2 { + + +///Define for JPCRE2 version. +///It can be used to support changes in different versions of the lib. +#define JPCRE2_VERSION 103201L + +/** @namespace jpcre2::INFO + * Namespace to provide information about JPCRE2 library itself. 
+ * Contains constant Strings with version info. + */ +namespace INFO { + static const char NAME[] = "JPCRE2"; ///< Name of the project + static const char FULL_VERSION[] = "10.32.01"; ///< Full version string + static const char VERSION_GENRE[] = "10"; ///< Generation, depends on original PCRE2 version + static const char VERSION_MAJOR[] = "32"; ///< Major version, updated when API change is made + static const char VERSION_MINOR[] = "01"; ///< Minor version, includes bug fix or minor feature upgrade + static const char VERSION_PRE_RELEASE[] = ""; ///< Alpha or beta (testing) release version +} + + +typedef PCRE2_SIZE SIZE_T; ///< Used for match count and vector size +typedef uint32_t Uint; ///< Used for options (bitwise operation) +typedef uint8_t Ush; ///< 8 bit unsigned integer. +typedef std::vector VecOff; ///< vector of size_t. +typedef std::vector VecOpt; ///< vector for Uint option values. + +/// @namespace jpcre2::ERROR +/// Namespace for error codes. +namespace ERROR { + /** Error numbers for JPCRE2. + * JPCRE2 error numbers are positive integers while + * PCRE2 error numbers are negative integers. + */ + enum { + INVALID_MODIFIER = 2, ///< Invalid modifier was detected + INSUFFICIENT_OVECTOR = 3 ///< Ovector was not big enough during a match + }; +} + + +/** These constants provide JPCRE2 options. + */ +enum { + NONE = 0x0000000u, ///< Option 0 (zero) + FIND_ALL = 0x0000002u, ///< Find all during match (global match) + JIT_COMPILE = 0x0000004u ///< Perform JIT compilation for optimization +}; + + +//enableif and is_same implementation +template +struct EnableIf{}; +template +struct EnableIf{typedef T Type;}; + +template +struct IsSame{ static const bool value = false; }; +template +struct IsSame{ static const bool value = true; }; + + +///JPCRE2 assert function. +///Aborts with an error message if condition fails. +///@param cond boolean condition +///@param msg message (std::string) +///@param f file where jassert was called. 
+///@param line line number where jassert was called. +static inline void jassert(bool cond, const char* msg, const char* f, size_t line){ + if(!cond) { + std::fprintf(stderr,"\n\tE: AssertionFailure\n%s\nAssertion failed in file: %s\t at line: %u\n", msg, f, (unsigned)line); + std::abort(); + } +} + +static inline void _jvassert(bool cond, char const * name, const char* f, size_t line){ + jassert(cond, (std::string("ValueError: \n\ + Required data vector of type ")+std::string(name)+" is empty.\n\ + Your MatchEvaluator callback function is not\n\ + compatible with existing data!!\n\ + You are trying to use a vector that does not\n\ + have any match data. Either call nreplace() or replace()\n\ + with true or perform a match with appropriate\n\ + callback function. For more details, refer to\n\ + the doc in MatchEvaluator section.").c_str(), f, line); +} + +static inline std::string _tostdstring(unsigned x){ + char buf[128]; + int written = std::sprintf(buf, "%u", x); + return (written > 0) ? 
std::string(buf, buf + written) : std::string();
+}
+
+
+////////////////////////// The following are type and function mappings from PCRE2 interface to JPCRE2 interface /////////////////////////
+
+//forward declaration
+
+template<Ush BS> struct Pcre2Type;
+template<Ush BS> struct Pcre2Func;
+
+//PCRE2 types
+//These templated types will be used in place of actual types
+template<Ush BS> struct Pcre2Type {};
+
+template<> struct Pcre2Type<8>{
+    //typedefs used
+    typedef PCRE2_UCHAR8 Pcre2Uchar;
+    typedef PCRE2_SPTR8 Pcre2Sptr;
+    typedef pcre2_code_8 Pcre2Code;
+    typedef pcre2_compile_context_8 CompileContext;
+    typedef pcre2_match_data_8 MatchData;
+    typedef pcre2_general_context_8 GeneralContext;
+    typedef pcre2_match_context_8 MatchContext;
+    typedef pcre2_jit_callback_8 JitCallback;
+    typedef pcre2_jit_stack_8 JitStack;
+};
+
+template<> struct Pcre2Type<16>{
+    //typedefs used
+    typedef PCRE2_UCHAR16 Pcre2Uchar;
+    typedef PCRE2_SPTR16 Pcre2Sptr;
+    typedef pcre2_code_16 Pcre2Code;
+    typedef pcre2_compile_context_16 CompileContext;
+    typedef pcre2_match_data_16 MatchData;
+    typedef pcre2_general_context_16 GeneralContext;
+    typedef pcre2_match_context_16 MatchContext;
+    typedef pcre2_jit_callback_16 JitCallback;
+    typedef pcre2_jit_stack_16 JitStack;
+};
+
+template<> struct Pcre2Type<32>{
+    //typedefs used
+    typedef PCRE2_UCHAR32 Pcre2Uchar;
+    typedef PCRE2_SPTR32 Pcre2Sptr;
+    typedef pcre2_code_32 Pcre2Code;
+    typedef pcre2_compile_context_32 CompileContext;
+    typedef pcre2_match_data_32 MatchData;
+    typedef pcre2_general_context_32 GeneralContext;
+    typedef pcre2_match_context_32 MatchContext;
+    typedef pcre2_jit_callback_32 JitCallback;
+    typedef pcre2_jit_stack_32 JitStack;
+};
+
+//wrappers for PCRE2 functions
+template<Ush BS> struct Pcre2Func{};
+
+//8-bit version
+template<> struct Pcre2Func<8> {
+    static Pcre2Type<8>::CompileContext* compile_context_create(Pcre2Type<8>::GeneralContext *gcontext){
+        return pcre2_compile_context_create_8(gcontext);
+    }
+    static void
compile_context_free(Pcre2Type<8>::CompileContext *ccontext){ + pcre2_compile_context_free_8(ccontext); + } + static Pcre2Type<8>::CompileContext* compile_context_copy(Pcre2Type<8>::CompileContext* ccontext){ + return pcre2_compile_context_copy_8(ccontext); + } + static const unsigned char * maketables(Pcre2Type<8>::GeneralContext* gcontext){ + return pcre2_maketables_8(gcontext); + } + static int set_character_tables(Pcre2Type<8>::CompileContext * ccontext, const unsigned char * table){ + return pcre2_set_character_tables_8(ccontext, table); + } + static Pcre2Type<8>::Pcre2Code * compile(Pcre2Type<8>::Pcre2Sptr pattern, + PCRE2_SIZE length, + uint32_t options, + int *errorcode, + PCRE2_SIZE *erroroffset, + Pcre2Type<8>::CompileContext *ccontext){ + return pcre2_compile_8(pattern, length, options, errorcode, erroroffset, ccontext); + } + static int jit_compile(Pcre2Type<8>::Pcre2Code *code, uint32_t options){ + return pcre2_jit_compile_8(code, options); + } + static int substitute( const Pcre2Type<8>::Pcre2Code *code, + Pcre2Type<8>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<8>::MatchData *match_data, + Pcre2Type<8>::MatchContext *mcontext, + Pcre2Type<8>::Pcre2Sptr replacement, + PCRE2_SIZE rlength, + Pcre2Type<8>::Pcre2Uchar *outputbuffer, + PCRE2_SIZE *outlengthptr){ + return pcre2_substitute_8( code, subject, length, startoffset, options, match_data, + mcontext, replacement, rlength, outputbuffer, outlengthptr); + } + //~ static int substring_get_bynumber(Pcre2Type<8>::MatchData *match_data, + //~ uint32_t number, + //~ Pcre2Type<8>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_bynumber_8(match_data, number, bufferptr, bufflen); + //~ } + //~ static int substring_get_byname(Pcre2Type<8>::MatchData *match_data, + //~ Pcre2Type<8>::Pcre2Sptr name, + //~ Pcre2Type<8>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return 
pcre2_substring_get_byname_8(match_data, name, bufferptr, bufflen); + //~ } + //~ static void substring_free(Pcre2Type<8>::Pcre2Uchar *buffer){ + //~ pcre2_substring_free_8(buffer); + //~ } + //~ static Pcre2Type<8>::Pcre2Code * code_copy(const Pcre2Type<8>::Pcre2Code *code){ + //~ return pcre2_code_copy_8(code); + //~ } + static void code_free(Pcre2Type<8>::Pcre2Code *code){ + pcre2_code_free_8(code); + } + static int get_error_message( int errorcode, + Pcre2Type<8>::Pcre2Uchar *buffer, + PCRE2_SIZE bufflen){ + return pcre2_get_error_message_8(errorcode, buffer, bufflen); + } + static Pcre2Type<8>::MatchData * match_data_create_from_pattern( + const Pcre2Type<8>::Pcre2Code *code, + Pcre2Type<8>::GeneralContext *gcontext){ + return pcre2_match_data_create_from_pattern_8(code, gcontext); + + } + static int match( const Pcre2Type<8>::Pcre2Code *code, + Pcre2Type<8>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<8>::MatchData *match_data, + Pcre2Type<8>::MatchContext *mcontext){ + return pcre2_match_8(code, subject, length, startoffset, options, match_data, mcontext); + } + static void match_data_free(Pcre2Type<8>::MatchData *match_data){ + pcre2_match_data_free_8(match_data); + } + static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<8>::MatchData *match_data){ + return pcre2_get_ovector_pointer_8(match_data); + } + static int pattern_info(const Pcre2Type<8>::Pcre2Code *code, uint32_t what, void *where){ + return pcre2_pattern_info_8(code, what, where); + } + static int set_newline(Pcre2Type<8>::CompileContext *ccontext, uint32_t value){ + return pcre2_set_newline_8(ccontext, value); + } + //~ static void jit_stack_assign(Pcre2Type<8>::MatchContext *mcontext, + //~ Pcre2Type<8>::JitCallback callback_function, + //~ void *callback_data){ + //~ pcre2_jit_stack_assign_8(mcontext, callback_function, callback_data); + //~ } + //~ static Pcre2Type<8>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize, + //~ 
Pcre2Type<8>::GeneralContext *gcontext){ + //~ return pcre2_jit_stack_create_8(startsize, maxsize, gcontext); + //~ } + //~ static void jit_stack_free(Pcre2Type<8>::JitStack *jit_stack){ + //~ pcre2_jit_stack_free_8(jit_stack); + //~ } + //~ static void jit_free_unused_memory(Pcre2Type<8>::GeneralContext *gcontext){ + //~ pcre2_jit_free_unused_memory_8(gcontext); + //~ } + //~ static Pcre2Type<8>::MatchContext *match_context_create(Pcre2Type<8>::GeneralContext *gcontext){ + //~ return pcre2_match_context_create_8(gcontext); + //~ } + //~ static Pcre2Type<8>::MatchContext *match_context_copy(Pcre2Type<8>::MatchContext *mcontext){ + //~ return pcre2_match_context_copy_8(mcontext); + //~ } + //~ static void match_context_free(Pcre2Type<8>::MatchContext *mcontext){ + //~ pcre2_match_context_free_8(mcontext); + //~ } + static uint32_t get_ovector_count(Pcre2Type<8>::MatchData *match_data){ + return pcre2_get_ovector_count_8(match_data); + } +}; + +//16-bit version +template<> struct Pcre2Func<16> { + static Pcre2Type<16>::CompileContext* compile_context_create(Pcre2Type<16>::GeneralContext *gcontext){ + return pcre2_compile_context_create_16(gcontext); + } + static void compile_context_free(Pcre2Type<16>::CompileContext *ccontext){ + pcre2_compile_context_free_16(ccontext); + } + static Pcre2Type<16>::CompileContext* compile_context_copy(Pcre2Type<16>::CompileContext* ccontext){ + return pcre2_compile_context_copy_16(ccontext); + } + static const unsigned char * maketables(Pcre2Type<16>::GeneralContext* gcontext){ + return pcre2_maketables_16(gcontext); + } + static int set_character_tables(Pcre2Type<16>::CompileContext * ccontext, const unsigned char * table){ + return pcre2_set_character_tables_16(ccontext, table); + } + static Pcre2Type<16>::Pcre2Code * compile(Pcre2Type<16>::Pcre2Sptr pattern, + PCRE2_SIZE length, + uint32_t options, + int *errorcode, + PCRE2_SIZE *erroroffset, + Pcre2Type<16>::CompileContext *ccontext){ + return pcre2_compile_16(pattern, length, 
options, errorcode, erroroffset, ccontext); + } + static int jit_compile(Pcre2Type<16>::Pcre2Code *code, uint32_t options){ + return pcre2_jit_compile_16(code, options); + } + static int substitute( const Pcre2Type<16>::Pcre2Code *code, + Pcre2Type<16>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<16>::MatchData *match_data, + Pcre2Type<16>::MatchContext *mcontext, + Pcre2Type<16>::Pcre2Sptr replacement, + PCRE2_SIZE rlength, + Pcre2Type<16>::Pcre2Uchar *outputbuffer, + PCRE2_SIZE *outlengthptr){ + return pcre2_substitute_16( code, subject, length, startoffset, options, match_data, + mcontext, replacement, rlength, outputbuffer, outlengthptr); + } + //~ static int substring_get_bynumber(Pcre2Type<16>::MatchData *match_data, + //~ uint32_t number, + //~ Pcre2Type<16>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_bynumber_16(match_data, number, bufferptr, bufflen); + //~ } + //~ static int substring_get_byname(Pcre2Type<16>::MatchData *match_data, + //~ Pcre2Type<16>::Pcre2Sptr name, + //~ Pcre2Type<16>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_byname_16(match_data, name, bufferptr, bufflen); + //~ } + //~ static void substring_free(Pcre2Type<16>::Pcre2Uchar *buffer){ + //~ pcre2_substring_free_16(buffer); + //~ } + //~ static Pcre2Type<16>::Pcre2Code * code_copy(const Pcre2Type<16>::Pcre2Code *code){ + //~ return pcre2_code_copy_16(code); + //~ } + static void code_free(Pcre2Type<16>::Pcre2Code *code){ + pcre2_code_free_16(code); + } + static int get_error_message( int errorcode, + Pcre2Type<16>::Pcre2Uchar *buffer, + PCRE2_SIZE bufflen){ + return pcre2_get_error_message_16(errorcode, buffer, bufflen); + } + static Pcre2Type<16>::MatchData * match_data_create_from_pattern( + const Pcre2Type<16>::Pcre2Code *code, + Pcre2Type<16>::GeneralContext *gcontext){ + return pcre2_match_data_create_from_pattern_16(code, gcontext); + + } + 
static int match( const Pcre2Type<16>::Pcre2Code *code, + Pcre2Type<16>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<16>::MatchData *match_data, + Pcre2Type<16>::MatchContext *mcontext){ + return pcre2_match_16(code, subject, length, startoffset, options, match_data, mcontext); + } + static void match_data_free(Pcre2Type<16>::MatchData *match_data){ + pcre2_match_data_free_16(match_data); + } + static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<16>::MatchData *match_data){ + return pcre2_get_ovector_pointer_16(match_data); + } + static int pattern_info(const Pcre2Type<16>::Pcre2Code *code, uint32_t what, void *where){ + return pcre2_pattern_info_16(code, what, where); + } + static int set_newline(Pcre2Type<16>::CompileContext *ccontext, uint32_t value){ + return pcre2_set_newline_16(ccontext, value); + } + //~ static void jit_stack_assign(Pcre2Type<16>::MatchContext *mcontext, + //~ Pcre2Type<16>::JitCallback callback_function, + //~ void *callback_data){ + //~ pcre2_jit_stack_assign_16(mcontext, callback_function, callback_data); + //~ } + //~ static Pcre2Type<16>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize, + //~ Pcre2Type<16>::GeneralContext *gcontext){ + //~ return pcre2_jit_stack_create_16(startsize, maxsize, gcontext); + //~ } + //~ static void jit_stack_free(Pcre2Type<16>::JitStack *jit_stack){ + //~ pcre2_jit_stack_free_16(jit_stack); + //~ } + //~ static void jit_free_unused_memory(Pcre2Type<16>::GeneralContext *gcontext){ + //~ pcre2_jit_free_unused_memory_16(gcontext); + //~ } + //~ static Pcre2Type<16>::MatchContext *match_context_create(Pcre2Type<16>::GeneralContext *gcontext){ + //~ return pcre2_match_context_create_16(gcontext); + //~ } + //~ static Pcre2Type<16>::MatchContext *match_context_copy(Pcre2Type<16>::MatchContext *mcontext){ + //~ return pcre2_match_context_copy_16(mcontext); + //~ } + //~ static void match_context_free(Pcre2Type<16>::MatchContext *mcontext){ + 
//~ pcre2_match_context_free_16(mcontext); + //~ } + static uint32_t get_ovector_count(Pcre2Type<16>::MatchData *match_data){ + return pcre2_get_ovector_count_16(match_data); + } +}; + +//32-bit version +template<> struct Pcre2Func<32> { + static Pcre2Type<32>::CompileContext* compile_context_create(Pcre2Type<32>::GeneralContext *gcontext){ + return pcre2_compile_context_create_32(gcontext); + } + static void compile_context_free(Pcre2Type<32>::CompileContext *ccontext){ + pcre2_compile_context_free_32(ccontext); + } + static Pcre2Type<32>::CompileContext* compile_context_copy(Pcre2Type<32>::CompileContext* ccontext){ + return pcre2_compile_context_copy_32(ccontext); + } + static const unsigned char * maketables(Pcre2Type<32>::GeneralContext* gcontext){ + return pcre2_maketables_32(gcontext); + } + static int set_character_tables(Pcre2Type<32>::CompileContext * ccontext, const unsigned char * table){ + return pcre2_set_character_tables_32(ccontext, table); + } + static Pcre2Type<32>::Pcre2Code * compile(Pcre2Type<32>::Pcre2Sptr pattern, + PCRE2_SIZE length, + uint32_t options, + int *errorcode, + PCRE2_SIZE *erroroffset, + Pcre2Type<32>::CompileContext *ccontext){ + return pcre2_compile_32(pattern, length, options, errorcode, erroroffset, ccontext); + } + static int jit_compile(Pcre2Type<32>::Pcre2Code *code, uint32_t options){ + return pcre2_jit_compile_32(code, options); + } + static int substitute( const Pcre2Type<32>::Pcre2Code *code, + Pcre2Type<32>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<32>::MatchData *match_data, + Pcre2Type<32>::MatchContext *mcontext, + Pcre2Type<32>::Pcre2Sptr replacement, + PCRE2_SIZE rlength, + Pcre2Type<32>::Pcre2Uchar *outputbuffer, + PCRE2_SIZE *outlengthptr){ + return pcre2_substitute_32( code, subject, length, startoffset, options, match_data, + mcontext, replacement, rlength, outputbuffer, outlengthptr); + } + //~ static int substring_get_bynumber(Pcre2Type<32>::MatchData 
*match_data, + //~ uint32_t number, + //~ Pcre2Type<32>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_bynumber_32(match_data, number, bufferptr, bufflen); + //~ } + //~ static int substring_get_byname(Pcre2Type<32>::MatchData *match_data, + //~ Pcre2Type<32>::Pcre2Sptr name, + //~ Pcre2Type<32>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_byname_32(match_data, name, bufferptr, bufflen); + //~ } + //~ static void substring_free(Pcre2Type<32>::Pcre2Uchar *buffer){ + //~ pcre2_substring_free_32(buffer); + //~ } + //~ static Pcre2Type<32>::Pcre2Code * code_copy(const Pcre2Type<32>::Pcre2Code *code){ + //~ return pcre2_code_copy_32(code); + //~ } + static void code_free(Pcre2Type<32>::Pcre2Code *code){ + pcre2_code_free_32(code); + } + static int get_error_message( int errorcode, + Pcre2Type<32>::Pcre2Uchar *buffer, + PCRE2_SIZE bufflen){ + return pcre2_get_error_message_32(errorcode, buffer, bufflen); + } + static Pcre2Type<32>::MatchData * match_data_create_from_pattern( + const Pcre2Type<32>::Pcre2Code *code, + Pcre2Type<32>::GeneralContext *gcontext){ + return pcre2_match_data_create_from_pattern_32(code, gcontext); + + } + static int match( const Pcre2Type<32>::Pcre2Code *code, + Pcre2Type<32>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<32>::MatchData *match_data, + Pcre2Type<32>::MatchContext *mcontext){ + return pcre2_match_32(code, subject, length, startoffset, options, match_data, mcontext); + } + static void match_data_free(Pcre2Type<32>::MatchData *match_data){ + pcre2_match_data_free_32(match_data); + } + static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<32>::MatchData *match_data){ + return pcre2_get_ovector_pointer_32(match_data); + } + static int pattern_info(const Pcre2Type<32>::Pcre2Code *code, uint32_t what, void *where){ + return pcre2_pattern_info_32(code, what, where); + } + static int 
set_newline(Pcre2Type<32>::CompileContext *ccontext, uint32_t value){ + return pcre2_set_newline_32(ccontext, value); + } + //~ static void jit_stack_assign(Pcre2Type<32>::MatchContext *mcontext, + //~ Pcre2Type<32>::JitCallback callback_function, + //~ void *callback_data){ + //~ pcre2_jit_stack_assign_32(mcontext, callback_function, callback_data); + //~ } + //~ static Pcre2Type<32>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize, + //~ Pcre2Type<32>::GeneralContext *gcontext){ + //~ return pcre2_jit_stack_create_32(startsize, maxsize, gcontext); + //~ } + //~ static void jit_stack_free(Pcre2Type<32>::JitStack *jit_stack){ + //~ pcre2_jit_stack_free_32(jit_stack); + //~ } + //~ static void jit_free_unused_memory(Pcre2Type<32>::GeneralContext *gcontext){ + //~ pcre2_jit_free_unused_memory_32(gcontext); + //~ } + //~ static Pcre2Type<32>::MatchContext *match_context_create(Pcre2Type<32>::GeneralContext *gcontext){ + //~ return pcre2_match_context_create_32(gcontext); + //~ } + //~ static Pcre2Type<32>::MatchContext *match_context_copy(Pcre2Type<32>::MatchContext *mcontext){ + //~ return pcre2_match_context_copy_32(mcontext); + //~ } + //~ static void match_context_free(Pcre2Type<32>::MatchContext *mcontext){ + //~ pcre2_match_context_free_32(mcontext); + //~ } + static uint32_t get_ovector_count(Pcre2Type<32>::MatchData *match_data){ + return pcre2_get_ovector_count_32(match_data); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +///Class to take a std::string modifier value with null safety. +///You don't need to make an instance of this class to pass modifier, +///just pass std::string or char const*, whatever seems feasible, +///implicit conversion will kick in and take care of things for you. +class Modifier{ + std::string mod; + + public: + ///Default constructor. + Modifier(){} + + ///Constructor that takes a std::string. 
+    ///@param x std::string as a reference.
+    Modifier(std::string const& x):mod(x){}
+
+    ///Constructor that takes char const * (null safety is provided by this one)
+    ///@param x char const *
+    Modifier(char const *x):mod(x?x:""){}
+
+    ///Returns the modifier string
+    ///@return modifier string (std::string)
+    std::string str() const { return mod; }
+
+    ///Returns the c_str() of modifier string
+    ///@return char const *
+    char const * c_str() const { return mod.c_str(); }
+
+    ///Returns the length of the modifier string
+    ///@return length
+    SIZE_T length() const{ return mod.length(); }
+
+    ///operator[] overload to access character by index.
+    ///@param i index
+    ///@return character at index i.
+    char operator[](SIZE_T i) const { return mod[i]; }
+};
+
+
+// Namespace for modifier constants.
+// For each modifier constant there is a jpcre2::Uint option value.
+// Some modifiers may have multiple values set together (ORed in bitwise operation) and
+// thus they may include other modifiers. Such an example is the 'n' modifier. It is combined together with 'u'.
+namespace MOD {
+
+    // Define modifiers for compile
+    // String of compile modifier characters for PCRE2 options
+    static const char C_N[] = "eijmnsuxADJU";
+    // Array of compile modifier values for PCRE2 options
+    // Uint is being used in getModifier() in for loop to get the number of element in this array,
+    // be sure to change there if you change here.
+ static const jpcre2::Uint C_V[12] = { PCRE2_MATCH_UNSET_BACKREF, // Modifier e + PCRE2_CASELESS, // Modifier i + PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF, // Modifier j + PCRE2_MULTILINE, // Modifier m + PCRE2_UTF | PCRE2_UCP, // Modifier n (includes u) + PCRE2_DOTALL, // Modifier s + PCRE2_UTF, // Modifier u + PCRE2_EXTENDED, // Modifier x + PCRE2_ANCHORED, // Modifier A + PCRE2_DOLLAR_ENDONLY, // Modifier D + PCRE2_DUPNAMES, // Modifier J + PCRE2_UNGREEDY // Modifier U + }; + + + // String of compile modifier characters for JPCRE2 options + static const char CJ_N[] = "S"; + // Array of compile modifier values for JPCRE2 options + static const jpcre2::Uint CJ_V[1] = { JIT_COMPILE, // Modifier S + }; + + + // Define modifiers for replace + // String of action (replace) modifier characters for PCRE2 options + static const char R_N[] = "eEgx"; + // Array of action (replace) modifier values for PCRE2 options + static const jpcre2::Uint R_V[4] = { PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier e + PCRE2_SUBSTITUTE_UNKNOWN_UNSET | PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier E (includes e) + PCRE2_SUBSTITUTE_GLOBAL, // Modifier g + PCRE2_SUBSTITUTE_EXTENDED // Modifier x + }; + + + // String of action (replace) modifier characters for JPCRE2 options + static const char RJ_N[] = ""; + // Array of action (replace) modifier values for JPCRE2 options + static const jpcre2::Uint RJ_V[1] = { NONE //placeholder + }; + + // Define modifiers for match + // String of action (match) modifier characters for PCRE2 options + static const char M_N[] = "A"; + // Array of action (match) modifier values for PCRE2 options + static const jpcre2::Uint M_V[1] = { PCRE2_ANCHORED // Modifier A + }; + + + // String of action (match) modifier characters for JPCRE2 options + static const char MJ_N[] = "g"; + // Array of action (match) modifier values for JPCRE2 options + static const jpcre2::Uint MJ_V[1] = { FIND_ALL, // Modifier g + }; + + static inline void toOption(Modifier const& mod, bool x, + 
Uint const * J_V, char const * J_N, SIZE_T SJ, + Uint const * V, char const * N, SIZE_T S, + Uint* po, Uint* jo, + int* en, SIZE_T* eo + ){ + //loop through mod + SIZE_T n = mod.length(); + for (SIZE_T i = 0; i < n; ++i) { + //First check for JPCRE2 mods + for(SIZE_T j = 0; j < SJ; ++j){ + if(J_N[j] == mod[i]) { + if(x) *jo |= J_V[j]; + else *jo &= ~J_V[j]; + goto endfor; + } + } + + //Now check for PCRE2 mods + for(SIZE_T j = 0; j< S; ++j){ + if(N[j] == mod[i]){ + if(x) *po |= V[j]; + else *po &= ~V[j]; + goto endfor; + } + } + + //Modifier didn't match, invalid modifier + *en = (int)ERROR::INVALID_MODIFIER; + *eo = (int)mod[i]; + + endfor:; + } + } + + static inline void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){ + toOption(mod, x, + MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint), + M_V, M_N, sizeof(M_V)/sizeof(Uint), + po, jo, en, eo); + } + + static inline void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){ + toOption(mod, x, + RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint), + R_V, R_N, sizeof(R_V)/sizeof(Uint), + po, jo, en, eo); + } + + static inline void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){ + toOption(mod, x, + CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint), + C_V, C_N, sizeof(C_V)/sizeof(Uint), + po, jo, en, eo); + } + + static inline std::string fromOption(Uint const * J_V, char const * J_N, SIZE_T SJ, + Uint const * V, char const * N, SIZE_T S, + Uint po, Uint jo + ){ + std::string mod; + //Calculate PCRE2 mod + for(SIZE_T i = 0; i < S; ++i){ + if( (V[i] & po) != 0 && + (V[i] & po) == V[i]) //One option can include other + mod += N[i]; + } + //Calculate JPCRE2 mod + for(SIZE_T i = 0; i < SJ; ++i){ + if( (J_V[i] & jo) != 0 && + (J_V[i] & jo) == J_V[i]) //One option can include other + mod += J_N[i]; + } + return mod; + } + + static inline std::string fromMatchOption(Uint po, Uint jo){ + return fromOption(MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint), + M_V, 
M_N, sizeof(M_V)/sizeof(Uint), + po, jo); + } + + static inline std::string fromReplaceOption(Uint po, Uint jo){ + return fromOption(RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint), + R_V, R_N, sizeof(R_V)/sizeof(Uint), + po, jo); + } + + static inline std::string fromCompileOption(Uint po, Uint jo){ + return fromOption(CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint), + C_V, C_N, sizeof(C_V)/sizeof(Uint), + po, jo); + } + +} //MOD namespace ends + +///Lets you create custom modifier tables. +///An instance of this class can be passed to +///match, replace or compile related class objects. +class ModifierTable{ + + std::string tabjms; + std::string tabms; + std::string tabjrs; + std::string tabrs; + std::string tabjcs; + std::string tabcs; + VecOpt tabjmv; + VecOpt tabmv; + VecOpt tabjrv; + VecOpt tabrv; + VecOpt tabjcv; + VecOpt tabcv; + + void toOption(Modifier const& mod, bool x, + VecOpt const& J_V, std::string const& J_N, + VecOpt const& V, std::string const& N, + Uint* po, Uint* jo, int* en, SIZE_T* eo + ) const{ + SIZE_T SJ = J_V.size(); + SIZE_T S = V.size(); + JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str()); + JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str()); + MOD::toOption(mod, x, + J_V.empty()?0:&J_V[0], J_N.c_str(), SJ, + V.empty()?0:&V[0], N.c_str(), S, + po, jo, en, eo + ); + } + + std::string fromOption(VecOpt const& J_V, std::string const& J_N, + VecOpt const& V, std::string const& N, + Uint po, Uint jo) const{ + SIZE_T SJ = J_V.size(); + SIZE_T S = V.size(); + JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str()); + JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier 
character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str()); + return MOD::fromOption(J_V.empty()?0:&J_V[0], J_N.c_str(), SJ, + V.empty()?0:&V[0], N.c_str(), S, + po, jo); + } + + void parseModifierTable(std::string& tabjs, VecOpt& tabjv, + std::string& tab_s, VecOpt& tab_v, + std::string const& tabs, VecOpt const& tabv); + public: + + ///Default constructor that creates an empty modifier table. + ModifierTable(){} + + ///@overload + ///@param deflt Initialize with default table if true, otherwise keep empty. + ModifierTable(bool deflt){ + if(deflt) setAllToDefault(); + } + + ///Reset the match modifier table to its initial (empty) state including memory. + ///@return A reference to the calling ModifierTable object. + ModifierTable& resetMatchModifierTable(){ + std::string().swap(tabjms); + std::string().swap(tabms); + VecOpt().swap(tabjmv); + VecOpt().swap(tabmv); + return *this; + } + + ///Reset the replace modifier table to its initial (empty) state including memory. + ///@return A reference to the calling ModifierTable object. + ModifierTable& resetReplaceModifierTable(){ + std::string().swap(tabjrs); + std::string().swap(tabrs); + VecOpt().swap(tabjrv); + VecOpt().swap(tabrv); + return *this; + } + + ///Reset the compile modifier table to its initial (empty) state including memory. + ///@return A reference to the calling ModifierTable object. + ModifierTable& resetCompileModifierTable(){ + std::string().swap(tabjcs); + std::string().swap(tabcs); + VecOpt().swap(tabjcv); + VecOpt().swap(tabcv); + return *this; + } + + ///Reset the modifier tables to their initial (empty) state including memory. + ///@return A reference to the calling ModifierTable object. + ModifierTable& reset(){ + resetMatchModifierTable(); + resetReplaceModifierTable(); + resetCompileModifierTable(); + return *this; + } + + ///Clear the match modifier table to its initial (empty) state. + ///Memory may retain for further use. 
+ ///@return A reference to the calling ModifierTable object. + ModifierTable& clearMatchModifierTable(){ + tabjms.clear(); + tabms.clear(); + tabjmv.clear(); + tabmv.clear(); + return *this; + } + + ///Clear the replace modifier table to its initial (empty) state. + ///Memory may retain for further use. + ///@return A reference to the calling ModifierTable object. + ModifierTable& clearReplaceModifierTable(){ + tabjrs.clear(); + tabrs.clear(); + tabjrv.clear(); + tabrv.clear(); + return *this; + } + + ///Clear the compile modifier table to its initial (empty) state. + ///Memory may retain for further use. + ///@return A reference to the calling ModifierTable object. + ModifierTable& clearCompileModifierTable(){ + tabjcs.clear(); + tabcs.clear(); + tabjcv.clear(); + tabcv.clear(); + return *this; + } + + ///Clear the modifier tables to their initial (empty) state. + ///Memory may retain for further use. + ///@return A reference to the calling ModifierTable object. + ModifierTable& clear(){ + clearMatchModifierTable(); + clearReplaceModifierTable(); + clearCompileModifierTable(); + return *this; + } + + ///Modifier parser for match related options. + ///@param mod modifier string + ///@param x whether to add or remove the modifers. + ///@param po pointer to PCRE2 match option that will be modified. + ///@param jo pointer to JPCRE2 match option that will be modified. + ///@param en where to put the error number. + ///@param eo where to put the error offset. + void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const { + toOption(mod, x,tabjmv,tabjms,tabmv, tabms,po,jo,en,eo); + } + + ///Modifier parser for replace related options. + ///@param mod modifier string + ///@param x whether to add or remove the modifers. + ///@param po pointer to PCRE2 replace option that will be modified. + ///@param jo pointer to JPCRE2 replace option that will be modified. + ///@param en where to put the error number. 
+ ///@param eo where to put the error offset. + void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const { + return toOption(mod, x,tabjrv,tabjrs,tabrv,tabrs,po,jo,en,eo); + } + + ///Modifier parser for compile related options. + ///@param mod modifier string + ///@param x whether to add or remove the modifers. + ///@param po pointer to PCRE2 compile option that will be modified. + ///@param jo pointer to JPCRE2 compile option that will be modified. + ///@param en where to put the error number. + ///@param eo where to put the error offset. + void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const { + return toOption(mod, x,tabjcv,tabjcs,tabcv,tabcs,po,jo,en,eo); + } + + ///Take match related option value and convert to modifier string. + ///@param po PCRE2 option. + ///@param jo JPCRE2 option. + ///@return modifier string (std::string) + std::string fromMatchOption(Uint po, Uint jo) const { + return fromOption(tabjmv,tabjms,tabmv,tabms,po,jo); + } + + ///Take replace related option value and convert to modifier string. + ///@param po PCRE2 option. + ///@param jo JPCRE2 option. + ///@return modifier string (std::string) + std::string fromReplaceOption(Uint po, Uint jo) const { + return fromOption(tabjrv,tabjrs,tabrv,tabrs,po,jo); + } + + ///Take compile related option value and convert to modifier string. + ///@param po PCRE2 option. + ///@param jo JPCRE2 option. + ///@return modifier string (std::string) + std::string fromCompileOption(Uint po, Uint jo) const { + return fromOption(tabjcv,tabjcs,tabcv,tabcs,po,jo); + } + + ///Set modifier table for match. + ///Takes a string and a vector of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabv vector of Uint (options). + ///@return A reference to the calling ModifierTable object. 
+ ModifierTable& setMatchModifierTable(std::string const& tabs, VecOpt const& tabv){ + parseModifierTable(tabjms, tabjmv, tabms, tabmv, tabs, tabv); + return *this; + } + + ///Set modifier table for match. + ///Takes a string and an array of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabvp array of Uint (options). If null, table is set to empty. + ///@return A reference to the calling ModifierTable object. + ModifierTable& setMatchModifierTable(std::string const& tabs, const Uint* tabvp){ + if(tabvp) { + VecOpt tabv(tabvp, tabvp + tabs.length()); + setMatchModifierTable(tabs, tabv); + } else clearMatchModifierTable(); + return *this; + } + + ///@overload + ///... + ///This one takes modifier and value by array. + ///If the arrays are not of the same length, the behavior is undefined. + ///If any of the argument is null, the table is set empty. + ///@param tabsp modifier string (list of modifiers). + ///@param tabvp array of Uint (options). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setMatchModifierTable(const char* tabsp, const Uint* tabvp){ + if(tabsp && tabvp) { + std::string tabs(tabsp); + VecOpt tabv(tabvp, tabvp + tabs.length()); + setMatchModifierTable(tabs, tabv); + } else clearMatchModifierTable(); + return *this; + } + + ///Set modifier table for replace. + ///Takes a string and a vector of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabv vector of Uint (options). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setReplaceModifierTable(std::string const& tabs, VecOpt const& tabv){ + parseModifierTable(tabjrs, tabjrv, tabrs, tabrv, tabs, tabv); + return *this; + } + + ///Set modifier table for replace. + ///Takes a string and an array of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabvp array of Uint (options). If null, table is set to empty. 
+ ///@return A reference to the calling ModifierTable object. + ModifierTable& setReplaceModifierTable(std::string const& tabs, const Uint* tabvp){ + if(tabvp) { + VecOpt tabv(tabvp, tabvp + tabs.length()); + setReplaceModifierTable(tabs, tabv); + } else clearReplaceModifierTable(); + return *this; + } + + ///@overload + ///... + ///This one takes modifier and value by array. + ///If the arrays are not of the same length, the behavior is undefined. + ///If any of the argument is null, the table is set empty. + ///@param tabsp modifier string (list of modifiers). + ///@param tabvp array of Uint (options). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setReplaceModifierTable(const char* tabsp, const Uint* tabvp){ + if(tabsp && tabvp) { + std::string tabs(tabsp); + VecOpt tabv(tabvp, tabvp + tabs.length()); + setReplaceModifierTable(tabs, tabv); + } else clearReplaceModifierTable(); + return *this; + } + + ///Set modifier table for compile. + ///Takes a string and a vector of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabv vector of Uint (options). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setCompileModifierTable(std::string const& tabs, VecOpt const& tabv){ + parseModifierTable(tabjcs, tabjcv, tabcs, tabcv, tabs, tabv); + return *this; + } + + ///Set modifier table for compile. + ///Takes a string and an array of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabvp array of Uint (options). If null, table is set to empty. + ///@return A reference to the calling ModifierTable object. + ModifierTable& setCompileModifierTable(std::string const& tabs, const Uint* tabvp){ + if(tabvp) { + VecOpt tabv(tabvp, tabvp + tabs.length()); + setCompileModifierTable(tabs, tabv); + } else clearCompileModifierTable(); + return *this; + } + + ///@overload + ///... + ///This one takes modifier and value by array. 
+ ///If the arrays are not of the same length, the behavior is undefined. + ///If any of the argument is null, the table is set empty. + ///@param tabsp modifier string (list of modifiers). + ///@param tabvp array of Uint (options). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setCompileModifierTable(const char* tabsp, const Uint* tabvp){ + if(tabsp && tabvp) { + std::string tabs(tabsp); + VecOpt tabv(tabvp, tabvp + tabs.length()); + setCompileModifierTable(tabs, tabv); + } else clearCompileModifierTable(); + return *this; + } + + ///Set match modifie table to default + ///@return A reference to the calling ModifierTable object. + ModifierTable& setMatchModifierTableToDefault(){ + tabjms = std::string(MOD::MJ_N, MOD::MJ_N + sizeof(MOD::MJ_V)/sizeof(Uint)); + tabms = std::string(MOD::M_N, MOD::M_N + sizeof(MOD::M_V)/sizeof(Uint)); + tabjmv = VecOpt(MOD::MJ_V, MOD::MJ_V + sizeof(MOD::MJ_V)/sizeof(Uint)); + tabmv = VecOpt(MOD::M_V, MOD::M_V + sizeof(MOD::M_V)/sizeof(Uint)); + return *this; + } + + ///Set replace modifier table to default. + ///@return A reference to the calling ModifierTable object. + ModifierTable& setReplaceModifierTableToDefault(){ + tabjrs = std::string(MOD::RJ_N, MOD::RJ_N + sizeof(MOD::RJ_V)/sizeof(Uint)); + tabrs = std::string(MOD::R_N, MOD::R_N + sizeof(MOD::R_V)/sizeof(Uint)); + tabjrv = VecOpt(MOD::RJ_V, MOD::RJ_V + sizeof(MOD::RJ_V)/sizeof(Uint)); + tabrv = VecOpt(MOD::R_V, MOD::R_V + sizeof(MOD::R_V)/sizeof(Uint)); + return *this; + } + + ///Set compile modifier table to default. + ///@return A reference to the calling ModifierTable object. 
+ ModifierTable& setCompileModifierTableToDefault(){ + tabjcs = std::string(MOD::CJ_N, MOD::CJ_N + sizeof(MOD::CJ_V)/sizeof(Uint)); + tabcs = std::string(MOD::C_N, MOD::C_N + sizeof(MOD::C_V)/sizeof(Uint)); + tabjcv = VecOpt(MOD::CJ_V, MOD::CJ_V + sizeof(MOD::CJ_V)/sizeof(Uint)); + tabcv = VecOpt(MOD::C_V, MOD::C_V + sizeof(MOD::C_V)/sizeof(Uint)); + return *this; + } + + ///Set all tables to default. + ///@return A reference to the calling ModifierTable object. + ModifierTable& setAllToDefault(){ + setMatchModifierTableToDefault(); + setReplaceModifierTableToDefault(); + setCompileModifierTableToDefault(); + return *this; + } +}; + + +//These message strings are used for error/warning message construction. +//take care to prevent multiple definition +template struct MSG{ + static std::basic_string INVALID_MODIFIER(void); + static std::basic_string INSUFFICIENT_OVECTOR(void); +}; +//specialization +template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return "Invalid modifier: "; } +template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return L"Invalid modifier: "; } +template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return "ovector wasn't big enough"; } +template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return L"ovector wasn't big enough"; } +#ifdef JPCRE2_USE_MINIMUM_CXX_11 +template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return u"Invalid modifier: "; } +template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return U"Invalid modifier: "; } +template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return u"ovector wasn't big enough"; } +template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return U"ovector wasn't big enough"; } +#endif + +///struct to select the types. +/// +///@tparam Char_T Character type (`char`, `wchar_t`, `char16_t`, `char32_t`) +///@tparam Map Optional parameter (Only `>= C++11`) to specify a map container (`std::map`, `std::unordered_map` etc..). 
Default is `std::map`. +/// +///The character type (`Char_T`) must be in accordance with the PCRE2 library you are linking against. +///If not sure which library you need, link against all 3 PCRE2 libraries and they will be used as needed. +/// +///If you want to be specific, then here's the rule: +/// +///1. If `Char_T` is 8 bit, you need 8 bit PCRE2 library +///2. If `Char_T` is 16 bit, you need 16 bit PCRE2 library +///3. If `Char_T` is 32 bit, you need 32 bit PCRE2 library +///4. if `Char_T` is not 8 or 16 or 32 bit, you will get compile error. +/// +///In `>= C++11` you get an additional optional template parameter to specify a map container. +///For example, you can use `std::unordered_map` instead of the default `std::map`: +/// ```cpp +/// #include +/// typedef jpcre2::select jp; +/// ``` +/// +///We will use the following typedef throughout this doc: +///```cpp +///typedef jpcre2::select jp; +///``` +#ifdef JPCRE2_USE_MINIMUM_CXX_11 +template class Map=std::map> +#else +template +#endif +struct select{ + + ///Typedef for character (`char`, `wchar_t`, `char16_t`, `char32_t`) + typedef Char_T Char; + + //typedef Char_T Char; + ///Typedef for string (`std::string`, `std::wstring`, `std::u16string`, `std::u32string`). + ///Defined as `std::basic_string`. + ///May be this list will make more sense: + ///Character | String + ///--------- | ------- + ///char | std::string + ///wchar_t | std::wstring + ///char16_t | std::u16string (>=C++11) + ///char32_t | std::u32string (>=C++11) + typedef typename std::basic_string String; + + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + ///Map for Named substrings. + typedef class Map MapNas; + ///Substring name to Substring number map. + typedef class Map MapNtN; + #else + ///Map for Named substrings. + typedef typename std::map MapNas; + ///Substring name to Substring number map. + typedef typename std::map MapNtN; + #endif + + ///Allow spelling mistake of MapNtN as MapNtn. 
+ typedef MapNtN MapNtn; + + ///Vector for Numbered substrings (Sub container). + #ifdef JPCRE2_UNSET_CAPTURES_NULL + typedef typename std::vector> NumSub; + #else + typedef typename std::vector NumSub; + #endif + ///Vector of matches with named substrings. + typedef typename std::vector VecNas; + ///Vector of substring name to substring number map. + typedef typename std::vector VecNtN; + ///Allow spelling mistake of VecNtN as VecNtn. + typedef VecNtN VecNtn; + ///Vector of matches with numbered substrings. + typedef typename std::vector VecNum; + + //These are to shorten the code + typedef typename Pcre2Type::Pcre2Uchar Pcre2Uchar; + typedef typename Pcre2Type::Pcre2Sptr Pcre2Sptr; + typedef typename Pcre2Type::Pcre2Code Pcre2Code; + typedef typename Pcre2Type::CompileContext CompileContext; + typedef typename Pcre2Type::MatchData MatchData; + typedef typename Pcre2Type::GeneralContext GeneralContext; + typedef typename Pcre2Type::MatchContext MatchContext; + typedef typename Pcre2Type::JitCallback JitCallback; + typedef typename Pcre2Type::JitStack JitStack; + + template + static String toString(T); //prevent implicit type conversion of T + + ///Converts a Char_T to jpcre2::select::String + ///@param a Char_T + ///@return jpcre2::select::String + static String toString(Char a){ + return a?String(1, a):String(); + } + + ///@overload + ///... + ///Converts a Char_T const * to jpcre2::select::String + ///@param a Char_T const * + ///@return jpcre2::select::String + static String toString(Char const *a){ + return a?String(a):String(); + } + + ///@overload + ///... + ///Converts a Char_T* to jpcre2::select::String + ///@param a Char_T const * + ///@return jpcre2::select::String + static String toString(Char* a){ + return a?String(a):String(); + } + + ///@overload + ///... 
+ ///Converts a PCRE2_UCHAR to String + ///@param a PCRE2_UCHAR + ///@return jpcre2::select::String + static String toString(Pcre2Uchar* a) { + return a?String((Char*) a):String(); + } + + ///Retruns error message from PCRE2 error number + ///@param err_num error number (negative) + ///@return message as jpcre2::select::String. + static String getPcre2ErrorMessage(int err_num) { + Pcre2Uchar buffer[sizeof(Char)*CHAR_BIT*1024]; + Pcre2Func::get_error_message(err_num, buffer, sizeof(buffer)); + return toString((Pcre2Uchar*) buffer); + } + + ///Returns error message (either JPCRE2 or PCRE2) from error number and error offset + ///@param err_num error number (negative for PCRE2, positive for JPCRE2) + ///@param err_off error offset + ///@return message as jpcre2::select::String. + static String getErrorMessage(int err_num, int err_off) { + if(err_num == (int)ERROR::INVALID_MODIFIER){ + return MSG::INVALID_MODIFIER() + toString((Char)err_off); + } else if(err_num == (int)ERROR::INSUFFICIENT_OVECTOR){ + return MSG::INSUFFICIENT_OVECTOR(); + } else if(err_num != 0) { + return getPcre2ErrorMessage((int) err_num); + } else return String(); + } + + //forward declaration + class Regex; + class RegexMatch; + class RegexReplace; + class MatchEvaluator; + + /** Provides public constructors to create RegexMatch objects. + * Every RegexMatch object should be associated with a Regex object. + * This class stores a pointer to its' associated Regex object, thus when + * the content of the associated Regex object is changed, there will be no need to + * set the pointer again. 
+ * + * Examples: + * + * ```cpp + * jp::Regex re; + * jp::RegexMatch rm; + * rm.setRegexObject(&re); + * rm.match("subject", "g"); // 0 match + * re.compile("\\w"); + * rm.match(); // 7 matches + * ``` + */ + class RegexMatch { + + private: + + friend class MatchEvaluator; + + Regex const *re; + + String m_subject; + String const *m_subject_ptr; + Uint match_opts; + Uint jpcre2_match_opts; + MatchContext *mcontext; + ModifierTable const * modtab; + MatchData * mdata; + + PCRE2_SIZE _start_offset; //name collision, use _ at start + + VecNum* vec_num; + VecNas* vec_nas; + VecNtN* vec_ntn; + + VecOff* vec_soff; + VecOff* vec_eoff; + + bool getNumberedSubstrings(int, Pcre2Sptr, PCRE2_SIZE*, uint32_t); + + bool getNamedSubstrings(int, int, Pcre2Sptr, Pcre2Sptr, PCRE2_SIZE*); + + void init_vars() { + re = 0; + vec_num = 0; + vec_nas = 0; + vec_ntn = 0; + vec_soff = 0; + vec_eoff = 0; + match_opts = 0; + jpcre2_match_opts = 0; + error_number = 0; + error_offset = 0; + _start_offset = 0; + m_subject_ptr = &m_subject; + mcontext = 0; + modtab = 0; + mdata = 0; + } + + void onlyCopy(RegexMatch const &rm){ + re = rm.re; //only pointer should be copied + + //pointer to subject may point to m_subject or other user data + m_subject_ptr = (rm.m_subject_ptr == &rm.m_subject) ? &m_subject //not &rm.m_subject + : rm.m_subject_ptr; + + //underlying data of vectors are not handled by RegexMatch + //thus it's safe to just copy the pointers. 
+ vec_num = rm.vec_num; + vec_nas = rm.vec_nas; + vec_ntn = rm.vec_ntn; + vec_soff = rm.vec_soff; + vec_eoff = rm.vec_eoff; + + match_opts = rm.match_opts; + jpcre2_match_opts = rm.jpcre2_match_opts; + error_number = rm.error_number; + error_offset = rm.error_offset; + _start_offset = rm._start_offset; + mcontext = rm.mcontext; + modtab = rm.modtab; + mdata = rm.mdata; + } + + void deepCopy(RegexMatch const &rm){ + m_subject = rm.m_subject; + onlyCopy(rm); + } + + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + void deepMove(RegexMatch& rm){ + m_subject = std::move_if_noexcept(rm.m_subject); + onlyCopy(rm); + } + #endif + + friend class Regex; + + protected: + + int error_number; + PCRE2_SIZE error_offset; + + public: + + ///Default constructor. + RegexMatch(){ + init_vars(); + } + + ///@overload + ///... + ///Creates a RegexMatch object associating a Regex object. + ///Underlying data is not modified. + ///@param r pointer to a Regex object + RegexMatch(Regex const *r) { + init_vars(); + re = r; + } + + ///@overload + ///... + ///Copy constructor. + ///@param rm Reference to RegexMatch object + RegexMatch(RegexMatch const &rm){ + init_vars(); + deepCopy(rm); + } + + ///Overloaded copy-assignment operator. + ///@param rm RegexMatch object + ///@return A reference to the calling RegexMatch object. + virtual RegexMatch& operator=(RegexMatch const &rm){ + if(this == &rm) return *this; + deepCopy(rm); + return *this; + } + + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + ///@overload + ///... + ///Move constructor. + ///This constructor steals resources from the argument. + ///It leaves the argument in a valid but indeterminate sate. + ///The indeterminate state can be returned to normal by calling reset() on that object. + ///@param rm rvalue reference to a RegexMatch object + RegexMatch(RegexMatch&& rm){ + init_vars(); + deepMove(rm); + } + + ///@overload + ///... + ///Overloaded move-assignment operator. + ///This constructor steals resources from the argument. 
+ ///It leaves the argument in a valid but indeterminate sate. + ///The indeterminate state can be returned to normal by calling reset() on that object. + ///@param rm rvalue reference to a RegexMatch object + ///@return A reference to the calling RegexMatch object. + virtual RegexMatch& operator=(RegexMatch&& rm){ + if(this == &rm) return *this; + deepMove(rm); + return *this; + } + #endif + + ///Destructor + ///Frees all internal memories that were used. + virtual ~RegexMatch() {} + + ///Reset all class variables to its default (initial) state including memory. + ///Data in the vectors will retain (as it's external) + ///You will need to pass vector pointers again after calling this function to get match results. + ///@return Reference to the calling RegexMatch object. + virtual RegexMatch& reset() { + String().swap(m_subject); //not ptr , external string won't be modified. + init_vars(); + return *this; + } + + ///Clear all class variables (may retain some memory for further use). + ///Data in the vectors will retain (as it's external) + ///You will need to pass vector pointers again after calling this function to get match results. + ///@return Reference to the calling RegexMatch object. + virtual RegexMatch& clear(){ + m_subject.clear(); //not ptr , external string won't be modified. + init_vars(); + return *this; + } + + ///reset match related errors to zero. + ///If you want to examine the error status of a function call in the method chain, + ///add this function just before your target function so that the error is set to zero + ///before that target function is called, and leave everything out after the target + ///function so that there will be no additional errors from other function calls. 
+ ///@return A reference to the RegexMatch object + ///@see Regex::resetErrors() + ///@see RegexReplace::resetErrors() + virtual RegexMatch& resetErrors(){ + error_number = 0; + error_offset = 0; + return *this; + } + + /// Returns the last error number + ///@return Last error number + virtual int getErrorNumber() const { + return error_number; + } + + /// Returns the last error offset + ///@return Last error offset + virtual int getErrorOffset() const { + return (int)error_offset; + } + + /// Returns the last error message + ///@return Last error message + virtual String getErrorMessage() const { + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + return select::getErrorMessage(error_number, error_offset); + #else + return select::getErrorMessage(error_number, error_offset); + #endif + } + + ///Get subject string (by value). + ///@return subject string + ///@see RegexReplace::getSubject() + virtual String getSubject() const { + return *m_subject_ptr; + } + + ///Get pointer to subject string. + ///Data can not be changed with this pointer. + ///@return constant subject string pointer + ///@see RegexReplace::getSubjectPointer() + virtual String const * getSubjectPointer() const { + return m_subject_ptr; + } + + + /// Calculate modifier string from PCRE2 and JPCRE2 options and return it. + /// + /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized + /// as long as you don't do that explicitly. Calling RegexMatch::setModifier() will re-set them. + /// + /// **Mixed or combined modifier**. + /// + /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers + /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they + /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options + /// get set, and when you remove the 'n' modifier (with `RegexMatch::changeModifier()`), both will get removed. 
+ ///@return Calculated modifier string (std::string) + ///@see Regex::getModifier() + ///@see RegexReplace::getModifier() + virtual std::string getModifier() const { + return modtab ? modtab->fromMatchOption(match_opts, jpcre2_match_opts) + : MOD::fromMatchOption(match_opts, jpcre2_match_opts); + } + + ///Get the modifier table that is set, + ///@return pointer to constant ModifierTable. + virtual ModifierTable const* getModifierTable(){ + return modtab; + } + + + ///Get PCRE2 option + ///@return PCRE2 option for match operation + ///@see Regex::getPcre2Option() + ///@see RegexReplace::getPcre2Option() + virtual Uint getPcre2Option() const { + return match_opts; + } + + /// Get JPCRE2 option + ///@return JPCRE2 options for math operation + ///@see Regex::getJpcre2Option() + ///@see RegexReplace::getJpcre2Option() + virtual Uint getJpcre2Option() const { + return jpcre2_match_opts; + } + + /// Get offset from where match will start in the subject. + /// @return Start offset + virtual PCRE2_SIZE getStartOffset() const { + return _start_offset; + } + + ///Get pre-set match start offset vector pointer. + ///The pointer must be set with RegexMatch::setMatchStartOffsetVector() beforehand + ///for this to work i.e it is just a convenience method to get the pre-set vector pointer. + ///@return pointer to the const match start offset vector + virtual VecOff const* getMatchStartOffsetVector() const { + return vec_soff; + } + + ///Get pre-set match end offset vector pointer. + ///The pointer must be set with RegexMatch::setMatchEndOffsetVector() beforehand + ///for this to work i.e it is just a convenience method to get the pre-set vector pointer. + ///@return pointer to the const end offset vector + virtual VecOff const* getMatchEndOffsetVector() const { + return vec_eoff; + } + + ///Get a pointer to the associated Regex object. + ///If no actual Regex object is associated, null is returned. + ///@return A pointer to the associated constant Regex object or null. 
+ virtual Regex const * getRegexObject() const { + return re; + } + + ///Get pointer to numbered substring vector. + ///@return Pointer to const numbered substring vector. + virtual VecNum const* getNumberedSubstringVector() const { + return vec_num; + } + + ///Get pointer to named substring vector. + ///@return Pointer to const named substring vector. + virtual VecNas const* getNamedSubstringVector() const { + return vec_nas; + } + + ///Get pointer to name to number map vector. + ///@return Pointer to const name to number map vector. + virtual VecNtN const* getNameToNumberMapVector() const { + return vec_ntn; + } + + ///Set the associated regex object. + ///Null pointer unsets it. + ///Underlying data is not modified. + ///@param r Pointer to a Regex object. + ///@return Reference to the calling RegexMatch object. + virtual RegexMatch& setRegexObject(Regex const *r){ + re = r; + return *this; + } + + /// Set a pointer to the numbered substring vector. + /// Null pointer unsets it. + /// + /// This vector will be filled with numbered (indexed) captured groups. + /// @param v pointer to the numbered substring vector + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setNumberedSubstringVector(VecNum* v) { + vec_num = v; + return *this; + } + + /// Set a pointer to the named substring vector. + /// Null pointer unsets it. + /// + /// This vector will be populated with named captured groups. + /// @param v pointer to the named substring vector + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setNamedSubstringVector(VecNas* v) { + vec_nas = v; + return *this; + } + + /// Set a pointer to the name to number map vector. + /// Null pointer unsets it. + /// + /// This vector will be populated with name to number map for captured groups. 
+ /// @param v pointer to the name to number map vector + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setNameToNumberMapVector(VecNtN* v) { + vec_ntn = v; + return *this; + } + + /// Set the pointer to a vector to store the offsets where matches + /// start in the subject. + /// Null pointer unsets it. + /// @param v Pointer to a jpcre2::VecOff vector (std::vector) + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setMatchStartOffsetVector(VecOff* v){ + vec_soff = v; + return *this; + } + + /// Set the pointer to a vector to store the offsets where matches + /// end in the subject. + /// Null pointer unsets it. + /// @param v Pointer to a VecOff vector (std::vector) + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setMatchEndOffsetVector(VecOff* v){ + vec_eoff = v; + return *this; + } + + ///Set the subject string for match. + ///This makes a copy of the subject string. + /// @param s Subject string + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::setSubject() + virtual RegexMatch& setSubject(String const &s) { + m_subject = s; + m_subject_ptr = &m_subject; //must overwrite + return *this; + } + + ///@overload + ///... + /// Works with the original without modifying it. Null pointer unsets the subject. + /// @param s Pointer to subject string + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::setSubject() + virtual RegexMatch& setSubject(String const *s) { + if(s) m_subject_ptr = s; + else { + m_subject.clear(); + m_subject_ptr = &m_subject; + } + return *this; + } + + + /// Set the modifier (resets all JPCRE2 and PCRE2 options) by calling RegexMatch::changeModifier(). + /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier to set their equivalent options. + /// @param s Modifier string. 
+ /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::setModifier() + /// @see Regex::setModifier() + virtual RegexMatch& setModifier(Modifier const& s) { + match_opts = 0; + jpcre2_match_opts = 0; + changeModifier(s, true); + return *this; + } + + ///Set a custom modifier table to be used. + ///@param mdt pointer to ModifierTable object. + ///@return Reference to the calling RegexMatch object. + virtual RegexMatch& setModifierTable(ModifierTable const * mdt){ + modtab = mdt; + return *this; + } + + /// Set JPCRE2 option for match (resets all) + /// @param x Option value + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::setJpcre2Option() + /// @see Regex::setJpcre2Option() + virtual RegexMatch& setJpcre2Option(Uint x) { + jpcre2_match_opts = x; + return *this; + } + + ///Set PCRE2 option match (overwrite existing option) + /// @param x Option value + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::setPcre2Option() + /// @see Regex::setPcre2Option() + virtual RegexMatch& setPcre2Option(Uint x) { + match_opts = x; + return *this; + } + + /// Set whether to perform global match + /// @param x True or False + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setFindAll(bool x) { + jpcre2_match_opts = x?jpcre2_match_opts | FIND_ALL:jpcre2_match_opts & ~FIND_ALL; + return *this; + } + + ///@overload + ///... + ///This function just calls RegexMatch::setFindAll(bool x) with `true` as the parameter + ///@return Reference to the calling RegexMatch object + virtual RegexMatch& setFindAll() { + return setFindAll(true); + } + + /// Set offset from where match starts. + /// When FIND_ALL is set, a global match would not be performed on all positions on the subject, + /// rather it will be performed from the start offset and onwards. 
+ /// @param offset Start offset + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setStartOffset(PCRE2_SIZE offset) { + _start_offset = offset; + return *this; + } + + ///Set the match context. + ///You can create match context using the native PCRE2 API. + ///The memory is not handled by RegexMatch object and not freed. + ///User will be responsible for freeing the memory of the match context. + ///@param match_context Pointer to the match context. + ///@return Reference to the calling RegexMatch object + virtual RegexMatch& setMatchContext(MatchContext *match_context){ + mcontext = match_context; + return *this; + } + + ///Return pointer to the match context that was previously set with setMatchContext(). + ///Handling memory is the callers' responsibility. + ///@return pointer to the match context (default: null). + virtual MatchContext* getMatchContext(){ + return mcontext; + } + + ///Set the match data block to be used. + ///The memory is not handled by RegexMatch object and not freed. + ///User will be responsible for freeing the memory of the match data block. + ///@param madt Pointer to a match data block. + ///@return Reference to the calling RegexMatch object + virtual RegexMatch& setMatchDataBlock(MatchData* madt){ + mdata = madt; + return *this; + } + + ///Get the pointer to the match data block that was set previously with setMatchData() + ///Handling memory is the callers' responsibility. + ///@return pointer to the match data (default: null). + virtual MatchData* getMatchDataBlock(){ + return mdata; + } + + /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options. + /// This function does not initialize or re-initialize options. + /// If you want to set options from scratch, initialize them to 0 before calling this function. + /// If invalid modifier is detected, then the error number for the RegexMatch + /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character. 
+ /// You can get the message with RegexMatch::getErrorMessage() function. + /// + /// @param mod Modifier string. + /// @param x Whether to add or remove option + /// @return Reference to the RegexMatch object + /// @see Regex::changeModifier() + /// @see RegexReplace::changeModifier() + virtual RegexMatch& changeModifier(Modifier const& mod, bool x){ + modtab ? modtab->toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset) + : MOD::toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset); + return *this; + } + + /// Add or remove a JPCRE2 option + /// @param opt JPCRE2 option value + /// @param x Add the option if it's true, remove otherwise. + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::changeJpcre2Option() + /// @see Regex::changeJpcre2Option() + virtual RegexMatch& changeJpcre2Option(Uint opt, bool x) { + jpcre2_match_opts = x ? jpcre2_match_opts | opt : jpcre2_match_opts & ~opt; + return *this; + } + + /// Add or remove a PCRE2 option + /// @param opt PCRE2 option value + /// @param x Add the option if it's true, remove otherwise. + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::changePcre2Option() + /// @see Regex::changePcre2Option() + virtual RegexMatch& changePcre2Option(Uint opt, bool x) { + match_opts = x ? match_opts | opt : match_opts & ~opt; + return *this; + } + + /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options. + /// This is just a wrapper of the original function RegexMatch::changeModifier() + /// @param mod Modifier string. 
+ /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::addModifier() + /// @see Regex::addModifier() + virtual RegexMatch& addModifier(Modifier const& mod){ + return changeModifier(mod, true); + } + + /// Add option to existing JPCRE2 options for match + /// @param x Option value + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::addJpcre2Option() + /// @see Regex::addJpcre2Option() + virtual RegexMatch& addJpcre2Option(Uint x) { + jpcre2_match_opts |= x; + return *this; + } + + /// Add option to existing PCRE2 options for match + /// @param x Option value + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::addPcre2Option() + /// @see Regex::addPcre2Option() + virtual RegexMatch& addPcre2Option(Uint x) { + match_opts |= x; + return *this; + } + + + /// Perform match operation using info from class variables and return the match count and + /// store the results in specified vectors. + /// + /// Note: This function uses pcre2_match() function to do the match. + ///@return Match count + virtual SIZE_T match(void); + }; + + + ///This class contains a typedef of a function pointer or a templated function wrapper (`std::function`) + ///to provide callback function to the `MatchEvaluator`. + ///`std::function` is used when `>=C++11` is being used , otherwise function pointer is used. + ///You can force using function pointer instead of `std::function` when `>=C++11` is used by defining the macro + ///`JPCRE2_USE_FUNCTION_POINTER_CALLBACK` before including jpcre2.hpp. + ///If you are using lambda function with capture, you must use the `std::function` approach. + /// + ///The callback function takes exactly three positional arguments: + ///@tparam T1 The first argument must be `jp::NumSub const &` aka `std::vector const &` (or `void*` if not needed). + ///@tparam T2 The second argument must be `jp::MapNas const &` aka `std::map const &` (or `void*` if not needed). 
+ ///@tparam T3 The third argument must be `jp::MapNtN const &` aka `std::map const &` (or `void*` if not needed). + /// + /// **Examples:** + /// ```cpp + /// typedef jpcre2::select jp; + /// jp::String myCallback1(jp::NumSub const &m1, void*, void*){ + /// return "("+m1[0]+")"; + /// } + /// + /// jp::String myCallback2(jp::NumSub const &m1, jp::MapNas const &m2, void*){ + /// return "("+m1[0]+"/"+m2.at("total")+")"; + /// } + /// //Now you can pass these functions in MatchEvaluator constructors to create a match evaluator + /// jp::MatchEvaluator me1(myCallback1); + /// + /// //Examples with lambda (>=C++11) + /// jp::MatchEvaluator me2([](jp::NumSub const &m1, void*, void*) + /// { + /// return "("+m1[0]+")"; + /// }); + /// ``` + ///@see MatchEvaluator + template + struct MatchEvaluatorCallback{ + #if !defined JPCRE2_USE_FUNCTION_POINTER_CALLBACK && JPCRE2_USE_MINIMUM_CXX_11 + typedef std::function Callback; + #else + typedef String (*Callback)(T1,T2,T3); + #endif + }; + + ///Provides some default static callback functions. + ///The primary goal of this class is to provide default + ///callback function to MatchEvaluator default constructor which is + ///essentially callback::erase. + ///This class does not allow object instantiation. + struct callback{ + ///Callback function that removes the matched part/s in the subject string + /// and takes all match vectors as argument. + ///Even though this function itself does not use the vectors, it still takes them + ///so that the caller can perform a match and populate all the match data to perform + ///further evaluation of other callback functions without doing the match again. + ///@param num jp::NumSub vector. + ///@param nas jp::MapNas map. + ///@param ntn jp::MapNtN map. + ///@return empty string. 
+ static String eraseFill(NumSub const &num, MapNas const &nas, MapNtN const &ntn){ + return String(); + } + + ///Callback function that removes the matched part/s in the subject string + ///and does not take any match vector. + ///This is a minimum cost pattern deleting callback function. + /// + ///It's the default callback function when you Instantiate + ///a MatchEvaluator object with its default constructor: + ///```cpp + ///MatchEvaluator me; + ///``` + ///@return empty string. + static String erase(void*, void*, void*){ + return String(); + } + + ///Callback function for populating match vectors that does not modify the subject string. + ///It always returns the total matched part and thus the subject string remains the same. + ///@param num jp::NumSub vector. + ///@param nas jp::MapNas map. + ///@param ntn jp::MapNtN map. + ///@return total match (group 0) of current match. + static String fill(NumSub const &num, MapNas const &nas, MapNtn const &ntn){ + #ifdef JPCRE2_UNSET_CAPTURES_NULL + return *num[0]; + #else + return num[0]; + #endif + } + + private: + //prevent object instantiation. + callback(); + callback(callback const &); + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + callback(callback&&); + #endif + ~callback(); + }; + + ///This class inherits RegexMatch and provides a similar functionality. + ///All public member functions from RegexMatch class are publicly available except the following: + ///* setNumberedSubstringVector + ///* setNamedSubstringVector + ///* setNameToNumberMapVector + ///* setMatchStartOffsetVector + ///* setMatchEndOffsetVector + /// + ///The use of above functions is not allowed as the vectors are created according to the callback function you pass. + /// + ///Each constructor of this class takes a callback function as argument (see `MatchEvaluatorCallback`). 
+ ///
+ ///It provides a MatchEvaluator::nreplace() function to perform replace operation using native JPCRE2 approach
+ ///and `MatchEvaluator::replace()` function for PCRE2 compatible replace operation.
+ ///
+ ///An instance of this class can also be passed with `RegexReplace::nreplace()` or `RegexReplace::replace()` function to perform replacement
+ ///according to this match evaluator.
+ ///
+ ///Match data is stored in vectors, and the vectors are populated according to the callback functions.
+ ///Populated vector data is never deleted but they get overwritten. Vector data can be manually zeroed out
+ ///by calling `MatchEvaluator::clearMatchData()`. If the capacities of those match vectors are desired
+ ///to be shrunk as well, instead of just clearing them, use `MatchEvaluator::resetMatchData()` instead.
+ ///
+ /// # Re-usability of Match Data
+ /// A match data populated with a callback function that takes only a jp::NumSub vector is not compatible
+ /// with the data created according to a callback function with a jp::MapNas vector.
+ /// Because, for this latter callback, jp::MapNas data is required but is not available (only jp::NumSub is available).
+ /// In such cases, previous match data can not be used to perform a new replacement operation with this second callback function.
+ ///
+ /// To populate the match vectors, one must call the `MatchEvaluator::match()` or `MatchEvaluator::nreplace()` function; they will populate
+ /// vectors with match data according to the callback function.
+ ///
+ /// ## Example:
+ ///
+ /// ```cpp
+ /// jp::String callback5(NumSub const &m, void*, MapNtn const &n){
+ /// return m[0];
+ /// }
+ /// jp::String callback4(void*, void*, MapNtn const &n){
+ /// return std::to_string(n.at("name")); //position of group 'name'.
+ /// }
+ /// jp::String callback2(void*, MapNas const &m, void*){
+ /// return m.at("name"); //substring by name
+ /// }
+ ///
+ /// jp::MatchEvaluator me;
+ /// me.setRegexObject(&re).setSubject("string").setCallback(callback5).nreplace();
+ /// //In above, nreplace() populates jp::NumSub and jp::MapNtn with match data.
+ ///
+ /// me.setCallback(callback4).nreplace(false);
+ /// //the above uses previous match result (note the 'false') which is OK,
+ /// //because, callback4 requires jp::MapNtn which was made available in the previous operation.
+ ///
+ /// //but the following is not OK: (assertion failure)
+ /// me.setCallback(callback2).nreplace(false);
+ /// //because, callback2 requires jp::MapNas data which is not available.
+ /// //now, this is OK:
+ /// me.setCallback(callback2).nreplace();
+ /// //because, it will recreate those match data including this one (jp::MapNas).
+ /// ```
+ ///
+ /// # Replace options
+ /// MatchEvaluator can not take replace options.
+ /// Replace options are taken directly by the replace functions: `nreplace()` and `replace()`.
+ ///
+ /// # Using as a match object
+ /// As it's just a subclass of RegexMatch, it can do all the things that RegexMatch can do, with some restrictions:
+ /// * matching options are modified to strip off bad options according to replacement (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT).
+ /// * match depends on the callback function. Only those vectors will be populated that are implemented by the callback functions so far
+ /// (multiple callback functions will set multiple match data vectors.)
+ /// * match vectors are internal to this class, you can not set them manually (without callback function). (you can get pointers to these vectors
+ /// with `getNumberedSubstringVector()` and related functions).
+ /// + ///@see MatchEvaluatorCallback + ///@see RegexReplace::nreplace() + class MatchEvaluator: virtual public RegexMatch{ + private: + friend class RegexReplace; + + VecNum vec_num; + VecNas vec_nas; + VecNtN vec_ntn; + VecOff vec_soff; + VecOff vec_eoff; + int callbackn; + typename MatchEvaluatorCallback::Callback callback0; + typename MatchEvaluatorCallback::Callback callback1; + typename MatchEvaluatorCallback::Callback callback2; + typename MatchEvaluatorCallback::Callback callback3; + typename MatchEvaluatorCallback::Callback callback4; + typename MatchEvaluatorCallback::Callback callback5; + typename MatchEvaluatorCallback::Callback callback6; + typename MatchEvaluatorCallback::Callback callback7; + //Q: Why the callback names seem random? is it random? + //A: No, it's not random, NumSub = 1, MapNas = 2, MapNtn = 4, thus: + // NumSub + MapNas = 3 + // NumSub + MapNtn = 5 + // MapNas + MapNtn = 6 + // NumSub + MapNas + MapNtn = 7 + //Q: Why is it like this? + //A: It's historical. Once, there was not this many callback declaration, there was only one (a templated one). + // The nreplace function itself used to calculate a mode value according to available vectors + // and determine what kind of callback function needed to be called. + //Q: Why the history changed? + //A: We had some compatibility issues with the single templated callback. + // Also, this approach proved to be more readable and robust. 
+ + PCRE2_SIZE buffer_size; + + + void init(){ + callbackn = 0; + callback0 = callback::erase; + callback1 = 0; + callback2 = 0; + callback3 = 0; + callback4 = 0; + callback5 = 0; + callback6 = 0; + callback7 = 0; + setMatchStartOffsetVector(&vec_soff); + setMatchEndOffsetVector(&vec_eoff); + buffer_size = 0; + } + + void setVectorPointersAccordingToCallback(){ + switch(callbackn){ + case 0: break; + case 1: setNumberedSubstringVector(&vec_num);break; + case 2: setNamedSubstringVector(&vec_nas);break; + case 3: setNumberedSubstringVector(&vec_num).setNamedSubstringVector(&vec_nas);break; + case 4: setNameToNumberMapVector(&vec_ntn);break; + case 5: setNumberedSubstringVector(&vec_num).setNameToNumberMapVector(&vec_ntn);break; + case 6: setNamedSubstringVector(&vec_nas).setNameToNumberMapVector(&vec_ntn);break; + case 7: setNumberedSubstringVector(&vec_num).setNamedSubstringVector(&vec_nas).setNameToNumberMapVector(&vec_ntn);break; + } + } + + void onlyCopy(MatchEvaluator const &me){ + callbackn = me.callbackn; + callback0 = me.callback0; + callback1 = me.callback1; + callback2 = me.callback2; + callback3 = me.callback3; + callback4 = me.callback4; + callback5 = me.callback5; + callback6 = me.callback6; + callback7 = me.callback7; + //must update the pointers to point to this class vectors. 
+ setVectorPointersAccordingToCallback();
+ buffer_size = me.buffer_size;
+ }
+
+ void deepCopy(MatchEvaluator const &me) {
+ vec_num = me.vec_num;
+ vec_nas = me.vec_nas;
+ vec_ntn = me.vec_ntn;
+ vec_soff = me.vec_soff;
+ vec_eoff = me.vec_eoff;
+ onlyCopy(me);
+ }
+
+ #ifdef JPCRE2_USE_MINIMUM_CXX_11
+ void deepMove(MatchEvaluator& me){
+ vec_num = std::move_if_noexcept(me.vec_num);
+ vec_nas = std::move_if_noexcept(me.vec_nas);
+ vec_ntn = std::move_if_noexcept(me.vec_ntn);
+ vec_soff = std::move_if_noexcept(me.vec_soff);
+ vec_eoff = std::move_if_noexcept(me.vec_eoff);
+ onlyCopy(me);
+ }
+ #endif
+
+ //prevent public access to some functions
+ MatchEvaluator& setNumberedSubstringVector(VecNum* v){
+ RegexMatch::setNumberedSubstringVector(v);
+ return *this;
+ }
+ MatchEvaluator& setNamedSubstringVector(VecNas* v){
+ RegexMatch::setNamedSubstringVector(v);
+ return *this;
+ }
+ MatchEvaluator& setNameToNumberMapVector(VecNtN* v){
+ RegexMatch::setNameToNumberMapVector(v);
+ return *this;
+ }
+ MatchEvaluator& setMatchStartOffsetVector(VecOff* v){
+ RegexMatch::setMatchStartOffsetVector(v);
+ return *this;
+ }
+ MatchEvaluator& setMatchEndOffsetVector(VecOff* v){
+ RegexMatch::setMatchEndOffsetVector(v);
+ return *this;
+ }
+
+ public:
+
+ ///Default constructor.
+ ///Sets callback::erase as the callback function.
+ ///Removes matched part/s from the subject string if the callback is not
+ ///changed.
+ /// ```cpp
+ /// jp::Regex re("\\s*string");
+ /// jp::MatchEvaluator me;
+ /// std::cout<<
+ /// me.setRegexObject(&re)
+ /// .setSubject("I am a string")
+ /// .nreplace();
+ /// //The above will delete ' string' from the subject
+ /// //thus the result will be 'I am a'
+ /// ```
+ explicit
+ MatchEvaluator():RegexMatch(){
+ init();
+ }
+
+ ///@overload
+ ///...
+ ///Constructor taking a Regex object pointer.
+ ///It sets the associated Regex object and
+ ///initializes the MatchEvaluator object with
+ ///callback::erase callback function.
+ ///Underlying data is not modified. + ///@param r constant Regex pointer. + explicit + MatchEvaluator(Regex const *r):RegexMatch(r){ + init(); + } + + ///@overload + ///... + ///Constructor taking a callback function. + ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + ///@overload + /// ... + ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + ///@overload + /// ... + ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + ///@overload + /// ... + ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + ///@overload + /// ... + ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + ///@overload + /// ... + ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + + ///@overload + /// ... 
+ ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
+ ///@param mef Callback function.
+ explicit
+ MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){
+ init();
+ setCallback(mef);
+ }
+
+
+
+ ///@overload
+ /// ...
+ ///It calls a corresponding MatchEvaluator::setCallback() function to set the callback function.
+ ///@param mef Callback function.
+ explicit
+ MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){
+ init();
+ setCallback(mef);
+ }
+
+
+
+ ///@overload
+ /// ...
+ ///Copy constructor.
+ ///@param me Reference to MatchEvaluator object
+ MatchEvaluator(MatchEvaluator const &me): RegexMatch(me){
+ init();
+ deepCopy(me);
+ }
+
+ ///Overloaded copy-assignment operator
+ ///@param me MatchEvaluator object
+ ///@return A reference to the calling MatchEvaluator object.
+ MatchEvaluator& operator=(MatchEvaluator const &me){
+ if(this == &me) return *this;
+ RegexMatch::operator=(me);
+ deepCopy(me);
+ return *this;
+ }
+
+ #ifdef JPCRE2_USE_MINIMUM_CXX_11
+
+ ///@overload
+ /// ...
+ ///Move constructor.
+ ///This constructor steals resources from the argument.
+ ///It leaves the argument in a valid but indeterminate state.
+ ///The indeterminate state can be returned to normal by calling reset() on that object.
+ ///@param me rvalue reference to a MatchEvaluator object
+ MatchEvaluator(MatchEvaluator&& me): RegexMatch(me){
+ init();
+ deepMove(me);
+ }
+
+ ///@overload
+ ///...
+ ///Overloaded move-assignment operator.
+ ///It steals resources from the argument.
+ ///It leaves the argument in a valid but indeterminate state.
+ ///The indeterminate state can be returned to normal by calling reset() on that object.
+ ///@param me rvalue reference to a MatchEvaluator object
+ ///@return A reference to the calling MatchEvaluator object.
+ ///@see MatchEvaluator(MatchEvaluator&& me) + MatchEvaluator& operator=(MatchEvaluator&& me){ + if(this == &me) return *this; + RegexMatch::operator=(me); + deepMove(me); + return *this; + } + + #endif + + virtual ~MatchEvaluator(){} + + ///Member function to set a callback function with no vector reference. + ///Callback function is always overwritten. The implemented vectors are set to be filled with match data. + ///Other vectors that were set previously, are not unset and thus they will be filled with match data too + ///when `match()` or `nreplace()` is called. + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback0 = mef; + callbackn = 0; + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::NumSub vector. + ///You will be working with a reference to the constant vector. + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback1 = mef; + callbackn = 1; + setNumberedSubstringVector(&vec_num); + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::NumSub and jp::MapNas. + ///You will be working with references of the constant vectors. + ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_nas["word"]; //wrong + ///map_nas.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNas mn = map_nas; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. 
+ MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback3 = mef; + callbackn = 3; + setNumberedSubstringVector(&vec_num); + setNamedSubstringVector(&vec_nas); + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::NumSub and jp::MapNtN. + ///You will be working with references of the constant vectors. + ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_ntn["word"]; //wrong + ///map_ntn.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNtN mn = map_ntn; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback5 = mef; + callbackn = 5; + setNumberedSubstringVector(&vec_num); + setNameToNumberMapVector(&vec_ntn); + return *this; + } + + + ///@overload + /// ... + ///Sets a callback function with a jp::NumSub, jp::MapNas, jp::MapNtN. + ///You will be working with references of the constant vectors. + ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_nas["word"]; //wrong + ///map_nas.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNas mn = map_nas; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback7 = mef; + callbackn = 7; + setNumberedSubstringVector(&vec_num); + setNamedSubstringVector(&vec_nas); + setNameToNumberMapVector(&vec_ntn); + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::MapNas. + ///You will be working with reference of the constant vector. 
+ ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_nas["word"]; //wrong + ///map_nas.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNas mn = map_nas; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback2 = mef; + callbackn = 2; + setNamedSubstringVector(&vec_nas); + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::MapNas, jp::MapNtN. + ///You will be working with reference of the constant vector. + ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_nas["word"]; //wrong + ///map_nas.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNas mn = map_nas; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback6 = mef; + callbackn = 6; + setNamedSubstringVector(&vec_nas); + setNameToNumberMapVector(&vec_ntn); + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::MapNtN. + ///You will be working with references of the constant vectors. + ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_ntn["word"]; //wrong + ///map_ntn.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNtN mn = map_ntn; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. 
+ MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback4 = mef; + callbackn = 4; + setNameToNumberMapVector(&vec_ntn); + return *this; + } + + ///Clear match data. + ///It clears all match data from all vectors (without shrinking). + ///For shrinking the vectors, use `resetMatchData()` + ///A call to `match()` or nreplace() will be required to produce match data again. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& clearMatchData(){ + vec_num.clear(); + vec_nas.clear(); + vec_ntn.clear(); + vec_soff.clear(); + vec_eoff.clear(); + return *this; + } + + ///Reset match data to initial state. + ///It deletes all match data from all vectors shrinking their capacity. + ///A call to `match()` or nreplace() will be required to produce match data again. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& resetMatchData(){ + VecNum().swap(vec_num); + VecNas().swap(vec_nas); + VecNtN().swap(vec_ntn); + VecOff().swap(vec_soff); + VecOff().swap(vec_eoff); + return *this; + } + + + ///Reset MatchEvaluator to initial state including memory. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& reset(){ + RegexMatch::reset(); + resetMatchData(); + init(); + return *this; + } + + ///Clears MatchEvaluator. + ///Returns everything to initial state (some memory may retain for further and faster use). + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& clear(){ + RegexMatch::clear(); + clearMatchData(); + init(); + return *this; + } + + ///Call RegexMatch::resetErrors(). + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& resetErrors(){ + RegexMatch::resetErrors(); + return *this; + } + + ///Call RegexMatch::setRegexObject(r). + ///@param r constant Regex object pointer + ///@return A reference to the calling MatchEvaluator object. 
+ MatchEvaluator& setRegexObject (Regex const *r){ + RegexMatch::setRegexObject(r); + return *this; + } + + ///Call RegexMatch::setSubject(String const &s). + ///@param s subject string. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setSubject (String const &s){ + RegexMatch::setSubject(s); + return *this; + } + + ///@overload + ///@param s constant subject string by pointer + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setSubject (String const *s){ + RegexMatch::setSubject(s); + return *this; + } + + ///Call RegexMatch::setModifier(Modifier const& s). + ///@param s modifier string. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setModifier (Modifier const& s){ + RegexMatch::setModifier(s); + return *this; + } + + ///Call RegexMatch::setModifierTable(ModifierTable const * s). + ///@param mdt pointer to ModifierTable object. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setModifierTable (ModifierTable const * mdt){ + RegexMatch::setModifierTable(mdt); + return *this; + } + + ///Call RegexMatch::setJpcre2Option(Uint x). + ///@param x JPCRE2 option value. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setJpcre2Option (Uint x){ + RegexMatch::setJpcre2Option(x); + return *this; + } + + ///Call RegexMatch::setPcre2Option (Uint x). + ///@param x PCRE2 option value. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setPcre2Option (Uint x){ + RegexMatch::setPcre2Option(x); + return *this; + } + + ///Call RegexMatch::setFindAll(bool x). + ///@param x true if global match, false otherwise. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setFindAll (bool x){ + RegexMatch::setFindAll(x); + return *this; + } + + ///Call RegexMatch::setFindAll(). + ///@return A reference to the calling MatchEvaluator object. 
+ MatchEvaluator& setFindAll(){ + RegexMatch::setFindAll(); + return *this; + } + + ///Call RegexMatch::setStartOffset (PCRE2_SIZE offset). + ///@param offset match start offset in the subject. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setStartOffset (PCRE2_SIZE offset){ + RegexMatch::setStartOffset(offset); + return *this; + } + + ///Call RegexMatch::setMatchContext(MatchContext *match_context). + ///@param match_context pointer to match context. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setMatchContext (MatchContext *match_context){ + RegexMatch::setMatchContext(match_context); + return *this; + } + + ///Call RegexMatch::setMatchDataBlock(MatchContext * mdt); + ///@param mdt pointer to match data block + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setMatchDataBlock(MatchData* mdt){ + RegexMatch::setMatchDataBlock(mdt); + return *this; + } + + ///Set the buffer size that will be used by pcre2_substitute (replace()). + ///If buffer size proves to be enough to fit the resultant string + ///from each match (not the total resultant string), it will yield one less call + ///to pcre2_substitute for each match. + ///@param x buffer size. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setBufferSize(PCRE2_SIZE x){ + buffer_size = x; + return *this; + } + + ///Get the initial buffer size that is being used by internal function pcre2_substitute + ///@return buffer_size + PCRE2_SIZE getBufferSize(){ + return buffer_size; + } + + ///Call RegexMatch::changeModifier(Modifier const& mod, bool x). + ///@param mod modifier string. + ///@param x true (add) or false (remove). + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& changeModifier (Modifier const& mod, bool x){ + RegexMatch::changeModifier(mod, x); + return *this; + } + + ///Call RegexMatch::changeJpcre2Option(Uint opt, bool x). 
+ ///@param opt JPCRE2 option + ///@param x true (add) or false (remove). + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& changeJpcre2Option (Uint opt, bool x){ + RegexMatch::changeJpcre2Option(opt, x); + return *this; + } + + ///Call RegexMatch::changePcre2Option(Uint opt, bool x). + ///@param opt PCRE2 option. + ///@param x true (add) or false (remove). + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& changePcre2Option (Uint opt, bool x){ + RegexMatch::changePcre2Option(opt, x); + return *this; + } + + ///Call RegexMatch::addModifier(Modifier const& mod). + ///@param mod modifier string. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& addModifier (Modifier const& mod){ + RegexMatch::addModifier(mod); + return *this; + } + + ///Call RegexMatch::addJpcre2Option(Uint x). + ///@param x JPCRE2 option. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& addJpcre2Option (Uint x){ + RegexMatch::addJpcre2Option(x); + return *this; + } + + ///Call RegexMatch::addPcre2Option(Uint x). + ///@param x PCRE2 option. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& addPcre2Option (Uint x){ + RegexMatch::addPcre2Option(x); + return *this; + } + + ///Perform match and return the match count. + ///This function strips off matching options (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT) that are considered + ///bad options for replacement operation and then calls + ///RegexMatch::match() to perform the match. + ///@return match count. + SIZE_T match(void){ + //remove bad matching options + RegexMatch::changePcre2Option(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT, false); + return RegexMatch::match(); + } + + ///Perform regex replace with this match evaluator. + ///This is a JPCRE2 native replace function (thus the name nreplace). 
+ ///It uses the `MatchEvaluatorCallback` function that was set with a constructor or `MatchEvaluator::setCallback()` function + ///to generate the replacement strings on the fly. + ///The string returned by the callback function will be treated as literal and will + ///not go through any further processing. + /// + ///This function performs a new match everytime it is called unless it is passed with a boolean `false` as the first argument. + ///To use existing match data that was created by a previous `MatchEvaluator::nreplace()` or `MatchEvaluator::match()`, call this + ///function with boolean `false` as the first argument. + /// + ///## Complexity + /// 1. Changes in replace related option takes effect without a re-match. + /// 2. Changes in match related option (e.g start offset) needs a re-match to take effect. + /// 3. To re-use existing match data, callback function must be compatible with the data, otherwise assertion error. + /// 4. If the associated Regex object or subject string changes, a new match must be performed, + /// trying to use the existing match data in such cases is undefined behavior. + /// + ///@param do_match Perform a new matching operation if true, otherwise use existing match data. + ///@param jro JPCRE2 replace options. + ///@param counter Pointer to a counter to store the number of replacement done. + ///@return resultant string after replace. + ///@see MatchEvaluator. + ///@see MatchEvaluatorCallback. + String nreplace(bool do_match=true, Uint jro=0, SIZE_T* counter=0); + + ///PCRE2 compatible replace function that uses this MatchEvaluator. + ///Performs regex replace with pcre2_substitute function + ///by generating the replacement strings dynamically with MatchEvaluator callback. + ///The string returned by callback function is processed by internal pcre2_substitute, thus allowing + ///all options that are provided by PCRE2 itself. 
+ /// + ///This function performs a new match everytime it is called unless it is passed with a boolean `false` as the first argument. + /// + ///## Complexity + /// 1. Changes in replace related option takes effect without a re-match. + /// 2. Changes in match related option (e.g start offset) needs a re-match to take effect. + /// 3. To re-use existing match data, callback function must be compatible with the data, otherwise assertion error. + /// 4. If the associated Regex object or subject string changes, a new match must be performed, + /// trying to use the existing match data in such cases is undefined behavior. + /// + ///@param do_match perform a new match if true, otherwise use existing data. + ///@param ro replace related PCRE2 options. + ///@param counter Pointer to a counter to store the number of replacement done. + ///@return resultant string after replacement. + String replace(bool do_match=true, Uint ro=0, SIZE_T* counter=0); + }; + + /** Provides public constructors to create RegexReplace objects. + * Every RegexReplace object should be associated with a Regex object. + * This class stores a pointer to its' associated Regex object, thus when + * the content of the associated Regex object is changed, there's no need to + * set the pointer again. 
+ * + * Examples: + * + * ```cpp + * jp::Regex re; + * jp::RegexReplace rr; + * rr.setRegexObject(&re); + * rr.replace("subject", "me"); // returns 'subject' + * re.compile("\\w+"); + * rr.replace(); // replaces 'subject' with 'me' i.e returns 'me' + * ``` + */ + class RegexReplace { + + private: + + friend class Regex; + + Regex const *re; + + String r_subject; + String *r_subject_ptr; //preplace method modifies it in-place + String r_replw; + String const *r_replw_ptr; + Uint replace_opts; + Uint jpcre2_replace_opts; + PCRE2_SIZE buffer_size; + PCRE2_SIZE _start_offset; + MatchData *mdata; + MatchContext *mcontext; + ModifierTable const * modtab; + SIZE_T last_replace_count; + SIZE_T* last_replace_counter; + + void init_vars() { + re = 0; + r_subject_ptr = &r_subject; + r_replw_ptr = &r_replw; + replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; + jpcre2_replace_opts = 0; + buffer_size = 0; + error_number = 0; + error_offset = 0; + _start_offset = 0; + mdata = 0; + mcontext = 0; + modtab = 0; + last_replace_count = 0; + last_replace_counter = &last_replace_count; + } + + void onlyCopy(RegexReplace const &rr){ + re = rr.re; //only pointer should be copied. + + //rr.r_subject_ptr may point to rr.r_subject or other user data + r_subject_ptr = (rr.r_subject_ptr == &rr.r_subject) ? &r_subject //not rr.r_subject + : rr.r_subject_ptr; //other user data + + r_replw = rr.r_replw; + //rr.r_replw_ptr may point to rr.r_replw or other user data + r_replw_ptr = (rr.r_replw_ptr == &rr.r_replw) ? &r_replw //not rr.r_replw + : rr.r_replw_ptr; //other user data + + replace_opts = rr.replace_opts; + jpcre2_replace_opts = rr.jpcre2_replace_opts; + buffer_size = rr.buffer_size; + error_number = rr.error_number; + error_offset = rr.error_offset; + _start_offset = rr._start_offset; + mdata = rr.mdata; + mcontext = rr.mcontext; + modtab = rr.modtab; + last_replace_count = rr.last_replace_count; + last_replace_counter = (rr.last_replace_counter == &rr.last_replace_count) ? 
&last_replace_count + : rr.last_replace_counter; + } + + void deepCopy(RegexReplace const &rr){ + r_subject = rr.r_subject; + onlyCopy(rr); + } + + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + void deepMove(RegexReplace& rr){ + r_subject = std::move_if_noexcept(rr.r_subject); + onlyCopy(rr); + } + #endif + + + protected: + + int error_number; + PCRE2_SIZE error_offset; + + public: + + ///Default constructor + RegexReplace(){ + init_vars(); + } + + ///@overload + /// ... + ///Creates a RegexReplace object associating a Regex object. + ///Regex object is not modified. + ///@param r pointer to a Regex object + RegexReplace(Regex const *r) { + init_vars(); + re = r; + } + + ///@overload + ///... + ///Copy constructor. + ///@param rr RegexReplace object reference + RegexReplace(RegexReplace const &rr){ + init_vars(); + deepCopy(rr); + } + + ///Overloaded Copy assignment operator. + ///@param rr RegexReplace object reference + ///@return A reference to the calling RegexReplace object + RegexReplace& operator=(RegexReplace const &rr){ + if(this == &rr) return *this; + deepCopy(rr); + return *this; + } + + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + + ///@overload + ///... + ///Move constructor. + ///This constructor steals resources from the argument. + ///It leaves the argument in a valid but indeterminate sate. + ///The indeterminate state can be returned to normal by calling reset() on that object. + ///@param rr rvalue reference to a RegexReplace object reference + RegexReplace(RegexReplace&& rr){ + init_vars(); + deepMove(rr); + } + + ///@overload + ///... + ///Overloaded move assignment operator. + ///This constructor steals resources from the argument. + ///It leaves the argument in a valid but indeterminate sate. + ///The indeterminate state can be returned to normal by calling reset() on that object. 
+ ///@param rr rvalue reference to a RegexReplace object reference + ///@return A reference to the calling RegexReplace object + RegexReplace& operator=(RegexReplace&& rr){ + if(this == &rr) return *this; + deepMove(rr); + return *this; + } + + #endif + + virtual ~RegexReplace() {} + + ///Reset all class variables to its default (initial) state including memory. + ///@return Reference to the calling RegexReplace object. + RegexReplace& reset() { + String().swap(r_subject); + String().swap(r_replw); + init_vars(); + return *this; + } + + ///Clear all class variables to its default (initial) state (some memory may retain for further use). + ///@return Reference to the calling RegexReplace object. + RegexReplace& clear() { + r_subject.clear(); + r_replw.clear(); + init_vars(); + return *this; + } + + ///Reset replace related errors to zero. + ///@return Reference to the calling RegexReplace object + ///@see Regex::resetErrors() + ///@see RegexMatch::resetErrors() + RegexReplace& resetErrors(){ + error_number = 0; + error_offset = 0; + return *this; + } + + /// Returns the last error number + ///@return Last error number + int getErrorNumber() const { + return error_number; + } + + /// Returns the last error offset + ///@return Last error offset + int getErrorOffset() const { + return (int)error_offset; + } + + /// Returns the last error message + ///@return Last error message + String getErrorMessage() const { + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + return select::getErrorMessage(error_number, error_offset); + #else + return select::getErrorMessage(error_number, error_offset); + #endif + } + + /// Get replacement string + ///@return replacement string + String getReplaceWith() const { + return *r_replw_ptr; + } + + /// Get pointer to replacement string + ///@return pointer to replacement string + String const * getReplaceWithPointer() const { + return r_replw_ptr; + } + + /// Get subject string + ///@return subject string + ///@see RegexMatch::getSubject() + String 
getSubject() const { + return *r_subject_ptr; + } + + /// Get pointer to subject string + ///@return Pointer to constant subject string + ///@see RegexMatch::getSubjectPointer() + String const * getSubjectPointer() const { + return r_subject_ptr; + } + + + /// Calculate modifier string from PCRE2 and JPCRE2 options and return it. + /// + /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized + /// as long as you don't do that explicitly. Calling RegexReplace::setModifier() will re-set them. + /// + /// **Mixed or combined modifier**. + /// + /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers + /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they + /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options + /// get set, and when you remove the 'n' modifier (with `RegexReplace::changeModifier()`), both will get removed. + /// @return Calculated modifier string (std::string) + ///@see RegexMatch::getModifier() + ///@see Regex::getModifier() + std::string getModifier() const { + return modtab ? modtab->fromReplaceOption(replace_opts, jpcre2_replace_opts) + : MOD::fromReplaceOption(replace_opts, jpcre2_replace_opts); + } + + ///Get the modifier table that is set, + ///@return constant ModifierTable pointer. + ModifierTable const* getModifierTable(){ + return modtab; + } + + ///Get start offset. 
+ ///@return the start offset where matching starts for replace operation + PCRE2_SIZE getStartOffset() const { + return _start_offset; + } + + /// Get PCRE2 option + ///@return PCRE2 option for replace + ///@see Regex::getPcre2Option() + ///@see RegexMatch::getPcre2Option() + Uint getPcre2Option() const { + return replace_opts; + } + + /// Get JPCRE2 option + ///@return JPCRE2 option for replace + ///@see Regex::getJpcre2Option() + ///@see RegexMatch::getJpcre2Option() + Uint getJpcre2Option() const { + return jpcre2_replace_opts; + } + + ///Get a pointer to the associated Regex object. + ///If no actual Regex object is associated, null is returned + ///@return A pointer to the associated constant Regex object or null + Regex const * getRegexObject() const { + return re; + } + + ///Return pointer to the match context that was previously set with setMatchContext(). + ///Handling memory is the callers' responsibility. + ///@return pointer to the match context (default: null). + MatchContext* getMatchContext(){ + return mcontext; + } + + ///Get the pointer to the match data block that was set previously with setMatchData() + ///Handling memory is the callers' responsibility. + ///@return pointer to the match data (default: null). + virtual MatchData* getMatchDataBlock(){ + return mdata; + } + + ///Get the initial buffer size that is being used by internal function pcre2_substitute + ///@return buffer_size + PCRE2_SIZE getBufferSize(){ + return buffer_size; + } + + ///Get the number of replacement in last replace operation. + ///If you set an external counter with RegexReplace::setReplaceCounter(), + ///a call to this getter method will dereference the pointer to the external counter + ///and return the value. + ///@return Last replace count + SIZE_T getLastReplaceCount(){ + return *last_replace_counter; + } + + ///Set an external counter variable to store the replacement count. + ///This counter will be updated after each replacement operation on this object. 
+ ///A call to this method will reset the internal counter to 0, thus when you reset the counter + ///to internal counter (by giving null as param), the previous replace count won't be available. + ///@param counter Pointer to a counter variable. Null sets the counter to default internal counter. + ///@return Reference to the calling RegexReplace object. + RegexReplace& setReplaceCounter(SIZE_T* counter){ + last_replace_count = 0; + last_replace_counter = counter ? counter : &last_replace_count; + return *this; + } + + ///Set the associated Regex object. + ///Regex object is not modified. + ///@param r Pointer to a Regex object. + ///@return Reference to the calling RegexReplace object. + RegexReplace& setRegexObject(Regex const *r){ + re = r; + return *this; + } + + /// Set the subject string for replace. + ///This makes a copy of the string. If no copy is desired or you are working + ///with big text, consider passing by pointer. + ///@param s Subject string + ///@return Reference to the calling RegexReplace object + ///@see RegexMatch::setSubject() + RegexReplace& setSubject(String const &s) { + r_subject = s; + r_subject_ptr = &r_subject; //must overwrite + return *this; + } + + ///@overload + ///... + /// Set pointer to the subject string for replace, null pointer unsets it. + /// The underlined data is not modified unless RegexReplace::preplace() method is used. + ///@param s Pointer to subject string + ///@return Reference to the calling RegexReplace object + ///@see RegexMatch::setSubject() + RegexReplace& setSubject(String *s) { + if(s) r_subject_ptr = s; + else { + r_subject.clear(); + r_subject_ptr = &r_subject; + } + return *this; + } + + /// Set the replacement string. + ///`$` is a special character which implies captured group. + /// + ///1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number. + ///2. A named substring can be referenced with `${name}`, where 'name' is the group name. + ///3. 
A literal `$` can be given as `$$`. + /// + ///**Note:** This function makes a copy of the string. If no copy is desired or + ///you are working with big text, consider passing the string with pointer. + /// + ///@param s String to replace with + ///@return Reference to the calling RegexReplace object + RegexReplace& setReplaceWith(String const &s) { + r_replw = s; + r_replw_ptr = &r_replw; //must overwrite + return *this; + } + + ///@overload + ///... + ///@param s Pointer to the string to replace with, null pointer unsets it. + ///@return Reference to the calling RegexReplace object + RegexReplace& setReplaceWith(String const *s) { + if(s) r_replw_ptr = s; + else { + r_replw.clear(); + r_replw_ptr = &r_replw; + } + return *this; + } + + /// Set the modifier string (resets all JPCRE2 and PCRE2 options) by calling RegexReplace::changeModifier(). + ///@param s Modifier string. + ///@return Reference to the calling RegexReplace object + ///@see RegexMatch::setModifier() + ///@see Regex::setModifier() + RegexReplace& setModifier(Modifier const& s) { + replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; /* must not be initialized to 0 */ + jpcre2_replace_opts = 0; + return changeModifier(s, true); + } + + ///Set a custom modifier table to be used. + ///@param mdt pointer to ModifierTable object. + /// @return Reference to the calling RegexReplace object. + RegexReplace& setModifierTable(ModifierTable const * mdt){ + modtab = mdt; + return *this; + } + + /// Set the initial buffer size to be allocated for replaced string (used by PCRE2) + ///@param x Buffer size + ///@return Reference to the calling RegexReplace object + RegexReplace& setBufferSize(PCRE2_SIZE x) { + buffer_size = x; + return *this; + } + + ///Set start offset. 
+ ///Set the offset where matching starts for replace operation + ///@param start_offset The offset where matching starts for replace operation + ///@return Reference to the calling RegexReplace object + RegexReplace& setStartOffset(PCRE2_SIZE start_offset){ + _start_offset = start_offset; + return *this; + } + + /// Set JPCRE2 option for replace (overwrite existing option) + ///@param x Option value + ///@return Reference to the calling RegexReplace object + ///@see RegexMatch::setJpcre2Option() + ///@see Regex::setJpcre2Option() + + RegexReplace& setJpcre2Option(Uint x) { + jpcre2_replace_opts = x; + return *this; + } + + /// Set PCRE2 option replace (overwrite existing option) + ///@param x Option value + ///@return Reference to the calling RegexReplace object + ///@see RegexMatch::setPcre2Option() + ///@see Regex::setPcre2Option() + + RegexReplace& setPcre2Option(Uint x) { + replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | x; + return *this; + } + + ///Set the match context to be used. + ///Native PCRE2 API may be used to create match context. + ///The memory of the match context is not handled by RegexReplace object and not freed. + ///User will be responsible for freeing memory. + ///@param match_context Pointer to match context. + ///@return Reference to the calling RegexReplace object. + RegexReplace& setMatchContext(MatchContext * match_context){ + mcontext = match_context; + return *this; + } + + ///Set the match data block to be used. + ///Native PCRE2 API may be used to create match data block. + ///The memory of the match data is not handled by RegexReplace object and not freed. + ///User will be responsible for creating/freeing memory. + ///@param match_data Pointer to match data. + ///@return Reference to the calling RegexReplace object. + RegexReplace& setMatchDataBlock(MatchData *match_data){ + mdata = match_data; + return *this; + } + + /// After a call to this function PCRE2 and JPCRE2 options will be properly set. 
+ /// This function does not initialize or re-initialize options. + /// If you want to set options from scratch, initialize them to 0 before calling this function. + /// + /// If invalid modifier is detected, then the error number for the RegexReplace + /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character. + /// You can get the message with RegexReplace::getErrorMessage() function. + /// @param mod Modifier string. + /// @param x Whether to add or remove option + /// @return Reference to the RegexReplace object + /// @see Regex::changeModifier() + /// @see RegexMatch::changeModifier() + RegexReplace& changeModifier(Modifier const& mod, bool x){ + modtab ? modtab->toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset) + : MOD::toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset); + return *this; + } + + /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options. + /// Add or remove a JPCRE2 option + /// @param opt JPCRE2 option value + /// @param x Add the option if it's true, remove otherwise. + /// @return Reference to the calling RegexReplace object + /// @see RegexMatch::changeJpcre2Option() + /// @see Regex::changeJpcre2Option() + RegexReplace& changeJpcre2Option(Uint opt, bool x) { + jpcre2_replace_opts = x ? jpcre2_replace_opts | opt : jpcre2_replace_opts & ~opt; + return *this; + } + + /// Add or remove a PCRE2 option + /// @param opt PCRE2 option value + /// @param x Add the option if it's true, remove otherwise. + /// @return Reference to the calling RegexReplace object + /// @see RegexMatch::changePcre2Option() + /// @see Regex::changePcre2Option() + RegexReplace& changePcre2Option(Uint opt, bool x) { + replace_opts = x ? replace_opts | opt : replace_opts & ~opt; + //replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; /* It's important, but let user override it. 
*/ + return *this; + } + + /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options. + /// This is just a wrapper of the original function RegexReplace::changeModifier() + /// provided for convenience. + /// @param mod Modifier string. + /// @return Reference to the calling RegexReplace object + /// @see RegexMatch::addModifier() + /// @see Regex::addModifier() + RegexReplace& addModifier(Modifier const& mod){ + return changeModifier(mod, true); + } + + /// Add specified JPCRE2 option to existing options for replace. + ///@param x Option value + ///@return Reference to the calling RegexReplace object + ///@see RegexMatch::addJpcre2Option() + ///@see Regex::addJpcre2Option() + RegexReplace& addJpcre2Option(Uint x) { + jpcre2_replace_opts |= x; + return *this; + } + + /// Add specified PCRE2 option to existing options for replace + ///@param x Option value + ///@return Reference to the calling RegexReplace object + ///@see RegexMatch::addPcre2Option() + ///@see Regex::addPcre2Option() + RegexReplace& addPcre2Option(Uint x) { + replace_opts |= x; + return *this; + } + + /// Perform regex replace by retrieving subject string, replacement string, modifier and other options from class variables. + /// In the replacement string (see RegexReplace::setReplaceWith()) `$` is a special character which implies captured group. + /// 1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number. + /// 2. A named substring can be referenced with `${name}`, where 'name' is the group name. + /// 3. A literal `$` can be given as `$$`. + /// 4. Bash like features: ${:-} and ${:+:}, where is a group number or name. + /// + ///All options supported by pcre2_substitute is available. + /// + /// Note: This function calls pcre2_substitute() to do the replacement. + ///@return Replaced string + String replace(void); + + /// Perl compatible replace method. + /// Modifies subject string in-place and returns replace count. 
+ /// + /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`. + /// @return replace count + SIZE_T preplace(void){ + *r_subject_ptr = replace(); + return *last_replace_counter; + } + + /// Perl compatible replace method with match evaluator. + /// Modifies subject string in-place and returns replace count. + /// MatchEvaluator class does not have a implementation of this replace method, thus it is not possible + /// to re-use match data with preplace() method. + /// Re-using match data with preplace doesn't actually make any sense, because new subject will + /// always require new match data. + /// + /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`. + /// @param me MatchEvaluator object. + /// @return replace count + SIZE_T preplace(MatchEvaluator me){ + *r_subject_ptr = me.setRegexObject(getRegexObject()) + .setSubject(r_subject_ptr) //do not use method + .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0) + .setMatchContext(getMatchContext()) + .setMatchDataBlock(getMatchDataBlock()) + .setBufferSize(getBufferSize()) + .setStartOffset(getStartOffset()) + .replace(true, getPcre2Option(), last_replace_counter); + return *last_replace_counter; + } + + ///JPCRE2 native replace function. + ///A different name is adopted to + ///distinguish itself from the regular replace() function which + ///uses pcre2_substitute() to do the replacement; contrary to that, + ///it will provide a JPCRE2 native way of replacement operation. + ///It takes a MatchEvaluator object which provides a callback function that is used + ///to generate replacement string on the fly. Any replacement string set with + ///`RegexReplace::setReplaceWith()` function will have no effect. + ///The string returned by the callback function will be treated as literal and will + ///not go through any further processing. 
+ /// + ///This function works on a copy of the MatchEvaluator, and thus makes no changes + ///to the original. The copy is modified as below: + /// + ///1. Global replacement will set FIND_ALL for match, unset otherwise. + ///2. Bad matching options such as `PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT` will be removed. + ///3. subject, start_offset and Regex object will change according to the RegexReplace object. + ///4. match context, and match data block will be changed according to the RegexReplace object. + /// + ///It calls MatchEvaluator::nreplace() on the MatchEvaluator object to perform the replacement. + /// + ///It always performs a new match. + ///@param me A MatchEvaluator object. + ///@return The resultant string after replacement. + ///@see MatchEvaluator::nreplace() + ///@see MatchEvaluator + ///@see MatchEvaluatorCallback + String nreplace(MatchEvaluator me){ + return me.setRegexObject(getRegexObject()) + .setSubject(getSubjectPointer()) + .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0) + .setMatchContext(getMatchContext()) + .setMatchDataBlock(getMatchDataBlock()) + .setStartOffset(getStartOffset()) + .nreplace(true, getJpcre2Option(), last_replace_counter); + } + + ///PCRE2 compatible replace function that takes a MatchEvaluator. + ///String returned by callback function is processed by pcre2_substitute, + ///thus all PCRE2 substitute options are supported by this replace function. + /// + ///It always performs a new match. + ///@param me MatchEvaluator instance, (copied and modified according to this object). + ///@return resultant string. 
+ ///@see replace() + String replace(MatchEvaluator me){ + return me.setRegexObject(getRegexObject()) + .setSubject(getSubjectPointer()) + .setFindAll((getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0) + .setMatchContext(getMatchContext()) + .setMatchDataBlock(getMatchDataBlock()) + .setBufferSize(getBufferSize()) + .setStartOffset(getStartOffset()) + .replace(true, getPcre2Option(), last_replace_counter); + } + }; + + + /** Provides public constructors to create Regex object. + * Each regex pattern needs an object of this class and each pattern needs to be compiled. + * Pattern compilation can be done using one of its' overloaded constructors or the `Regex::compile()` + * member function. + * + * Examples: + * + * ```cpp + * jp::Regex re; //does not perform a compile + * re.compile("pattern", "modifier"); + * jp::Regex re2("pattern", "modifier"); //performs a compile + * ``` + * + */ + class Regex { + + private: + + friend class RegexMatch; + friend class RegexReplace; + friend class MatchEvaluator; + + String pat_str; + String const *pat_str_ptr; + Pcre2Code *code; + Uint compile_opts; + Uint jpcre2_compile_opts; + ModifierTable const * modtab; + + CompileContext *ccontext; + std::vector tabv; + + + void init_vars() { + jpcre2_compile_opts = 0; + compile_opts = 0; + error_number = 0; + error_offset = 0; + code = 0; + pat_str_ptr = &pat_str; + ccontext = 0; + modtab = 0; + } + + void freeRegexMemory(void) { + Pcre2Func::code_free(code); + code = 0; //we may use it again + } + + void freeCompileContext(){ + Pcre2Func::compile_context_free(ccontext); + ccontext = 0; + } + + void onlyCopy(Regex const &r){ + //r.pat_str_ptr may point to other user data + pat_str_ptr = (r.pat_str_ptr == &r.pat_str) ? 
&pat_str //not r.pat_str + : r.pat_str_ptr; //other user data + + compile_opts = r.compile_opts; + jpcre2_compile_opts = r.jpcre2_compile_opts; + error_number = r.error_number; + error_offset = r.error_offset; + modtab = r.modtab; + } + + void deepCopy(Regex const &r) { + pat_str = r.pat_str; //must not use setPattern() here + + onlyCopy(r); + + //copy tables + tabv = r.tabv; + //copy ccontext if it's not null + freeCompileContext(); + ccontext = (r.ccontext) ? Pcre2Func::compile_context_copy(r.ccontext) : 0; + //if tabv is not empty and ccontext is ok (not null) set the table pointer to ccontext + if(ccontext && !tabv.empty()) Pcre2Func::set_character_tables(ccontext, &tabv[0]); + + //table pointer must be updated in the compiled code itself, jit memory copy is not available. + //copy is not going to work, we need a recompile. + //as all vars are already copied, we can just call compile() + r.code ? compile() //compile frees previous memory. + : freeRegexMemory(); + } + + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + + void deepMove(Regex& r) { + pat_str = std::move_if_noexcept(r.pat_str); + + onlyCopy(r); + + //steal tables + tabv = std::move_if_noexcept(r.tabv); + + //steal ccontext + freeCompileContext(); + ccontext = r.ccontext; r.ccontext = 0; //must set this to 0 + if(ccontext && !tabv.empty()) Pcre2Func::set_character_tables(ccontext, &tabv[0]); + + //steal the code + freeRegexMemory(); + code = r.code; r.code = 0; //must set this to 0 + } + + #endif + + protected: + + int error_number; + PCRE2_SIZE error_offset; + + public: + + /// Default Constructor. + /// Initializes all class variables to defaults. + /// Does not perform any pattern compilation. + Regex() { + init_vars(); + } + + ///Compile pattern with initialization. + /// @param re Pattern string + Regex(String const &re) { + init_vars(); + compile(re); + } + + /// @overload + /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. 
+ Regex(String const *re) { + init_vars(); + compile(re); + } + + ///@overload + /// @param re Pattern string . + /// @param mod Modifier string. + Regex(String const &re, Modifier const& mod) { + init_vars(); + compile(re, mod); + } + + ///@overload + /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. + /// @param mod Modifier string. + Regex(String const *re, Modifier const& mod) { + init_vars(); + compile(re, mod); + } + + ///@overload + /// @param re Pattern string . + /// @param po PCRE2 option value + Regex(String const &re, Uint po) { + init_vars(); + compile(re, po); + } + + ///@overload + /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. + /// @param po PCRE2 option value + Regex(String const *re, Uint po) { + init_vars(); + compile(re, po); + } + + ///@overload + /// @param re Pattern string . + /// @param po PCRE2 option value + /// @param jo JPCRE2 option value + Regex(String const &re, Uint po, Uint jo) { + init_vars(); + compile(re, po, jo); + } + + ///@overload + /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. + /// @param po PCRE2 option value + /// @param jo JPCRE2 option value + Regex(String const *re, Uint po, Uint jo) { + init_vars(); + compile(re, po, jo); + } + + /// @overload + ///... + /// Copy constructor. + /// A separate and new compile is performed from the copied options. + /// + /// @param r Constant Regex object reference. + Regex(Regex const &r) { + init_vars(); + deepCopy(r); + } + + /// Overloaded assignment operator. + /// @param r Regex const & + /// @return *this + Regex& operator=(Regex const &r) { + if (this == &r) return *this; + deepCopy(r); + return *this; + } + + + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + + + /// @overload + ///... + /// Move constructor. + ///This constructor steals resources from the argument. 
+ ///It leaves the argument in a valid but indeterminate sate. + ///The indeterminate state can be returned to normal by calling reset() on that object. + /// @param r rvalue reference to a Regex object. + Regex(Regex&& r) { + init_vars(); + deepMove(r); + } + + ///@overload + ///... + /// Overloaded move-assignment operator. + ///This constructor steals resources from the argument. + ///It leaves the argument in a valid but indeterminate sate. + ///The indeterminate state can be returned to normal by calling reset() on that object. + /// @param r Regex&& + /// @return *this + Regex& operator=(Regex&& r) { + if (this == &r) return *this; + deepMove(r); + return *this; + } + + /// Provides boolean check for the status of the object. + /// This overloaded boolean operator needs to be declared + /// explicit to prevent implicit conversion and overloading issues. + /// + /// We will only enable it if >=C++11 is being used, as the explicit keyword + /// for a function other than constructor is not supported in older compilers. + /// + /// If you are dealing with legacy code/compilers use the Double bang trick mentioned + /// in Regex::operator!(). + /// + /// This helps us to check the status of the compiled regex like this: + /// + /// ``` + /// jpcre2::select::Regex re("pat", "mod"); + /// if(re) { + /// std::cout<<"Compile success"; + /// } else { + /// std::cout<<"Compile failed"; + /// } + /// ``` + ///@return true if regex compiled successfully, false otherwise. + /// + explicit operator bool() const { + return (code != 0); + } + #endif + + /// Provides boolean check for the status of the object. + /// This is a safe boolean approach (no implicit conversion or overloading). + /// We don't need the explicit keyword here and thus it's the preferable method + /// to check for object status that will work well with older compilers. 
+ /// e.g: + /// + /// ``` + /// jpcre2::select::Regex re("pat","mod"); + /// if(!re) { + /// std::cout<<"Compile failed"; + /// } else { + /// std::cout<<"Compiled successfully"; + /// } + /// ``` + /// Double bang trick: + /// + /// ``` + /// jpcre2::select::Regex re("pat","mod"); + /// if(!!re) { + /// std::cout<<"Compiled successfully"; + /// } else { + /// std::cout<<"Compile failed"; + /// } + /// ``` + /// @return true if regex compile failed, false otherwise. + bool operator!() const { + return (code == 0); + } + + virtual ~Regex() { + freeRegexMemory(); + freeCompileContext(); + } + + ///Reset all class variables to its default (initial) state including memory. + ///@return Reference to the calling Regex object. + Regex& reset() { + freeRegexMemory(); + freeCompileContext(); + String().swap(pat_str); + init_vars(); + return *this; + } + + ///Clear all class variables to its default (initial) state (some memory may retain for further use). + ///@return Reference to the calling Regex object. + Regex& clear() { + freeRegexMemory(); + freeCompileContext(); + pat_str.clear(); + init_vars(); + return *this; + } + + ///Reset regex compile related errors to zero. + ///@return A reference to the Regex object + ///@see RegexReplace::resetErrors() + ///@see RegexMatch::resetErrors() + Regex& resetErrors() { + error_number = 0; + error_offset = 0; + return *this; + } + + /// Recreate character tables used by PCRE2. + /// You should call this function after changing the locale to remake the + /// character tables according to the new locale. + /// These character tables are used to compile the regex and used by match + /// and replace operation. A separate call to compile() will be required + /// to apply the new character tables. + /// @return Reference to the calling Regex object. + Regex& resetCharacterTables() { + const unsigned char* tables = Pcre2Func::maketables(0); //must pass 0, we are using free() to free the tables. 
+ tabv = std::vector(tables, tables+1088); + ::free((void*)tables); //must free memory + if(!ccontext) + ccontext = Pcre2Func::compile_context_create(0); + Pcre2Func::set_character_tables(ccontext, &tabv[0]); + return *this; + } + + ///Get Pcre2 raw compiled code pointer. + ///@return pointer to constant pcre2_code or null. + Pcre2Code const* getPcre2Code() const{ + return code; + } + + /// Get pattern string + ///@return pattern string of type jpcre2::select::String + String getPattern() const { + return *pat_str_ptr; + } + + /// Get pointer to pattern string + ///@return Pointer to constant pattern string + String const * getPatternPointer() const { + return pat_str_ptr; + } + + ///Get number of captures from compiled code. + ///@return New line option value or 0. + Uint getNumCaptures() { + if(!code) return 0; + Uint numCaptures = 0; + int ret = Pcre2Func::pattern_info(code, PCRE2_INFO_CAPTURECOUNT, &numCaptures); + if(ret < 0) error_number = ret; + return numCaptures; + } + + /// Calculate modifier string from PCRE2 and JPCRE2 options and return it. + /// + /// **Mixed or combined modifier**. + /// + /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers + /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they + /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options + /// get set, and when you remove the 'n' modifier (with `Regex::changeModifier()`), both will get removed. + ///@tparam Char_T Character type + ///@return Calculated modifier string (std::string) + ///@see RegexMatch::getModifier() + ///@see RegexReplace::getModifier() + std::string getModifier() const { + return modtab ? 
modtab->fromCompileOption(compile_opts, jpcre2_compile_opts) + : MOD::fromCompileOption(compile_opts, jpcre2_compile_opts); + } + + /// Get PCRE2 option + /// @return Compile time PCRE2 option value + ///@see RegexReplace::getPcre2Option() + ///@see RegexMatch::getPcre2Option() + Uint getPcre2Option() const { + return compile_opts; + } + + /// Get JPCRE2 option + /// @return Compile time JPCRE2 option value + ///@see RegexReplace::getJpcre2Option() + ///@see RegexMatch::getJpcre2Option() + Uint getJpcre2Option() const { + return jpcre2_compile_opts; + } + + /// Returns the last error number + ///@return Last error number + int getErrorNumber() const { + return error_number; + } + + /// Returns the last error offset + ///@return Last error offset + int getErrorOffset() const { + return (int)error_offset; + } + + /// Returns the last error message + ///@return Last error message + String getErrorMessage() const { + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + return select::getErrorMessage(error_number, error_offset); + #else + return select::getErrorMessage(error_number, error_offset); + #endif + } + + ///Get new line convention from compiled code. + ///@return New line option value or 0. + ///``` + ///PCRE2_NEWLINE_CR Carriage return only + ///PCRE2_NEWLINE_LF Linefeed only + ///PCRE2_NEWLINE_CRLF CR followed by LF only + ///PCRE2_NEWLINE_ANYCRLF Any of the above + ///PCRE2_NEWLINE_ANY Any Unicode newline sequence + ///``` + Uint getNewLine() { + if(!code) return 0; + Uint newline = 0; + int ret = Pcre2Func::pattern_info(code, PCRE2_INFO_NEWLINE, &newline); + if(ret < 0) error_number = ret; + return newline; + } + + ///Get the modifier table that is set, + ///@return constant ModifierTable pointer. + ModifierTable const* getModifierTable(){ + return modtab; + } + + + ///Set new line convention. + ///@param value New line option value. 
+ ///``` + ///PCRE2_NEWLINE_CR Carriage return only + ///PCRE2_NEWLINE_LF Linefeed only + ///PCRE2_NEWLINE_CRLF CR followed by LF only + ///PCRE2_NEWLINE_ANYCRLF Any of the above + ///PCRE2_NEWLINE_ANY Any Unicode newline sequence + ///``` + ///@return Reference to the calling Regex object + Regex& setNewLine(Uint value){ + if(!ccontext) + ccontext = Pcre2Func::compile_context_create(0); + int ret = Pcre2Func::set_newline(ccontext, value); + if(ret < 0) error_number = ret; + return *this; + } + + /// Set the pattern string to compile + /// @param re Pattern string + /// @return Reference to the calling Regex object. + Regex& setPattern(String const &re) { + pat_str = re; + pat_str_ptr = &pat_str; //must overwrite + return *this; + } + + /// @overload + /// @param re Pattern string pointer, null pointer will unset it. + /// @return Reference to the calling Regex object. + Regex& setPattern(String const *re) { + if(re) pat_str_ptr = re; + else { + pat_str.clear(); + pat_str_ptr = &pat_str; + } + return *this; + } + + /// set the modifier (resets all JPCRE2 and PCRE2 options) by calling Regex::changeModifier(). + /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier and sets + /// equivalent PCRE2 and JPCRE2 options. + /// @param x Modifier string. + /// @return Reference to the calling Regex object. + /// @see RegexMatch::setModifier() + /// @see RegexReplace::setModifier() + Regex& setModifier(Modifier const& x) { + compile_opts = 0; + jpcre2_compile_opts = 0; + return changeModifier(x, true); + } + + ///Set a custom modifier table to be used. + ///@param mdt pointer to ModifierTable object. + /// @return Reference to the calling Regex object. + Regex& setModifierTable(ModifierTable const * mdt){ + modtab = mdt; + return *this; + } + + /// Set JPCRE2 option for compile (overwrites existing option) + /// @param x Option value + /// @return Reference to the calling Regex object. 
+ /// @see RegexMatch::setJpcre2Option() + /// @see RegexReplace::setJpcre2Option() + Regex& setJpcre2Option(Uint x) { + jpcre2_compile_opts = x; + return *this; + } + + /// Set PCRE2 option for compile (overwrites existing option) + /// @param x Option value + /// @return Reference to the calling Regex object. + /// @see RegexMatch::setPcre2Option() + /// @see RegexReplace::setPcre2Option() + Regex& setPcre2Option(Uint x) { + compile_opts = x; + return *this; + } + + /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options. + /// This function does not initialize or re-initialize options. + /// If you want to set options from scratch, initialize them to 0 before calling this function. + /// + /// If invalid modifier is detected, then the error number for the Regex + /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character. + /// You can get the message with Regex::getErrorMessage() function. + /// @param mod Modifier string. + /// @param x Whether to add or remove option + /// @return Reference to the calling Regex object + /// @see RegexMatch::changeModifier() + /// @see RegexReplace::changeModifier() + Regex& changeModifier(Modifier const& mod, bool x){ + modtab ? modtab->toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset) + : MOD::toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset); + return *this; + } + + /// Add or remove a JPCRE2 option + /// @param opt JPCRE2 option value + /// @param x Add the option if it's true, remove otherwise. + /// @return Reference to the calling Regex object + /// @see RegexMatch::changeJpcre2Option() + /// @see RegexReplace::changeJpcre2Option() + Regex& changeJpcre2Option(Uint opt, bool x) { + jpcre2_compile_opts = x ? 
jpcre2_compile_opts | opt : jpcre2_compile_opts & ~opt; + return *this; + } + + /// Add or remove a PCRE2 option + /// @param opt PCRE2 option value + /// @param x Add the option if it's true, remove otherwise. + /// @return Reference to the calling Regex object + /// @see RegexMatch::changePcre2Option() + /// @see RegexReplace::changePcre2Option() + Regex& changePcre2Option(Uint opt, bool x) { + compile_opts = x ? compile_opts | opt : compile_opts & ~opt; + return *this; + } + + /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options. + /// This is just a wrapper of the original function Regex::changeModifier() + /// provided for convenience. + /// @param mod Modifier string. + /// @return Reference to the calling Regex object + /// @see RegexMatch::addModifier() + /// @see RegexReplace::addModifier() + Regex& addModifier(Modifier const& mod){ + return changeModifier(mod, true); + } + + /// Add option to existing JPCRE2 options for compile + /// @param x Option value + /// @return Reference to the calling Regex object + /// @see RegexMatch::addJpcre2Option() + /// @see RegexReplace::addJpcre2Option() + Regex& addJpcre2Option(Uint x) { + jpcre2_compile_opts |= x; + return *this; + } + + /// Add option to existing PCRE2 options for compile + /// @param x Option value + /// @return Reference to the calling Regex object + /// @see RegexMatch::addPcre2Option() + /// @see RegexReplace::addPcre2Option() + Regex& addPcre2Option(Uint x) { + compile_opts |= x; + return *this; + } + + ///Compile pattern using info from class variables. + ///@see Regex::compile(String const &re, Uint po, Uint jo) + ///@see Regex::compile(String const &re, Uint po) + ///@see Regex::compile(String const &re, Modifier mod) + ///@see Regex::compile(String const &re) + void compile(void); + + ///@overload + ///... + /// Set the specified parameters, then compile the pattern using information from class variables. 
+ /// @param re Pattern string + /// @param po PCRE2 option + /// @param jo JPCRE2 option + void compile(String const &re, Uint po, Uint jo) { + setPattern(re).setPcre2Option(po).setJpcre2Option(jo); + compile(); + } + + + ///@overload + /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. + /// @param po PCRE2 option + /// @param jo JPCRE2 option + void compile(String const *re, Uint po, Uint jo) { + setPattern(re).setPcre2Option(po).setJpcre2Option(jo); + compile(); + } + + ///@overload + /// @param re Pattern string + /// @param po PCRE2 option + void compile(String const &re, Uint po) { + setPattern(re).setPcre2Option(po); + compile(); + } + + ///@overload + /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. + /// @param po PCRE2 option + void compile(String const *re, Uint po) { + setPattern(re).setPcre2Option(po); + compile(); + } + + /// @overload + /// @param re Pattern string + /// @param mod Modifier string. + void compile(String const &re, Modifier const& mod) { + setPattern(re).setModifier(mod); + compile(); + } + + ///@overload + /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. + /// @param mod Modifier string. + void compile(String const *re, Modifier const& mod) { + setPattern(re).setModifier(mod); + compile(); + } + + ///@overload + /// @param re Pattern string . + void compile(String const &re) { + setPattern(re); + compile(); + } + + ///@overload + /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern. + void compile(String const *re) { + setPattern(re); + compile(); + } + + ///Returns a default constructed RegexMatch object by value. + ///This object is initialized with the same modifier table + ///as this Regex object. + ///@return RegexMatch object. 
+ RegexMatch initMatch(){ + RegexMatch rm(this); + rm.setModifierTable(modtab); + return rm; + } + + ///Synonym for initMatch() + ///@return RegexMatch object by value. + RegexMatch getMatchObject(){ + return initMatch(); + } + + /// Perform regex match and return match count using a temporary match object. + /// This temporary match object will get available options from this Regex object, + /// that includes modifier table. + /// @param s Subject string . + /// @param mod Modifier string. + /// @param start_offset Offset from where matching will start in the subject string. + /// @return Match count + /// @see RegexMatch::match() + SIZE_T match(String const &s, Modifier const& mod, PCRE2_SIZE start_offset=0) { + return initMatch().setStartOffset(start_offset).setSubject(s).setModifier(mod).match(); + } + + ///@overload + ///... + ///@param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject. + ///@param mod Modifier string. + ///@param start_offset Offset from where matching will start in the subject string. + ///@return Match count + SIZE_T match(String const *s, Modifier const& mod, PCRE2_SIZE start_offset=0) { + return initMatch().setStartOffset(start_offset).setSubject(s).setModifier(mod).match(); + } + + ///@overload + ///... + /// @param s Subject string . + /// @param start_offset Offset from where matching will start in the subject string. + /// @return Match count + /// @see RegexMatch::match() + SIZE_T match(String const &s, PCRE2_SIZE start_offset=0) { + return initMatch().setStartOffset(start_offset).setSubject(s).match(); + } + + ///@overload + ///... + /// @param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject. + /// @param start_offset Offset from where matching will start in the subject string. 
+ /// @return Match count + /// @see RegexMatch::match() + SIZE_T match(String const *s, PCRE2_SIZE start_offset=0) { + return initMatch().setStartOffset(start_offset).setSubject(s).match(); + } + + ///Returns a default constructed RegexReplace object by value. + ///This object is initialized with the same modifier table as this Regex object. + ///@return RegexReplace object. + RegexReplace initReplace(){ + RegexReplace rr(this); + rr.setModifierTable(modtab); + return rr; + } + + ///Synonym for initReplace() + ///@return RegexReplace object. + RegexReplace getReplaceObject(){ + return initReplace(); + } + + /// Perform regex replace and return the replaced string using a temporary replace object. + /// This temporary replace object will get available options from this Regex object, + /// that includes modifier table. + /// @param mains Subject string. + /// @param repl String to replace with + /// @param mod Modifier string. + ///@param counter Pointer to a counter to store the number of replacement done. + /// @return Resultant string after regex replace + /// @see RegexReplace::replace() + String replace(String const &mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) { + return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace(); + } + + ///@overload + /// @param mains Pointer to subject string + /// @param repl String to replace with + /// @param mod Modifier string. + ///@param counter Pointer to a counter to store the number of replacement done. + /// @return Resultant string after regex replace + /// @see RegexReplace::replace() + String replace(String *mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) { + return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace(); + } + + ///@overload + ///... 
+ /// @param mains Subject string + /// @param repl Pointer to string to replace with + /// @param mod Modifier string. + ///@param counter Pointer to a counter to store the number of replacement done. + /// @return Resultant string after regex replace + /// @see RegexReplace::replace() + String replace(String const &mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) { + return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace(); + } + + ///@overload + ///... + /// @param mains Pointer to subject string + /// @param repl Pointer to string to replace with + /// @param mod Modifier string. + ///@param counter Pointer to a counter to store the number of replacement done. + /// @return Resultant string after regex replace + /// @see RegexReplace::replace() + String replace(String *mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) { + return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace(); + } + + /// Perl compatible replace method. + /// Modifies subject string in-place and returns replace count. + /// + /// It's a shorthand method to `RegexReplace::preplace()`. + /// @param mains Pointer to subject string. + /// @param repl Replacement string (string to replace with). + /// @param mod Modifier string. + /// @return replace count. + SIZE_T preplace(String * mains, String const& repl, Modifier const& mod=""){ + SIZE_T counter = 0; + if(mains) *mains = initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace(); + return counter; + } + + /// @overload + /// + /// Perl compatible replace method. + /// Modifies subject string in-place and returns replace count. + /// + /// It's a shorthand method to `RegexReplace::preplace()`. + /// @param mains Pointer to subject string. + /// @param repl Pointer to replacement string (string to replace with). 
+ /// @param mod Modifier string. + /// @return replace count. + SIZE_T preplace(String * mains, String const* repl, Modifier const& mod=""){ + SIZE_T counter = 0; + if(mains) *mains = initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace(); + return counter; + } + + /// @overload + /// + /// Perl compatible replace method. + /// Returns replace count and discards subject string. + /// + /// It's a shorthand method to `RegexReplace::preplace()`. + /// @param mains Subject string. + /// @param repl Replacement string (string to replace with). + /// @param mod Modifier string. + /// @return replace count. + SIZE_T preplace(String const& mains, String const& repl, Modifier const& mod=""){ + SIZE_T counter = 0; + initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace(); + return counter; + } + + /// @overload + /// + /// Perl compatible replace method. + /// Returns replace count and discards subject string. + /// + /// It's a shorthand method to `RegexReplace::preplace()`. + /// @param mains Subject string. + /// @param repl Pointer to replacement string (string to replace with). + /// @param mod Modifier string. + /// @return replace count. 
+ SIZE_T preplace(String const& mains, String const* repl, Modifier const& mod=""){ + SIZE_T counter = 0; + initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace(); + return counter; + } + }; + + private: + //prevent object instantiation of select class + select(); + select(select const &); + #ifdef JPCRE2_USE_MINIMUM_CXX_11 + select(select&&); + #endif + ~select(); +};//struct select +}//jpcre2 namespace + + +inline void jpcre2::ModifierTable::parseModifierTable(std::string& tabjs, VecOpt& tabjv, + std::string& tab_s, VecOpt& tab_v, + std::string const& tabs, VecOpt const& tabv){ + SIZE_T n = tabs.length(); + JPCRE2_ASSERT(n == tabv.size(), ("ValueError: Could not set Modifier table.\ + Modifier character and value tables are not of the same size (" + _tostdstring(n) + " == " + _tostdstring(tabv.size()) + ").").c_str()); + tabjs.clear(); + tab_s.clear(); tab_s.reserve(n); + tabjv.clear(); + tab_v.clear(); tab_v.reserve(n); + for(SIZE_T i=0;i class Map> +void jpcre2::select::Regex::compile() { +#else +template +void jpcre2::select::Regex::compile() { +#endif + //Get c_str of pattern + Pcre2Sptr c_pattern = (Pcre2Sptr) pat_str_ptr->c_str(); + int err_number = 0; + PCRE2_SIZE err_offset = 0; + + /************************************************************************** + * Compile the regular expression pattern, and handle + * any errors that are detected. 
+ *************************************************************************/ + + //first release any previous memory + freeRegexMemory(); + code = Pcre2Func::compile( c_pattern, /* the pattern */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + compile_opts, /* default options */ + &err_number, /* for error number */ + &err_offset, /* for error offset */ + ccontext); /* use compile context */ + + if (code == 0) { + /* Compilation failed */ + //must not free regex memory, the only function has that right is the destructor + error_number = err_number; + error_offset = err_offset; + return; + } else if ((jpcre2_compile_opts & JIT_COMPILE) != 0) { + ///perform JIT compilation it it's enabled + int jit_ret = Pcre2Func::jit_compile(code, PCRE2_JIT_COMPLETE); + if(jit_ret < 0) error_number = jit_ret; + } + //everything's OK +} + + +#ifdef JPCRE2_USE_MINIMUM_CXX_11 +template class Map> +typename jpcre2::select::String jpcre2::select::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) { +#else +template +typename jpcre2::select::String jpcre2::select::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) { +#endif + if(counter) *counter = 0; + + replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; + replace_opts &= ~PCRE2_SUBSTITUTE_GLOBAL; + Regex const * re = RegexMatch::getRegexObject(); + // If re or re->code is null, return the subject string unmodified. + if (!re || re->code == 0) + return RegexMatch::getSubject(); + + Pcre2Sptr r_subject_ptr = (Pcre2Sptr) RegexMatch::getSubjectPointer()->c_str(); + //~ SIZE_T totlen = RegexMatch::getSubjectPointer()->length(); + + if(do_match) match(); + SIZE_T mcount = vec_soff.size(); + // if mcount is 0, return the subject string. (there's no need to worry about re) + if(!mcount) return RegexMatch::getSubject(); + SIZE_T current_offset = 0; //needs to be zero, not start_offset, because it's from where unmatched parts will be copied. 
+ String res, tmp; + + //A check, this check is not fullproof. + SIZE_T last = vec_eoff.size(); + last = (last>0)?last-1:0; + JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\ + If you are using existing match data, try a new match."); + + //loop through the matches + for(SIZE_T i=0;ic_str(); + //substr(vec_soff[i], vec_eoff[i] - vec_soff[i]).c_str(); + Pcre2Sptr subject = r_subject_ptr + vec_soff[i]; + PCRE2_SIZE subject_length = vec_eoff[i] - vec_soff[i]; + + ///the string returned from the callback is the replacement string. + Pcre2Sptr replace = (Pcre2Sptr) tmp.c_str(); + PCRE2_SIZE replace_length = tmp.length(); + bool retry = true; + int ret = 0; + PCRE2_SIZE outlengthptr = 0; + Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1](); + + while (true) { + ret = Pcre2Func::substitute( + re->code, /*Points to the compiled pattern*/ + subject, /*Points to the subject string*/ + subject_length, /*Length of the subject string*/ + 0, /*Offset in the subject at which to start matching*/ //must be zero + replace_opts, /*Option bits*/ + RegexMatch::mdata, /*Points to a match data block, or is NULL*/ + RegexMatch::mcontext, /*Points to a match context, or is NULL*/ + replace, /*Points to the replacement string*/ + replace_length, /*Length of the replacement string*/ + output_buffer, /*Points to the output buffer*/ + &outlengthptr /*Points to the length of the output buffer*/ + ); + + if (ret < 0) { + //Handle errors + if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0 + && ret == (int) PCRE2_ERROR_NOMEMORY && retry) { + retry = false; + /// If initial #buffer_size wasn't big enough for resultant string, + /// we will try once more with a new buffer size adjusted to the length of the resultant string. 
+ delete[] output_buffer; + output_buffer = new Pcre2Uchar[outlengthptr + 1](); + // Go and try to perform the substitute again + continue; + } else { + RegexMatch::error_number = ret; + delete[] output_buffer; + return RegexMatch::getSubject(); + } + } + //If everything's ok exit the loop + break; + } + res += String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) ); + delete[] output_buffer; + if(counter) *counter += ret; + //if FIND_ALL is not set, single match will be performed + if((RegexMatch::getJpcre2Option() & FIND_ALL) == 0) break; + } + //All matched parts have been dealt with. + //now copy rest of the string from current_offset + res += RegexMatch::getSubject().substr(current_offset, String::npos); + return res; +} + + +#ifdef JPCRE2_USE_MINIMUM_CXX_11 +template class Map> +typename jpcre2::select::String jpcre2::select::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){ +#else +template +typename jpcre2::select::String jpcre2::select::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){ +#endif + if(counter) *counter = 0; + if(do_match) match(); + SIZE_T mcount = vec_soff.size(); + // if mcount is 0, return the subject string. (there's no need to worry about re) + if(!mcount) return RegexMatch::getSubject(); + SIZE_T current_offset = 0; //no need for worrying about start offset, it's handled by match and we get valid offsets out of it. 
+ String res; + + //A check, this check is not fullproof + SIZE_T last = vec_eoff.size(); + last = (last>0)?last-1:0; + JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\ + If you are using existing match data, try a new match."); + + //loop through the matches + for(SIZE_T i=0;i class Map> +typename jpcre2::select::String jpcre2::select::RegexReplace::replace() { +#else +template +typename jpcre2::select::String jpcre2::select::RegexReplace::replace() { +#endif + *last_replace_counter = 0; + + // If re or re->code is null, return the subject string unmodified. + if (!re || re->code == 0) + return *r_subject_ptr; + + Pcre2Sptr subject = (Pcre2Sptr) r_subject_ptr->c_str(); + PCRE2_SIZE subject_length = r_subject_ptr->length(); + Pcre2Sptr replace = (Pcre2Sptr) r_replw_ptr->c_str(); + PCRE2_SIZE replace_length = r_replw_ptr->length(); + PCRE2_SIZE outlengthptr = (PCRE2_SIZE) buffer_size; + bool retry = true; + int ret = 0; + Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1](); + + while (true) { + ret = Pcre2Func::substitute( + re->code, /*Points to the compiled pattern*/ + subject, /*Points to the subject string*/ + subject_length, /*Length of the subject string*/ + _start_offset, /*Offset in the subject at which to start matching*/ + replace_opts, /*Option bits*/ + mdata, /*Points to a match data block, or is NULL*/ + mcontext, /*Points to a match context, or is NULL*/ + replace, /*Points to the replacement string*/ + replace_length, /*Length of the replacement string*/ + output_buffer, /*Points to the output buffer*/ + &outlengthptr /*Points to the length of the output buffer*/ + ); + + if (ret < 0) { + //Handle errors + if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0 + && ret == (int) PCRE2_ERROR_NOMEMORY && retry) { + retry = false; + /// If initial #buffer_size wasn't big enough for resultant string, + /// we will try once more with a new buffer 
size adjusted to the length of the resultant string. + delete[] output_buffer; + output_buffer = new Pcre2Uchar[outlengthptr + 1](); + // Go and try to perform the substitute again + continue; + } else { + error_number = ret; + delete[] output_buffer; + return *r_subject_ptr; + } + } + //If everything's ok exit the loop + break; + } + *last_replace_counter += ret; + String result = String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) ); + delete[] output_buffer; + return result; +} + + +#ifdef JPCRE2_USE_MINIMUM_CXX_11 +template class Map> +bool jpcre2::select::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector, uint32_t ovector_count) { +#else +template +bool jpcre2::select::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector, uint32_t ovector_count) { +#endif + NumSub num_sub; + uint32_t rcu = rc; + num_sub.reserve(rcu); //we know exactly how many elements it will have. + uint32_t i; + for (i = 0u; i < ovector_count; i++) { + if (ovector[2*i] != PCRE2_UNSET) + num_sub.push_back(String((Char*)(subject + ovector[2*i]), ovector[2*i+1] - ovector[2*i])); + else + #ifdef JPCRE2_UNSET_CAPTURES_NULL + num_sub.push_back(std::nullopt); + #else + num_sub.push_back(String()); + #endif + } + vec_num->push_back(num_sub); //this function shouldn't be called if this vector is null + return true; +} + + +#ifdef JPCRE2_USE_MINIMUM_CXX_11 +template class Map> +bool jpcre2::select::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size, + Pcre2Sptr name_table, + Pcre2Sptr subject, PCRE2_SIZE* ovector ) { +#else +template +bool jpcre2::select::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size, + Pcre2Sptr name_table, + Pcre2Sptr subject, PCRE2_SIZE* ovector ) { +#endif + Pcre2Sptr tabptr = name_table; + String key; + MapNas map_nas; + MapNtN map_ntn; + for (int i = 0; i < namecount; i++) { + int n; + if(sizeof( Char_T ) * CHAR_BIT == 8){ + n = (int)((tabptr[0] << 8) | tabptr[1]); + 
key = toString((Char*) (tabptr + 2)); + } + else{ + n = (int)tabptr[0]; + key = toString((Char*) (tabptr + 1)); + } + //Use of tabptr is finished for this iteration, let's increment it now. + tabptr += name_entry_size; + String value((Char*)(subject + ovector[2*n]), ovector[2*n+1] - ovector[2*n]); //n, not i. + if(vec_nas) map_nas[key] = value; + if(vec_ntn) map_ntn[key] = n; + } + //push the maps into vectors: + if(vec_nas) vec_nas->push_back(map_nas); + if(vec_ntn) vec_ntn->push_back(map_ntn); + return true; +} + + +#ifdef JPCRE2_USE_MINIMUM_CXX_11 +template class Map> +jpcre2::SIZE_T jpcre2::select::RegexMatch::match() { +#else +template +jpcre2::SIZE_T jpcre2::select::RegexMatch::match() { +#endif + + // If re or re->code is null, return 0 as the match count + if (!re || re->code == 0) + return 0; + + Pcre2Sptr subject = (Pcre2Sptr) m_subject_ptr->c_str(); + Pcre2Sptr name_table = 0; + int crlf_is_newline = 0; + int namecount = 0; + int name_entry_size = 0; + int rc = 0; + uint32_t ovector_count = 0; + int utf = 0; + SIZE_T count = 0; + Uint option_bits; + Uint newline = 0; + PCRE2_SIZE *ovector = 0; + SIZE_T subject_length = 0; + MatchData *match_data = 0; + subject_length = m_subject_ptr->length(); + bool mdc = false; //mdata created. + + + if (vec_num) vec_num->clear(); + if (vec_nas) vec_nas->clear(); + if (vec_ntn) vec_ntn->clear(); + if(vec_soff) vec_soff->clear(); + if(vec_eoff) vec_eoff->clear(); + + + /* Using this function ensures that the block is exactly the right size for + the number of capturing parentheses in the pattern. 
*/ + if(mdata) match_data = mdata; + else { + match_data = Pcre2Func::match_data_create_from_pattern(re->code, 0); + mdc = true; + } + + rc = Pcre2Func::match( re->code, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + _start_offset, /* start at offset 'start_offset' in the subject */ + match_opts, /* default options */ + match_data, /* block for storing the result */ + mcontext); /* use default match context */ + + /* Matching failed: handle error cases */ + + if (rc < 0) { + if(mdc) + Pcre2Func::match_data_free(match_data); /* Release memory used for the match */ + //must not free code. This function has no right to modify regex + switch (rc) { + case PCRE2_ERROR_NOMATCH: + return count; + /* + Handle other special cases if you like + */ + default:; + } + error_number = rc; + return count; + } + + ++count; //Increment the counter + /* Match succeded. Get a pointer to the output vector, where string offsets are + stored. */ + ovector = Pcre2Func::get_ovector_pointer(match_data); + ovector_count = Pcre2Func::get_ovector_count(match_data); + + /************************************************************************//* + * We have found the first match within the subject string. If the output * + * vector wasn't big enough, say so. Then output any substrings that were * + * captured. * + *************************************************************************/ + + /* The output vector wasn't big enough. This should not happen, because we used + pcre2_match_data_create_from_pattern() above. */ + + if (rc == 0) { + //ovector was not big enough for all the captured substrings; + error_number = (int)ERROR::INSUFFICIENT_OVECTOR; + rc = ovector_count; + // TODO: We may throw exception at this point. 
+ } + //match succeeded at offset ovector[0] + if(vec_soff) vec_soff->push_back(ovector[0]); + if(vec_eoff) vec_eoff->push_back(ovector[1]); + + // Get numbered substrings if vec_num isn't null + if (vec_num) { //must do null check + if(!getNumberedSubstrings(rc, subject, ovector, ovector_count)) + return count; + } + + //get named substrings if either vec_nas or vec_ntn is given. + if (vec_nas || vec_ntn) { + /* See if there are any named substrings, and if so, show them by name. First + we have to extract the count of named parentheses from the pattern. */ + + (void) Pcre2Func::pattern_info( re->code, /* the compiled pattern */ + PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ + &namecount); /* where to put the answer */ + + if (namecount <= 0); /*No named substrings*/ + + else { + /* Before we can access the substrings, we must extract the table for + translating names to numbers, and the size of each entry in the table. */ + + (void) Pcre2Func::pattern_info( re->code, /* the compiled pattern */ + PCRE2_INFO_NAMETABLE, /* address of the table */ + &name_table); /* where to put the answer */ + + (void) Pcre2Func::pattern_info( re->code, /* the compiled pattern */ + PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ + &name_entry_size); /* where to put the answer */ + + /* Now we can scan the table and, for each entry, print the number, the name, + and the substring itself. In the 8-bit library the number is held in two + bytes, most significant first. */ + + + // Get named substrings if vec_nas isn't null. + // Get name to number map if vec_ntn isn't null. 
+ } + //the following must be outside the above if-else + if(!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector)) + return count; + } + + /***********************************************************************//* + * If the "g" modifier was given, we want to continue * + * to search for additional matches in the subject string, in a similar * + * way to the /g option in Perl. This turns out to be trickier than you * + * might think because of the possibility of matching an empty string. * + * What happens is as follows: * + * * + * If the previous match was NOT for an empty string, we can just start * + * the next match at the end of the previous one. * + * * + * If the previous match WAS for an empty string, we can't do that, as it * + * would lead to an infinite loop. Instead, a call of pcre2_match() is * + * made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The * + * first of these tells PCRE2 that an empty string at the start of the * + * subject is not a valid match; other possibilities must be tried. The * + * second flag restricts PCRE2 to one match attempt at the initial string * + * position. If this match succeeds, an alternative to the empty string * + * match has been found, and we can print it and proceed round the loop, * + * advancing by the length of whatever was found. If this match does not * + * succeed, we still stay in the loop, advancing by just one character. * + * In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be * + * more than one byte. * + * * + * However, there is a complication concerned with newlines. When the * + * newline convention is such that CRLF is a valid newline, we must * + * advance by two characters rather than one. The newline convention can * + * be set in the regex by (*CR), etc.; if not, we must find the default. 
* + *************************************************************************/ + + if ((jpcre2_match_opts & FIND_ALL) == 0) { + if(mdc) + Pcre2Func::match_data_free(match_data); /* Release the memory that was used */ + // Must not free code. This function has no right to modify regex. + return count; /* Exit the program. */ + } + + /* Before running the loop, check for UTF-8 and whether CRLF is a valid newline + sequence. First, find the options with which the regex was compiled and extract + the UTF state. */ + + (void) Pcre2Func::pattern_info(re->code, PCRE2_INFO_ALLOPTIONS, &option_bits); + utf = ((option_bits & PCRE2_UTF) != 0); + + /* Now find the newline convention and see whether CRLF is a valid newline + sequence. */ + + (void) Pcre2Func::pattern_info(re->code, PCRE2_INFO_NEWLINE, &newline); + crlf_is_newline = newline == PCRE2_NEWLINE_ANY + || newline == PCRE2_NEWLINE_CRLF + || newline == PCRE2_NEWLINE_ANYCRLF; + + /** We got the first match. Now loop for second and subsequent matches. */ + + for (;;) { + + Uint options = match_opts; /* Normally no options */ + PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ + + /* If the previous match was for an empty string, we are finished if we are + at the end of the subject. Otherwise, arrange to run another match at the + same point to see if a non-empty match can be found. */ + + if (ovector[0] == ovector[1]) { + if (ovector[0] == subject_length) + break; + options |= PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + + /// Run the next matching operation */ + + rc = Pcre2Func::match( re->code, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + start_offset, /* starting offset in the subject */ + options, /* options */ + match_data, /* block for storing the result */ + mcontext); /* use match context */ + + /* This time, a result of NOMATCH isn't an error. 
If the value in "options" + is zero, it just means we have found all possible matches, so the loop ends. + Otherwise, it means we have failed to find a non-empty-string match at a + point where there was a previous empty-string match. In this case, we do what + Perl does: advance the matching position by one character, and continue. We + do this by setting the "end of previous match" offset, because that is picked + up at the top of the loop as the point at which to start again. + + There are two complications: (a) When CRLF is a valid newline sequence, and + the current position is just before it, advance by an extra byte. (b) + Otherwise we must ensure that we skip an entire UTF character if we are in + UTF mode. */ + + if (rc == PCRE2_ERROR_NOMATCH) { + if (options == 0) + break; /* All matches found */ + ovector[1] = start_offset + 1; /* Advance one code unit */ + if (crlf_is_newline && /* If CRLF is newline & */ + start_offset < subject_length - 1 && /* we are at CRLF, */ + subject[start_offset] == '\r' && subject[start_offset + 1] == '\n') + ovector[1] += 1; /* Advance by one more. */ + else if (utf) { /* advance a whole UTF (8 or 16), for UTF-32, it's not needed */ + while (ovector[1] < subject_length) { + if(sizeof( Char_T ) * CHAR_BIT == 8 && (subject[ovector[1]] & 0xc0) != 0x80) break; + else if(sizeof( Char_T ) * CHAR_BIT == 16 && (subject[ovector[1]] & 0xfc00) != 0xdc00) break; + else if(sizeof( Char_T ) * CHAR_BIT == 32) break; //must be else if + ovector[1] += 1; + } + } + continue; /* Go round the loop again */ + } + + /* Other matching errors are not recoverable. */ + + if (rc < 0) { + if(mdc) + Pcre2Func::match_data_free(match_data); + // Must not free code. This function has no right to modify regex. + error_number = rc; + return count; + } + + /* match succeeded */ + ++count; //Increment the counter + + if (rc == 0) { + /* The match succeeded, but the output vector wasn't big enough. This + should not happen. 
*/ + error_number = (int)ERROR::INSUFFICIENT_OVECTOR; + rc = ovector_count; + // TODO: We may throw exception at this point. + } + + //match succeded at ovector[0] + if(vec_soff) vec_soff->push_back(ovector[0]); + if(vec_eoff) vec_eoff->push_back(ovector[1]); + + /* As before, get substrings stored in the output vector by number, and then + also any named substrings. */ + + // Get numbered substrings if vec_num isn't null + if (vec_num) { //must do null check + if(!getNumberedSubstrings(rc, subject, ovector, ovector_count)) + return count; + } + + if (vec_nas || vec_ntn) { + //must call this whether we have named substrings or not: + if(!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector)) + return count; + } + } /* End of loop to find second and subsequent matches */ + + if(mdc) + Pcre2Func::match_data_free(match_data); + // Must not free code. This function has no right to modify regex. + return count; +} + +#undef JPCRE2_VECTOR_DATA_ASSERT +#undef JPCRE2_UNUSED +#undef JPCRE2_USE_MINIMUM_CXX_11 + +//some macro documentation for doxygen + +#ifdef __DOXYGEN__ + + +#ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK +#define JPCRE2_USE_FUNCTION_POINTER_CALLBACK +#endif + +#ifndef JPCRE2_NDEBUG +#define JPCRE2_NDEBUG +#endif + + +///@def JPCRE2_USE_FUNCTION_POINTER_CALLBACK +///Use function pointer in all cases for MatchEvaluatorCallback function. +///By default function pointer is used for callback in MatchEvaluator when using =C++11` compiler `std::function` instead of function pointer is used. +///If this macro is defined before including jpcre2.hpp, function pointer will be used in all cases. +///It you are using lambda function with captures, stick with `std::function`, on the other hand, if +///you are using older compilers, you might want to use function pointer instead. +/// +///For example, with gcc-4.7, `std::function` will give compile error in C++11 mode, in such cases where full C++11 +///support is not available, use function pointer. 
+ + +///@def JPCRE2_ASSERT(cond, msg) +///Macro to call `jpcre2::jassert()` with file path and line number. +///When `NDEBUG` or `JPCRE2_NDEBUG` is defined before including this header, this macro will +///be defined as `((void)0)` thus eliminating this assertion. +///@param cond condtion (boolean) +///@param msg message + + +///@def JPCRE2_NDEBUG +///Macro to remove debug codes. +///Using this macro is discouraged even in production mode but provided for completeness. +///You should not use this macro to bypass any error in your program. +///Define this macro before including this header if you want to remove debug codes included in this library. +/// +///Using the standard `NDEBUG` macro will have the same effect, +///but it is recommended that you use `JPCRE2_NDEBUG` to strip out debug codes specifically for this library. + + +///@def JPCRE2_UNSET_CAPTURES_NULL +///Define to change the type of NumSub so that captures are recorded +///with std::optional. It is undefined by default. This feature requires C++17. + +#endif + + +#endif diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index 14712ff11..2bf858f69 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -57,7 +57,7 @@ #include "rowstorage.h" //..comment out NDEBUG to enable assertions, uncomment NDEBUG to disable -// #define NDEBUG +//#define NDEBUG #include "mcs_decimal.h" using namespace std; @@ -315,7 +315,7 @@ void RowAggregation::updateStringMinMax(utils::NullString val1, utils::NullStrin if (val1.isNull()) { // as any comparison with NULL is false, it should not affect min/max ranges. - return; // do nothing. + return ; // do nothing. } CHARSET_INFO* cs = fRow.getCharset(col); int tmp = cs->strnncoll(val1.str(), val1.length(), val2.str(), val2.length()); @@ -810,9 +810,8 @@ void RowAggregation::aggregateRow(Row& row, const uint64_t* hash, std::vector* rgContextColl) { uint32_t cnt = fRollupFlag ? 
fGroupByCols.size() : 1; - for (uint32_t z = 0; z < cnt; z++) - { - // groupby column list is not empty, find the entry. + for (uint32_t z = 0; z < cnt; z++) { + // groupby column list is not empty, find the entry. if (!fGroupByCols.empty()) { bool is_new_row; @@ -857,8 +856,7 @@ void RowAggregation::aggregateRow(Row& row, const uint64_t* hash, updateEntry(row, rgContextColl); // these quantities are unsigned and comparing z and cnt - 1 can be incorrect // because cnt can be zero. - if ((z + 1 < cnt)) - { + if ((z + 1 < cnt)) { // if we are rolling up, we mark appropriate field as NULL and also increment // value in the "mark" column, so that we can differentiate between data and // various rollups. @@ -1171,8 +1169,8 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i { if (LIKELY(rowIn.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH)) { - updateIntMinMax(rowIn.getTSInt128Field(colIn).getValue(), fRow.getTSInt128Field(colOut).getValue(), - colOut, funcType); + updateIntMinMax(rowIn.getTSInt128Field(colIn).getValue(), fRow.getTSInt128Field(colOut).getValue(), colOut, + funcType); } else if (rowIn.getColumnWidth(colIn) <= datatypes::MAXLEGACYWIDTH) { @@ -1254,6 +1252,108 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i } } } +//------------------------------------------------------------------------------ +// Update the fields with anything that goes in. +// rowIn(in) - Row to be included in aggregation. 
+// colIn(in) - column in the input row group +// colOut(in) - column in the output row group +//------------------------------------------------------------------------------ +void RowAggregation::doSelectSome(const Row& rowIn, int64_t colIn, int64_t colOut) +{ + int colDataType = (fRowGroupIn.getColTypes())[colIn]; + + switch (colDataType) + { + case execplan::CalpontSystemCatalog::UTINYINT: + case execplan::CalpontSystemCatalog::USMALLINT: + case execplan::CalpontSystemCatalog::UMEDINT: + case execplan::CalpontSystemCatalog::UINT: + case execplan::CalpontSystemCatalog::UBIGINT: + case execplan::CalpontSystemCatalog::TINYINT: + case execplan::CalpontSystemCatalog::SMALLINT: + case execplan::CalpontSystemCatalog::MEDINT: + case execplan::CalpontSystemCatalog::INT: + case execplan::CalpontSystemCatalog::BIGINT: + { + fRow.setIntField(rowIn.getIntField(colIn), colOut); + break; + } + + case execplan::CalpontSystemCatalog::DECIMAL: + case execplan::CalpontSystemCatalog::UDECIMAL: + { + if (LIKELY(rowIn.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH)) + { + fRow.setInt128Field(rowIn.getTSInt128Field(colIn).getValue(), colOut); + } + else if (rowIn.getColumnWidth(colIn) <= datatypes::MAXLEGACYWIDTH) + { + fRow.setIntField(rowIn.getIntField(colIn), colOut); + } + else + { + idbassert(0); + throw std::logic_error("RowAggregation::doMinMax(): DECIMAL bad length."); + } + + break; + } + + case execplan::CalpontSystemCatalog::CHAR: + case execplan::CalpontSystemCatalog::VARCHAR: + case execplan::CalpontSystemCatalog::TEXT: + { + auto valIn = rowIn.getStringField(colIn); + fRow.setStringField(valIn, colOut); + break; + } + + case execplan::CalpontSystemCatalog::DOUBLE: + case execplan::CalpontSystemCatalog::UDOUBLE: + { + double valIn = rowIn.getDoubleField(colIn); + fRow.setDoubleField(valIn, colOut); + break; + } + + case execplan::CalpontSystemCatalog::FLOAT: + case execplan::CalpontSystemCatalog::UFLOAT: + { + float valIn = rowIn.getFloatField(colIn); + 
fRow.setFloatField(valIn, colOut); + break; + } + + case execplan::CalpontSystemCatalog::DATE: + case execplan::CalpontSystemCatalog::DATETIME: + case execplan::CalpontSystemCatalog::TIMESTAMP: + case execplan::CalpontSystemCatalog::TIME: + { + uint64_t valIn = rowIn.getUintField(colIn); + fRow.setUintField(valIn, colOut); + break; + } + + case execplan::CalpontSystemCatalog::LONGDOUBLE: + { + long double valIn = rowIn.getLongDoubleField(colIn); + fRow.setLongDoubleField(valIn, colOut); + break; + } + + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: + { + fRow.setVarBinaryField(rowIn.getVarBinaryField(colIn), rowIn.getVarBinaryLength(colIn), colOut); + break; + } + default: + { + idbassert_s(0, "unknown data type in doSelectSome()"); + break; + } + } +} //------------------------------------------------------------------------------ // Update the sum fields if input is not null. @@ -1723,6 +1823,11 @@ void RowAggregation::updateEntry(const Row& rowIn, std::vector>& inRows) @@ -4700,6 +4802,12 @@ void RowAggregationDistinct::updateEntry(const Row& rowIn, std::vector* rgContextColl = nullptr); void mergeEntries(const Row& row); virtual void doMinMax(const Row&, int64_t, int64_t, int); + virtual void doSelectSome(const Row& rowIn, int64_t colIn, int64_t colOut); virtual void doSum(const Row&, int64_t, int64_t, int); virtual void doAvg(const Row&, int64_t, int64_t, int64_t, bool merge = false); virtual void doStatistics(const Row&, int64_t, int64_t, int64_t); @@ -677,17 +681,6 @@ class RowAggregationUM : public RowAggregation */ bool nextRowGroup(); - /** @brief Returns aggregated rows in a RowGroup as long as there are still not returned result RowGroups. - * - * This function should be called repeatedly until false is returned (meaning end of data). - * Returns data from in-memory storage, as well as spilled data from disk. 
If disk-based aggregation is - * happening, finalAggregation() should be called before returning result RowGroups to finalize the used - * RowAggStorages, merge different spilled generations and obtain correct aggregation results. - * - * @returns True if there are more result RowGroups, else false if all results have been returned. - */ - bool nextOutputRowGroup(); - /** @brief Add an aggregator for DISTINCT aggregation */ void distinctAggregator(const boost::shared_ptr& da) diff --git a/utils/rowgroup/rowstorage.cpp b/utils/rowgroup/rowstorage.cpp index 9b9e47d1c..585dc8ed0 100644 --- a/utils/rowgroup/rowstorage.cpp +++ b/utils/rowgroup/rowstorage.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include "rowgroup.h" #include #include @@ -80,11 +79,6 @@ std::string errorString(int errNo) auto* buf = strerror_r(errNo, tmp, sizeof(tmp)); return {buf}; } - -size_t findFirstSetBit(const uint64_t mask) -{ - return __builtin_ffsll(mask); -} } // anonymous namespace namespace rowgroup @@ -558,7 +552,7 @@ class Dumper class RowGroupStorage { public: - using RGDataStorage = std::vector; + using RGDataStorage = std::vector>; public: /** @brief Default constructor @@ -619,54 +613,6 @@ class RowGroupStorage return fRowGroupOut->getSizeWithStrings(fMaxRows); } - // This shifts data within RGData such that it compacts the non finalized rows - PosOpos shiftRowsInRowGroup(RGDataUnPtr& rgdata, uint64_t fgid, uint64_t tgid) - { - uint64_t pos = 0; - uint64_t opos = 0; - - fRowGroupOut->setData(rgdata.get()); - for (auto i = fgid; i < tgid; ++i) - { - if ((i - fgid) * HashMaskElements >= fRowGroupOut->getRowCount()) - break; - uint64_t mask = ~fFinalizedRows[i]; - if ((i - fgid + 1) * HashMaskElements > fRowGroupOut->getRowCount()) - { - mask &= (~0ULL) >> ((i - fgid + 1) * HashMaskElements - fRowGroupOut->getRowCount()); - } - opos = (i - fgid) * HashMaskElements; - - if (mask == ~0ULL) - { - if (LIKELY(pos != opos)) - moveRows(rgdata.get(), pos, opos, HashMaskElements); 
- pos += HashMaskElements; - continue; - } - - if (mask == 0) - continue; - - while (mask != 0) - { - // find position until block full of not finalized rows. - size_t b = findFirstSetBit(mask); - size_t e = findFirstSetBit(~(mask >> b)) + b; - if (UNLIKELY(e >= HashMaskElements)) - mask = 0; - else - mask >>= e; - if (LIKELY(pos != opos + b - 1)) - moveRows(rgdata.get(), pos, opos + b - 1, e - b); - pos += e - b; - opos += e; - } - --opos; - } - return {pos, opos}; - } - /** @brief Take away RGDatas from another RowGroupStorage * * If some of the RGDatas is not in the memory do not load them, @@ -680,7 +626,7 @@ class RowGroupStorage } void append(RowGroupStorage* o) { - RGDataUnPtr rgd; + std::unique_ptr rgd; std::string ofname; while (o->getNextRGData(rgd, ofname)) { @@ -720,130 +666,11 @@ class RowGroupStorage } } - /** @brief Get the last RGData from fRGDatas, remove it from the vector and return its id. - * - * @param rgdata The RGData to be retrieved - */ - uint64_t getLastRGData(RGDataUnPtr& rgdata) - { - assert(!fRGDatas.empty()); - uint64_t rgid = fRGDatas.size() - 1; - rgdata = std::move(fRGDatas[rgid]); - fRGDatas.pop_back(); - return rgid; - } - - static FgidTgid calculateGids(const uint64_t rgid, const uint64_t fMaxRows) - { - // Calculate from first and last uint64_t entry in fFinalizedRows BitMap - // which contains information about rows in the RGData. - uint64_t fgid = rgid * fMaxRows / HashMaskElements; - uint64_t tgid = fgid + fMaxRows / HashMaskElements; - return {fgid, tgid}; - } - - /** @brief Used to output aggregation results from memory and disk in the current generation in the form of - * RGData. Returns next RGData, loads from disk if necessary. Skips finalized rows as they would contain - * duplicate results, compacts actual rows into start of RGData and adapts number of rows transmitted in - * RGData. - * @returns A pointer to the next RGData or an empty pointer if there are no more RGDatas in this - * generation. 
- */ - bool getNextOutputRGData(RGDataUnPtr& rgdata) - { - if (UNLIKELY(fRGDatas.empty())) - { - fMM->release(); - return false; - } - - while (!fRGDatas.empty()) - { - auto rgid = getLastRGData(rgdata); - auto [fgid, tgid] = calculateGids(rgid, fMaxRows); - - if (fFinalizedRows.size() <= fgid) - { - // There are no finalized rows in this RGData. We can just return it. - // Load from disk if necessary and unlink DumpFile. - if (!rgdata) - { - loadRG(rgid, rgdata, true); - } - return true; - } - - if (tgid >= fFinalizedRows.size()) - fFinalizedRows.resize(tgid + 1, 0ULL); - - // Check if there are rows to process - bool hasReturnRows = false; - for (auto i = fgid; i < tgid; ++i) - { - if (fFinalizedRows[i] != ~0ULL) - { - // Not all rows are finalized, we have to return at least parts of this RGData - hasReturnRows = true; - break; - } - } - - if (rgdata) - { - // RGData is currently in memory - if (!hasReturnRows) - { - // All rows are finalized, don't return this RGData - continue; - } - } - else - { - if (hasReturnRows) - { - // Load RGData from disk, unlink dump file and continue processing - loadRG(rgid, rgdata, true); - } - else - { - // All rows are finalized. Unlink dump file and continue search for return RGData - unlink(makeRGFilename(rgid).c_str()); - continue; - } - } - - auto [pos, opos] = shiftRowsInRowGroup(rgdata, fgid, tgid); - - // Nothing got shifted at all -> all rows must be finalized. If all rows finalized remove - // RGData and file and don't give it out. - if (pos == 0) - { - fLRU->remove(rgid); - unlink(makeRGFilename(rgid).c_str()); - continue; - } - - // set RGData with number of not finalized rows which have been compacted at front of RGData - fRowGroupOut->setData(rgdata.get()); - fRowGroupOut->setRowCount(pos); - int64_t memSz = fRowGroupOut->getSizeWithStrings(fMaxRows); - - // Release the memory used by the current rgdata from this MemoryManager. 
- fMM->release(memSz); - unlink(makeRGFilename(rgid).c_str()); - - // to periodically clean up freed memory so it can be used by other threads. - fLRU->remove(rgid); - return true; - } - return false; - } - /** @brief Returns next RGData, load it from disk if necessary. * * @returns pointer to the next RGData or empty pointer if there is nothing */ - RGDataUnPtr getNextRGData() + std::unique_ptr getNextRGData() { while (!fRGDatas.empty()) { @@ -1203,7 +1030,7 @@ class RowGroupStorage * @param fname(out) Filename of the dump if it's not in the memory * @returns true if there is available RGData */ - bool getNextRGData(RGDataUnPtr& rgdata, std::string& fname) + bool getNextRGData(std::unique_ptr& rgdata, std::string& fname) { if (UNLIKELY(fRGDatas.empty())) { @@ -1212,9 +1039,12 @@ class RowGroupStorage } while (!fRGDatas.empty()) { - auto rgid = getLastRGData(rgdata); - auto [fgid, tgid] = calculateGids(rgid, fMaxRows); + uint64_t rgid = fRGDatas.size() - 1; + rgdata = std::move(fRGDatas[rgid]); + fRGDatas.pop_back(); + uint64_t fgid = rgid * fMaxRows / 64; + uint64_t tgid = fgid + fMaxRows / 64; if (fFinalizedRows.size() > fgid) { if (tgid >= fFinalizedRows.size()) @@ -1238,7 +1068,45 @@ class RowGroupStorage continue; } - auto [pos, opos] = shiftRowsInRowGroup(rgdata, fgid, tgid); + uint64_t pos = 0; + uint64_t opos = 0; + fRowGroupOut->setData(rgdata.get()); + for (auto i = fgid; i < tgid; ++i) + { + if ((i - fgid) * 64 >= fRowGroupOut->getRowCount()) + break; + uint64_t mask = ~fFinalizedRows[i]; + if ((i - fgid + 1) * 64 > fRowGroupOut->getRowCount()) + { + mask &= (~0ULL) >> ((i - fgid + 1) * 64 - fRowGroupOut->getRowCount()); + } + opos = (i - fgid) * 64; + if (mask == ~0ULL) + { + if (LIKELY(pos != opos)) + moveRows(rgdata.get(), pos, opos, 64); + pos += 64; + continue; + } + + if (mask == 0) + continue; + + while (mask != 0) + { + size_t b = __builtin_ffsll(mask); + size_t e = __builtin_ffsll(~(mask >> b)) + b; + if (UNLIKELY(e >= 64)) + mask = 0; + else + 
mask >>= e; + if (LIKELY(pos != opos + b - 1)) + moveRows(rgdata.get(), pos, opos + b - 1, e - b); + pos += e - b; + opos += e; + } + --opos; + } if (pos == 0) { @@ -1251,7 +1119,6 @@ class RowGroupStorage fRowGroupOut->setRowCount(pos); } - // Release the memory used by the current rgdata. if (rgdata) { fRowGroupOut->setData(rgdata.get()); @@ -1263,7 +1130,6 @@ class RowGroupStorage { fname = makeRGFilename(rgid); } - // to periodically clean up freed memory so it can be used by other threads. fLRU->remove(rgid); return true; } @@ -1303,7 +1169,7 @@ class RowGroupStorage loadRG(rgid, fRGDatas[rgid]); } - void loadRG(uint64_t rgid, RGDataUnPtr& rgdata, bool unlinkDump = false) + void loadRG(uint64_t rgid, std::unique_ptr& rgdata, bool unlinkDump = false) { auto fname = makeRGFilename(rgid); @@ -1871,7 +1737,7 @@ void RowAggStorage::append(RowAggStorage& other) } } -RGDataUnPtr RowAggStorage::getNextRGData() +std::unique_ptr RowAggStorage::getNextRGData() { if (!fStorage) { @@ -1882,43 +1748,6 @@ RGDataUnPtr RowAggStorage::getNextRGData() return fStorage->getNextRGData(); } -bool RowAggStorage::getNextOutputRGData(RGDataUnPtr& rgdata) -{ - if (!fStorage) - { - return {}; - } - - cleanup(); - freeData(); - - // fGeneration is an unsigned int, we need a signed int for a comparison >= 0 - int32_t gen = fGeneration; - while (gen >= 0) - { - bool moreInGeneration = fStorage->getNextOutputRGData(rgdata); - - if (moreInGeneration) - { - fRowGroupOut->setData(rgdata.get()); - return true; - } - - // all generations have been emptied - if (fGeneration == 0) - { - break; - } - - // current generation has no more RGDatas to return - // load earlier generation and continue with returning its RGDatas - gen--; - fGeneration--; - fStorage.reset(fStorage->clone(fGeneration)); - } - return false; -} - void RowAggStorage::freeData() { for (auto& data : fGens) diff --git a/utils/rowgroup/rowstorage.h b/utils/rowgroup/rowstorage.h index 90a15b35c..50151dc61 100644 --- 
a/utils/rowgroup/rowstorage.h +++ b/utils/rowgroup/rowstorage.h @@ -20,7 +20,6 @@ #include "resourcemanager.h" #include "rowgroup.h" #include "idbcompress.h" -#include #include #include #include @@ -36,15 +35,10 @@ class RowPosHashStorage; using RowPosHashStoragePtr = std::unique_ptr; class RowGroupStorage; -using RGDataUnPtr = std::unique_ptr; -using PosOpos = std::pair; -using FgidTgid = std::pair; - uint64_t hashRow(const rowgroup::Row& r, std::size_t lastCol); constexpr const size_t MaxConstStrSize = 2048ULL; constexpr const size_t MaxConstStrBufSize = MaxConstStrSize << 1; -constexpr const uint64_t HashMaskElements = 64ULL; class RowAggStorage { @@ -103,12 +97,6 @@ class RowAggStorage */ std::unique_ptr getNextRGData(); - /** @brief Remove last RGData from in-memory storage or disk. - * Iterates over all generations on disk if available. - * @returns True if RGData is returned in parameter or false if no more RGDatas can be returned. - */ - bool getNextOutputRGData(std::unique_ptr& rgdata); - /** @brief TODO * * @param mergeFunc diff --git a/versioning/BRM/sessionmanagerserver.cpp b/versioning/BRM/sessionmanagerserver.cpp index bf0c7ccf9..427bafd69 100644 --- a/versioning/BRM/sessionmanagerserver.cpp +++ b/versioning/BRM/sessionmanagerserver.cpp @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/writeengine/bulk/we_columninfo.cpp b/writeengine/bulk/we_columninfo.cpp index 7d59d6da9..f2e4d0e7d 100644 --- a/writeengine/bulk/we_columninfo.cpp +++ b/writeengine/bulk/we_columninfo.cpp @@ -1672,7 +1672,7 @@ int ColumnInfo::updateDctnryStore(char* buf, ColPosPair** pos, const int totalRo // column. // This only applies to default text mode. This step is bypassed for // binary imports, because in that case, the data is already true binary. 
- if (((curCol.colType == WR_VARBINARY) || (curCol.colType == WR_BLOB)) && + if (((curCol.colType == WR_VARBINARY) || (curCol.colType == WR_BLOB && fpTableInfo->readFromSTDIN())) && (fpTableInfo->getImportDataMode() == IMPORT_DATA_TEXT)) { #ifdef PROFILE diff --git a/writeengine/bulk/we_tableinfo.cpp b/writeengine/bulk/we_tableinfo.cpp index 8876b64b7..011c70b5c 100644 --- a/writeengine/bulk/we_tableinfo.cpp +++ b/writeengine/bulk/we_tableinfo.cpp @@ -2381,5 +2381,10 @@ int TableInfo::allocateBRMColumnExtent(OID columnOID, uint16_t dbRoot, uint32_t& return rc; } +bool TableInfo::readFromSTDIN() +{ + return fReadFromStdin; +} + } // namespace WriteEngine // end of namespace diff --git a/writeengine/bulk/we_tableinfo.h b/writeengine/bulk/we_tableinfo.h index f5a09ec90..d7ba5ab56 100644 --- a/writeengine/bulk/we_tableinfo.h +++ b/writeengine/bulk/we_tableinfo.h @@ -77,7 +77,7 @@ class TableInfo : public WeUIDGID FILE* fHandle; // Handle to the input load file int fCurrentReadBuffer; // Id of current buffer being popu- // lated by the read thread - RID fTotalReadRows; // Total number of rows read + RID fTotalReadRows; // Total number of rows read unsigned fTotalErrRows; // Total error rows among all input // for this table. 
Is volatile to // insure parser & reader threads @@ -459,6 +459,8 @@ class TableInfo : public WeUIDGID void setJobUUID(const boost::uuids::uuid& jobUUID); + bool readFromSTDIN(); + public: friend class BulkLoad; friend class ColumnInfo; diff --git a/writeengine/server/we_dmlcommandproc.cpp b/writeengine/server/we_dmlcommandproc.cpp index 5260c66ce..79f62ad34 100644 --- a/writeengine/server/we_dmlcommandproc.cpp +++ b/writeengine/server/we_dmlcommandproc.cpp @@ -3696,16 +3696,6 @@ uint8_t WE_DMLCommandProc::processUpdate(messageqcpp::ByteStream& bs, std::strin inData = columnsUpdated[j]->get_DataVector()[0]; } - if (((colType.colDataType == execplan::CalpontSystemCatalog::DATE) && (inData.safeString("").compare("0000-00-00") == 0)) || - ((colType.colDataType == execplan::CalpontSystemCatalog::DATETIME) && - (inData.safeString("").compare("0000-00-00 00:00:00") == 0)) || - ((colType.colDataType == execplan::CalpontSystemCatalog::TIMESTAMP) && - (inData.safeString("").compare("0000-00-00 00:00:00") == 0))) - { - inData.dropString(); - isNull = true; - } - uint64_t nextVal = 0; if (colType.autoincrement) diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index a2d5f5388..b556f8100 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1099,8 +1099,8 @@ inline void allocateValArray(void*& valArray, ColTupleList::size_type totalRow, case WriteEngine::WR_VARBINARY: // treat same as char for now case WriteEngine::WR_CHAR: case WriteEngine::WR_BLOB: - case WriteEngine::WR_TEXT: valArray = calloc(sizeof(char), totalRow * MAX_COLUMN_BOUNDARY); break; - case WriteEngine::WR_TOKEN: valArray = calloc(sizeof(Token), totalRow); break; + case WriteEngine::WR_TEXT: valArray = calloc(totalRow * MAX_COLUMN_BOUNDARY, sizeof(char)); break; + case WriteEngine::WR_TOKEN: valArray = calloc(totalRow, sizeof(Token)); break; default: valArray = calloc(totalRow, colWidth); break; } }