You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-11-03 17:13:17 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			274 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			274 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
#include <unistd.h>
 | 
						|
#include <sys/types.h>
 | 
						|
#include <sys/stat.h>
 | 
						|
#include <fcntl.h>
 | 
						|
#include <cerrno>
 | 
						|
//#define NDEBUG
 | 
						|
#include <cassert>
 | 
						|
#include <cstring>
 | 
						|
 | 
						|
#include <lzo/lzoconf.h>
 | 
						|
#include <lzo/lzo1x.h>
 | 
						|
 | 
						|
#include <iostream>
 | 
						|
#include <iomanip>
 | 
						|
#include <sstream>
 | 
						|
#include <inttypes.h>
 | 
						|
#include <cstdlib>
 | 
						|
using namespace std;
 | 
						|
 | 
						|
#include <boost/scoped_array.hpp>
 | 
						|
 | 
						|
namespace
 | 
						|
{
 | 
						|
size_t exp_buf_size;
 | 
						|
unsigned vflg;
 | 
						|
 | 
						|
const string lzo_rctos(int r)
 | 
						|
{
 | 
						|
    switch (r)
 | 
						|
    {
 | 
						|
        case LZO_E_INPUT_NOT_CONSUMED:
 | 
						|
            return "LZO_E_INPUT_NOT_CONSUMED";
 | 
						|
 | 
						|
        default:
 | 
						|
            break;
 | 
						|
    }
 | 
						|
 | 
						|
    return "Unknown Error!";
 | 
						|
}
 | 
						|
 | 
						|
/**
 * Print the command-line synopsis and the meaning of each option to stdout.
 *
 * Lists every option the program parses, including -v, which enables the
 * per-chunk report and trial decompression.
 */
void usage()
{
    cout << "usage: idb_comp [-e size][-v][-h] file(s)" << endl
         << "\t-e size size (in KB) of expansion buffer" << endl
         << "\t-v      verbose: report each compressed chunk and test-decompress it" << endl
         << "\t-h      display this help" << endl;
}
 | 
						|
 | 
						|
int doit(const string& infile)
 | 
						|
{
 | 
						|
    int rc = 0;
 | 
						|
    int ifd = open(infile.c_str(), O_RDONLY | O_LARGEFILE | O_DIRECT);
 | 
						|
 | 
						|
    if (ifd < 0)
 | 
						|
    {
 | 
						|
        cerr << infile << ": open: " << strerror(errno) << endl;
 | 
						|
        return 1;
 | 
						|
    }
 | 
						|
 | 
						|
    struct stat istatbuf;
 | 
						|
 | 
						|
    fstat(ifd, &istatbuf);
 | 
						|
 | 
						|
    string outname(infile);
 | 
						|
 | 
						|
    string::size_type ptr;
 | 
						|
 | 
						|
    ptr = outname.find_last_of('.');
 | 
						|
 | 
						|
    if (ptr != string::npos)
 | 
						|
        outname.erase(ptr);
 | 
						|
 | 
						|
    ptr = outname.find_last_of('/');
 | 
						|
 | 
						|
    if (ptr != string::npos)
 | 
						|
        outname.erase(0, ptr + 1);
 | 
						|
 | 
						|
    outname = "./" + outname + ".cmp";
 | 
						|
 | 
						|
    int ofd = open(outname.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE | O_DIRECT, 0644);
 | 
						|
 | 
						|
    if (ofd < 0)
 | 
						|
    {
 | 
						|
        cerr << outname << ": open: " << strerror(errno) << endl;
 | 
						|
        close(ifd);
 | 
						|
        return 1;
 | 
						|
    }
 | 
						|
 | 
						|
    lzo_init();
 | 
						|
 | 
						|
    ssize_t nread = -1;
 | 
						|
    ssize_t nwritten = -1;
 | 
						|
    lzo_bytep ibuf;
 | 
						|
    lzo_bytep tibuf;
 | 
						|
    lzo_bytep cbuf;
 | 
						|
    lzo_bytep tcbuf;
 | 
						|
    lzo_bytep wkmem;
 | 
						|
    lzo_uint32 ibuf_len = 0;
 | 
						|
    lzo_uint cbuf_len = 0;
 | 
						|
 | 
						|
    ibuf_len = 512 * 1024 * 8;
 | 
						|
    tibuf = new lzo_byte[ibuf_len + 4095];
 | 
						|
 | 
						|
    if ((ptrdiff_t)tibuf & 0xfffULL)
 | 
						|
        ibuf = (lzo_bytep)((ptrdiff_t)tibuf & 0xfffffffffffff000ULL) + 4096;
 | 
						|
    else
 | 
						|
        ibuf = tibuf;
 | 
						|
 | 
						|
    cbuf_len = 512 * 1024 * 8;
 | 
						|
    tcbuf = new lzo_byte[cbuf_len + 4095 + exp_buf_size * 1024];
 | 
						|
 | 
						|
    if ((ptrdiff_t)tcbuf & 0xfff)
 | 
						|
        cbuf = (lzo_bytep)((ptrdiff_t)tcbuf & 0xfffffffffffff000ULL) + 4096;
 | 
						|
    else
 | 
						|
        cbuf = tcbuf;
 | 
						|
 | 
						|
    wkmem = new lzo_byte[LZO1X_1_15_MEM_COMPRESS];
 | 
						|
 | 
						|
    int r = LZO_E_OK;
 | 
						|
 | 
						|
    const int TOTAL_HDR_LEN = 4096 * 2;
 | 
						|
    char* hdrbuf = new char[TOTAL_HDR_LEN + 4095];
 | 
						|
    memset(hdrbuf, 0, TOTAL_HDR_LEN + 4095);
 | 
						|
    char* hdrbufp = 0;
 | 
						|
 | 
						|
    if ((ptrdiff_t)hdrbuf & 0xfff)
 | 
						|
        hdrbufp = (char*)((ptrdiff_t)hdrbuf & 0xfffffffffffff000ULL) + 4096;
 | 
						|
    else
 | 
						|
        hdrbufp = hdrbuf;
 | 
						|
 | 
						|
    struct compHdr
 | 
						|
    {
 | 
						|
        uint64_t ptrs[512];
 | 
						|
    };
 | 
						|
 | 
						|
    idbassert(sizeof(compHdr) <= 4096);
 | 
						|
 | 
						|
    compHdr* hdrptr1 = (compHdr*)hdrbufp;
 | 
						|
    compHdr* hdrptr  = hdrptr1 + 1; // advance to 2nd hdr to store compression ptrs
 | 
						|
    lseek(ofd, TOTAL_HDR_LEN, SEEK_SET);
 | 
						|
 | 
						|
    nread = read(ifd, ibuf, ibuf_len);
 | 
						|
 | 
						|
    int idx = 0;
 | 
						|
    off_t cmpoff = TOTAL_HDR_LEN;
 | 
						|
 | 
						|
    while (nread > 0)
 | 
						|
    {
 | 
						|
        cbuf_len = 512 * 1024 * 8;
 | 
						|
        memset(cbuf, 0, cbuf_len);
 | 
						|
        r = lzo1x_1_15_compress(ibuf, nread, cbuf, &cbuf_len, wkmem);
 | 
						|
 | 
						|
        if (r != LZO_E_OK)
 | 
						|
        {
 | 
						|
            cerr << "compression failed!: " << r << endl;
 | 
						|
            rc = 1;
 | 
						|
            goto out;
 | 
						|
        }
 | 
						|
 | 
						|
        if (cbuf_len > (unsigned)nread)
 | 
						|
        {
 | 
						|
            cerr << "WARNING: expansion detected! (output grew by " << (cbuf_len - nread) << " bytes)" << endl;
 | 
						|
            idbassert((cbuf_len - nread) <= exp_buf_size * 1024);
 | 
						|
        }
 | 
						|
 | 
						|
        if (cbuf_len & 0xfff)
 | 
						|
            cbuf_len = (cbuf_len & 0xfffffffffffff000ULL) + 4096;
 | 
						|
 | 
						|
        //cbuf_len = 512 * 1024 * 8;
 | 
						|
        nwritten = write(ofd, cbuf, cbuf_len);
 | 
						|
 | 
						|
        if (nwritten < 0 || (unsigned)nwritten != cbuf_len)
 | 
						|
        {
 | 
						|
            cerr << outname << ": write: " << strerror(errno) << " (" << nwritten << ')' << endl;
 | 
						|
            rc = 1;
 | 
						|
            goto out;
 | 
						|
        }
 | 
						|
 | 
						|
        if (vflg > 0)
 | 
						|
        {
 | 
						|
            lzo_bytep tbuf;
 | 
						|
            lzo_uint tbuflen = 4 * 1024 * 1024 + 4;
 | 
						|
            boost::scoped_array<lzo_byte> tbuf_sa(new lzo_byte[tbuflen]);
 | 
						|
            tbuf = tbuf_sa.get();
 | 
						|
            cout << "idx: " << idx << " off: " << cmpoff << " size: " << cbuf_len;
 | 
						|
            r = lzo1x_decompress(cbuf, cbuf_len, tbuf, &tbuflen, 0);
 | 
						|
            cout << " r: " << lzo_rctos(r) << " size: " << tbuflen << endl;
 | 
						|
        }
 | 
						|
 | 
						|
        hdrptr->ptrs[idx] = cmpoff;
 | 
						|
        idx++;
 | 
						|
        cmpoff += cbuf_len;
 | 
						|
 | 
						|
        nread = read(ifd, ibuf, ibuf_len);
 | 
						|
    }
 | 
						|
 | 
						|
    if (nread < 0)
 | 
						|
    {
 | 
						|
        cerr << infile << ": read: " << strerror(errno) << endl;
 | 
						|
        rc = 1;
 | 
						|
        goto out;
 | 
						|
    }
 | 
						|
 | 
						|
    hdrptr->ptrs[idx] = cmpoff;
 | 
						|
    idbassert(idx <= 64);
 | 
						|
 | 
						|
    // Fill in meta-data information in first header
 | 
						|
    hdrptr1->ptrs[0] = 0xfdc119a384d0778eULL;
 | 
						|
    hdrptr1->ptrs[1] = 1;
 | 
						|
    hdrptr1->ptrs[2] = 1;
 | 
						|
 | 
						|
    nwritten = pwrite(ofd, hdrbufp, TOTAL_HDR_LEN, 0);
 | 
						|
    idbassert(nwritten == TOTAL_HDR_LEN);
 | 
						|
 | 
						|
out:
 | 
						|
    delete [] wkmem;
 | 
						|
    delete [] tcbuf;
 | 
						|
    delete [] tibuf;
 | 
						|
    fsync(ofd);
 | 
						|
    struct stat ostatbuf;
 | 
						|
    fstat(ofd, &ostatbuf);
 | 
						|
    idbassert(ostatbuf.st_size == (signed)hdrptr->ptrs[idx]);
 | 
						|
    delete [] hdrbuf;
 | 
						|
    cout << infile << ": Input Size: " << istatbuf.st_size
 | 
						|
         << " Output size: " << ostatbuf.st_size
 | 
						|
         << " Compression: " << (100LL - (ostatbuf.st_size * 100LL / istatbuf.st_size)) << '%' << endl;
 | 
						|
    close(ofd);
 | 
						|
    close(ifd);
 | 
						|
 | 
						|
    return rc;
 | 
						|
}
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
int main(int argc, char** argv)
 | 
						|
{
 | 
						|
    opterr = 0;
 | 
						|
    int c;
 | 
						|
    exp_buf_size = 128;
 | 
						|
    vflg = 0;
 | 
						|
 | 
						|
    while ((c = getopt(argc, argv, "e:vh")) != -1)
 | 
						|
        switch (c)
 | 
						|
        {
 | 
						|
            case 'e':
 | 
						|
                exp_buf_size = atoi(optarg);
 | 
						|
                break;
 | 
						|
 | 
						|
            case 'v':
 | 
						|
                vflg++;
 | 
						|
                break;
 | 
						|
 | 
						|
            case 'h':
 | 
						|
            default:
 | 
						|
                usage();
 | 
						|
                return (c == 'h' ? 0 : 1);
 | 
						|
                break;
 | 
						|
        }
 | 
						|
 | 
						|
    if ((argc - optind) < 1)
 | 
						|
    {
 | 
						|
        usage();
 | 
						|
        return 1;
 | 
						|
    }
 | 
						|
 | 
						|
    int rc = 0;
 | 
						|
 | 
						|
    for (int i = optind; i < argc; i++)
 | 
						|
        if (doit(argv[i]))
 | 
						|
            rc = 1;
 | 
						|
 | 
						|
    return rc;
 | 
						|
}
 | 
						|
 |