1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00
2025-02-21 20:02:38 +04:00

237 lines
7.8 KiB
C++

/* Copyright (C) 2014 InfiniDB, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#pragma once
#define _FILE_OFFSET_BITS 64
#include <stdlib.h>
#include <string>
#include "largefile64.h"
#include <sys/types.h>
#include "stdint.h"
#include <unistd.h>
#define INVALID_HANDLE_VALUE 0 // Defined already in MSC
namespace idbdatafile
{
/**
* IDBDataFile is a combination abstract base class and factory. The purpose
* is to encapsulate different underlying storage types from the InfiniDB
* engine. The interface is designed so that behavior is consistent across
* storage types, knowing that options like HDFS are somewhat limited as
* compared to normal POSIX file systems.
*
* to-do think about const-correctness
*/
class IDBDataFile
{
 public:
  /**
   * The Types enum defines the supported underlying storage types
   */
  enum Types
  {
    UNKNOWN = 0x0000,
    BUFFERED = 0x0001,
    UNBUFFERED = 0x0002,
    HDFS = 0x0003,
    CLOUD = 0x0004,
  };

  /**
   * The Options enum defines elements of a bitmask that can be passed into
   * the open method for controlling specific options relating to the
   * particular file type in question.  If an option is unsupported by the
   * particular type the open() method will fail.
   *   USE_ODIRECT - requests the O_DIRECT flag to be set, only for
   *                 unbuffered files
   *   USE_VBUF    - requests that a non-default buffer is used for a
   *                 buffered file.  For now, the size of the buffer is
   *                 hard-coded to match DEFAULT_BUFSIZ in writeengine
   *                 (1*1024*1024)
   *   USE_NOVBUF  - requests that buffering is disabled when using a
   *                 buffered type file
   *   USE_TMPFILE - requests to output the membuffer to a temp file i/o
   *                 overwrite the original file
   */
  enum Options
  {
    USE_ODIRECT = 0x0001,
    USE_VBUF = 0x0002,
    USE_NOVBUF = 0x0004,
    USE_TMPFILE = 0x0008
  };

  /**
   * DEFSIZE is used as the starting size for the memory buffer of read/write
   * HDFS files.  These files are entirely buffered in memory and modified
   * there since HDFS does not support any file updates.  This value was
   * engineered to accommodate the standard default extent for a 4-byte column
   * <Deprecated>
   */
  static const int DEFSIZE = 33562624;

  /**
   * EXTENTSIZE is used, together with the passed in col width, as the
   * starting size for the memory buffer of read/write HDFS files.  These
   * files are entirely buffered in memory and modified there since HDFS does
   * not support any file updates.
   */
  static const int EXTENTSIZE = 8390656;

  /**
   * This is an alternate factory method that accepts a typical mode
   * string (ala fopen).  Note that in general IDBDataFile only recognizes
   * a subset of the possible mode string that fopen supports however
   * at present time, the mode string will be passed through directly to
   * the BufferedFile constructor and through to fopen.  The eventual goal
   * is not complete support but rather only that necessary for Infinidb so
   * this will be reconsidered.
   */
  static IDBDataFile* open(Types type, const char* fname, const char* mode, unsigned opts,
                           unsigned colWidth = 4);

  /**
   * Library users should assume that the destructor closes the file.
   * This actually occurs in the derived classes - hence the protected
   * member close() here
   */
  virtual ~IDBDataFile();

  /**
   * This returns the name of the file.
   */
  const std::string& name() const;

  /**
   * This is a positional read method similar to kernel style pread
   * or fseek followed by read for C-library FILE*.  Return value
   * is the number of bytes read.
   */
  virtual ssize_t pread(void* ptr, off64_t offset, size_t count) = 0;

  /**
   * This is a read method similar to kernel style read or C library
   * fread().  Return value is the number of bytes read.
   */
  virtual ssize_t read(void* ptr, size_t count) = 0;

  /**
   * The write() call semantics match the standard library.  There is
   * no positional write and further there is no lseek or equivalent so
   * all writing must therefore occur sequentially - either from the
   * beginning of a file (open via WRITE) or the end of a file (open via
   * APPEND).  Return value is the number of bytes written.
   */
  virtual ssize_t write(const void* ptr, size_t count) = 0;

  /**
   * The seek() method is equivalent to the lseek() and fseek() functions.
   * The whence parameter accepts SEEK_SET, SEEK_CUR, SEEK_END just as
   * those functions do.  Note that not all file systems support this
   * operation - ex. HDFS will not support it for files opened for writing
   * Returns 0 on success, -1 on error
   */
  virtual int seek(off64_t offset, int whence) = 0;

  /**
   * The truncate() method is equivalent to the ftruncate method.  Note
   * that not all file types support this operation - ex. HDFS files opened
   * for write or append do not, but HDFS files opened for modification do.
   * Returns 0 on success, -1 on error.
   */
  virtual int truncate(off64_t length) = 0;

  /**
   * The size() method returns the size of the file in a manner consistent
   * with the underlying filesystem.  Note that this method will always
   * return the correct size from the perspective of the open file handle,
   * thus depending on the semantics of the underlying file system an
   * external view of size may differ (ex. if writing buffered i/o before
   * a flush/sync or if writing an open HDFS file).  Returns -1 on error.
   */
  virtual off64_t size() = 0;

  /**
   * The tell() call returns the current offset in the file.  This is
   * similar to lseek with 0 offset in the standard library and ftell
   * for buffered FILE *s.
   */
  virtual off64_t tell() = 0;

  /**
   * The flush() method instructs the file to write any buffered contents
   * to disk.  Where relevant this method will include a call to fsync
   */
  virtual int flush() = 0;

  /**
   * The mtime() method returns the modification time of the file in
   * seconds.  Returns -1 on error.
   */
  virtual time_t mtime() = 0;

  /**
   * The fallocate() method preallocates disk space cheaper than
   * sequential write.  fallocate() is supported by a limited number
   * of FSes.  This method is implemented for Un-/BufferedFile classes
   * only.
   * Returns -1 on error.
   */
  virtual int fallocate(int mode, off64_t offset, off64_t length) = 0;

  /**
   * Returns the column width stored in m_fColWidth.  The accessor only
   * reads the member, so it is const and can be called through const
   * references and pointers.
   */
  int colWidth() const
  {
    return m_fColWidth;
  }

 protected:
  /**
   * Constructor - takes the filename to be stored in a member variable
   * for logging purposes
   */
  explicit IDBDataFile(const char* fname);

  /**
   * The close() method closes the file.  It is defined as protected
   * because the preference is for close() to happen automatically during
   * deletion of the object
   */
  virtual int close() = 0;

  /**
   * file name
   */
  std::string m_fname;

  /**
   * Column width. If not applicable, defaults to 4.
   * We use this to determine the initial size of hdfs ram buffers.
   */
  int m_fColWidth;
};
/**
 * Stores the file name (kept for logging purposes) and sets the column
 * width to 4.  A null fname is stored as an empty name, because
 * constructing a std::string from a null pointer is undefined behavior.
 */
inline IDBDataFile::IDBDataFile(const char* fname) : m_fname(fname != nullptr ? fname : ""), m_fColWidth(4)
{
}
/**
 * Defaulted destructor: the base class performs no work of its own here
 * beyond destroying its members.
 */
inline IDBDataFile::~IDBDataFile() = default;
inline const std::string& IDBDataFile::name() const
{
return m_fname;
}
} // namespace idbdatafile