1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-18 17:42:25 +03:00

Remove the recently added USE_SEGMENTED_FILES option, and indeed remove all
support for a nonsegmented mode from md.c.  Per recent discussions, there
doesn't seem to be much value in a "never segment" option as opposed to
segmenting with a suitably large segment size.  So instead provide a
configure-time switch to set the desired segment size in units of gigabytes.
While at it, expose a configure switch for BLCKSZ as well.

Zdenek Kotala
This commit is contained in:
Tom Lane
2008-05-02 01:08:27 +00:00
parent 94b0b545f8
commit 3c6248a828
8 changed files with 223 additions and 170 deletions

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/file/buffile.c,v 1.30 2008/03/10 20:06:27 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/file/buffile.c,v 1.31 2008/05/02 01:08:27 tgl Exp $
*
* NOTES:
*
@ -38,9 +38,9 @@
#include "storage/buffile.h"
/*
* We break BufFiles into gigabyte-sized segments, whether or not
* USE_SEGMENTED_FILES is defined. The reason is that we'd like large
* temporary BufFiles to be spread across multiple tablespaces when available.
* We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE.
* The reason is that we'd like large temporary BufFiles to be spread across
* multiple tablespaces when available.
*/
#define MAX_PHYSICAL_FILESIZE 0x40000000
#define BUFFILE_SEG_SIZE (MAX_PHYSICAL_FILESIZE / BLCKSZ)

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.137 2008/04/18 06:48:38 heikki Exp $
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.138 2008/05/02 01:08:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -56,7 +56,7 @@
* system's file size limit (often 2GBytes). In order to do that,
* we break relations up into "segment" files that are each shorter than
* the OS file size limit. The segment size is set by the RELSEG_SIZE
* configuration constant in pg_config_manual.h.
* configuration constant in pg_config.h.
*
* On disk, a relation must consist of consecutively numbered segment
* files in the pattern
@ -88,19 +88,13 @@
* segment, we assume that any subsequent segments are inactive.
*
* All MdfdVec objects are palloc'd in the MdCxt memory context.
*
* On platforms that support large files, USE_SEGMENTED_FILES can be
* #undef'd to disable the segmentation logic. In that case each
* relation is a single operating-system file.
*/
typedef struct _MdfdVec
{
File mdfd_vfd; /* fd number in fd.c's pool */
BlockNumber mdfd_segno; /* segment number, from 0 */
#ifdef USE_SEGMENTED_FILES
struct _MdfdVec *mdfd_chain; /* next segment, or NULL */
#endif
} MdfdVec;
static MemoryContext MdCxt; /* context for all md.c allocations */
@ -161,11 +155,8 @@ static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior);
static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
static void register_unlink(RelFileNode rnode);
static MdfdVec *_fdvec_alloc(void);
#ifdef USE_SEGMENTED_FILES
static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
int oflags);
#endif
static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,
bool isTemp, ExtensionBehavior behavior);
static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg);
@ -258,9 +249,7 @@ mdcreate(SMgrRelation reln, bool isRedo)
reln->md_fd->mdfd_vfd = fd;
reln->md_fd->mdfd_segno = 0;
#ifdef USE_SEGMENTED_FILES
reln->md_fd->mdfd_chain = NULL;
#endif
}
/*
@ -310,8 +299,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
path = relpath(rnode);
/*
* Delete or truncate the first segment, or only segment if not doing
* segmenting
* Delete or truncate the first segment.
*/
if (isRedo)
ret = unlink(path);
@ -344,8 +332,9 @@ mdunlink(RelFileNode rnode, bool isRedo)
rnode.relNode)));
}
#ifdef USE_SEGMENTED_FILES
/* Delete the additional segments, if any */
/*
* Delete any additional segments.
*/
else
{
char *segpath = (char *) palloc(strlen(path) + 12);
@ -374,7 +363,6 @@ mdunlink(RelFileNode rnode, bool isRedo)
}
pfree(segpath);
}
#endif
pfree(path);
@ -420,12 +408,8 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE);
#ifdef USE_SEGMENTED_FILES
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
#else
seekpos = (off_t) BLCKSZ * blocknum;
#endif
/*
* Note: because caller usually obtained blocknum by calling mdnblocks,
@ -469,9 +453,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
if (!isTemp)
register_dirty_segment(reln, v);
#ifdef USE_SEGMENTED_FILES
Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
#endif
}
/*
@ -530,10 +512,8 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior)
mdfd->mdfd_vfd = fd;
mdfd->mdfd_segno = 0;
#ifdef USE_SEGMENTED_FILES
mdfd->mdfd_chain = NULL;
Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE));
#endif
return mdfd;
}
@ -552,7 +532,6 @@ mdclose(SMgrRelation reln)
reln->md_fd = NULL; /* prevent dangling pointer after error */
#ifdef USE_SEGMENTED_FILES
while (v != NULL)
{
MdfdVec *ov = v;
@ -564,11 +543,6 @@ mdclose(SMgrRelation reln)
v = v->mdfd_chain;
pfree(ov);
}
#else
if (v->mdfd_vfd >= 0)
FileClose(v->mdfd_vfd);
pfree(v);
#endif
}
/*
@ -583,12 +557,8 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL);
#ifdef USE_SEGMENTED_FILES
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
#else
seekpos = (off_t) BLCKSZ * blocknum;
#endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
ereport(ERROR,
@ -653,12 +623,8 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL);
#ifdef USE_SEGMENTED_FILES
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
#else
seekpos = (off_t) BLCKSZ * blocknum;
#endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
ereport(ERROR,
@ -707,8 +673,6 @@ BlockNumber
mdnblocks(SMgrRelation reln)
{
MdfdVec *v = mdopen(reln, EXTENSION_FAIL);
#ifdef USE_SEGMENTED_FILES
BlockNumber nblocks;
BlockNumber segno = 0;
@ -764,9 +728,6 @@ mdnblocks(SMgrRelation reln)
v = v->mdfd_chain;
}
#else
return _mdnblocks(reln, v);
#endif
}
/*
@ -777,10 +738,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
{
MdfdVec *v;
BlockNumber curnblk;
#ifdef USE_SEGMENTED_FILES
BlockNumber priorblocks;
#endif
/*
* NOTE: mdnblocks makes sure we have opened all active segments, so that
@ -804,7 +762,6 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
v = mdopen(reln, EXTENSION_FAIL);
#ifdef USE_SEGMENTED_FILES
priorblocks = 0;
while (v != NULL)
{
@ -866,19 +823,6 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
}
priorblocks += RELSEG_SIZE;
}
#else
/* For unsegmented files, it's a lot easier */
if (FileTruncate(v->mdfd_vfd, (off_t) nblocks * BLCKSZ) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
nblocks)));
if (!isTemp)
register_dirty_segment(reln, v);
#endif
}
/*
@ -901,7 +845,6 @@ mdimmedsync(SMgrRelation reln)
v = mdopen(reln, EXTENSION_FAIL);
#ifdef USE_SEGMENTED_FILES
while (v != NULL)
{
if (FileSync(v->mdfd_vfd) < 0)
@ -914,15 +857,6 @@ mdimmedsync(SMgrRelation reln)
reln->smgr_rnode.relNode)));
v = v->mdfd_chain;
}
#else
if (FileSync(v->mdfd_vfd) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync relation %u/%u/%u: %m",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
#endif
}
/*
@ -1476,8 +1410,6 @@ _fdvec_alloc(void)
return (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));
}
#ifdef USE_SEGMENTED_FILES
/*
* Open the specified segment of the relation,
* and make a MdfdVec object for it. Returns NULL on failure.
@ -1522,7 +1454,6 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
/* all done */
return v;
}
#endif /* USE_SEGMENTED_FILES */
/*
* _mdfd_getseg() -- Find the segment of the relation holding the
@ -1537,8 +1468,6 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
ExtensionBehavior behavior)
{
MdfdVec *v = mdopen(reln, behavior);
#ifdef USE_SEGMENTED_FILES
BlockNumber targetseg;
BlockNumber nextsegno;
@ -1600,8 +1529,6 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
}
v = v->mdfd_chain;
}
#endif
return v;
}

View File

@ -27,6 +27,15 @@
/* The normal alignment of `short', in bytes. */
#undef ALIGNOF_SHORT
/* Size of a disk block --- this also limits the size of a tuple. You can set
it bigger if you need bigger tuples (although TOAST should reduce the need
to have large tuples, since fields can be spread across multiple tuples).
BLCKSZ must be a power of 2. The maximum possible value of BLCKSZ is
currently 2^15 (32768). This is determined by the 15-bit widths of the
lp_off and lp_len fields in ItemIdData (see include/storage/itemid.h).
Changing BLCKSZ requires an initdb. */
#undef BLCKSZ
/* Define to the default TCP port number on which the server listens and to
which clients will try to connect. This can be overridden at run-time, but
it's convenient if your clients have the right default compiled in.
@ -644,6 +653,19 @@
your system. */
#undef PTHREAD_CREATE_JOINABLE
/* RELSEG_SIZE is the maximum number of blocks allowed in one disk file. Thus,
the maximum size of a single file is RELSEG_SIZE * BLCKSZ; relations bigger
than that are divided into multiple files. RELSEG_SIZE * BLCKSZ must be
less than your OS' limit on file size. This is often 2 GB or 4GB in a
32-bit operating system, unless you have large file support enabled. By
default, we make the limit 1 GB to avoid any possible integer-overflow
problems within the OS. A limit smaller than necessary only means we divide
a large relation into more chunks than necessary, so it seems best to err
in the direction of a small limit. A power-of-2 value is recommended to
save a few cycles in md.c, but is not absolutely required. Changing
RELSEG_SIZE requires an initdb. */
#undef RELSEG_SIZE
/* The size of `off_t', as computed by sizeof. */
#undef SIZEOF_OFF_T
@ -703,9 +725,6 @@
/* Use replacement snprintf() functions. */
#undef USE_REPL_SNPRINTF
/* Define to split data files into 1GB segments. */
#undef USE_SEGMENTED_FILES
/* Define to build with (Open)SSL support. (--with-openssl) */
#undef USE_SSL

View File

@ -6,51 +6,10 @@
* for developers. If you edit any of these, be sure to do a *full*
* rebuild (and an initdb if noted).
*
* $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.31 2008/04/11 22:54:23 tgl Exp $
* $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.32 2008/05/02 01:08:27 tgl Exp $
*------------------------------------------------------------------------
*/
/*
* Size of a disk block --- this also limits the size of a tuple. You
* can set it bigger if you need bigger tuples (although TOAST should
* reduce the need to have large tuples, since fields can be spread
* across multiple tuples).
*
* BLCKSZ must be a power of 2. The maximum possible value of BLCKSZ
* is currently 2^15 (32768). This is determined by the 15-bit widths
* of the lp_off and lp_len fields in ItemIdData (see
* include/storage/itemid.h).
*
* Changing BLCKSZ requires an initdb.
*/
#define BLCKSZ 8192
/*
* RELSEG_SIZE is the maximum number of blocks allowed in one disk
* file when USE_SEGMENTED_FILES is defined. Thus, the maximum size
* of a single file is RELSEG_SIZE * BLCKSZ; relations bigger than that
* are divided into multiple files.
*
* RELSEG_SIZE * BLCKSZ must be less than your OS' limit on file size.
* This is often 2 GB or 4GB in a 32-bit operating system, unless you
* have large file support enabled. By default, we make the limit 1
* GB to avoid any possible integer-overflow problems within the OS.
* A limit smaller than necessary only means we divide a large
* relation into more chunks than necessary, so it seems best to err
* in the direction of a small limit. (Besides, a power-of-2 value
* saves a few cycles in md.c.)
*
* When not using segmented files, RELSEG_SIZE is set to zero so that
* this behavior can be distinguished in pg_control.
*
* Changing RELSEG_SIZE requires an initdb.
*/
#ifdef USE_SEGMENTED_FILES
#define RELSEG_SIZE (0x40000000 / BLCKSZ)
#else
#define RELSEG_SIZE 0
#endif
/*
* Size of a WAL file block. This need have no particular relation to BLCKSZ.
* XLOG_BLCKSZ must be a power of 2, and if your system supports O_DIRECT I/O,