mirror of
https://github.com/postgres/postgres.git
synced 2025-04-20 00:42:27 +03:00
793 lines
22 KiB
C
793 lines
22 KiB
C
/*-------------------------------------------------------------------------
 *
 * nodeHashjoin.c--
 *    Routines to handle hash join nodes
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.1.1.1 1996/07/09 06:21:26 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
|
|
#include <sys/file.h>
|
|
|
|
#include "storage/bufmgr.h" /* for BLCKSZ */
|
|
#include "storage/fd.h" /* for SEEK_ */
|
|
#include "executor/executor.h"
|
|
#include "executor/nodeHash.h"
|
|
#include "executor/nodeHashjoin.h"
|
|
|
|
#include "optimizer/clauses.h" /* for get_leftop */
|
|
|
|
|
|
#include "utils/palloc.h"
|
|
|
|
static TupleTableSlot *
|
|
ExecHashJoinOuterGetTuple(Plan *node, Plan* parent, HashJoinState *hjstate);
|
|
|
|
static TupleTableSlot *
|
|
ExecHashJoinGetSavedTuple(HashJoinState *hjstate, char *buffer,
|
|
File file, TupleTableSlot *tupleSlot, int *block, char **position);
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecHashJoin
|
|
*
|
|
* This function implements the Hybrid Hashjoin algorithm.
|
|
* recursive partitioning remains to be added.
|
|
* Note: the relation we build hash table on is the inner
|
|
* the other one is outer.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
TupleTableSlot * /* return: a tuple or NULL */
|
|
ExecHashJoin(HashJoin *node)
|
|
{
|
|
HashJoinState *hjstate;
|
|
EState *estate;
|
|
Plan *outerNode;
|
|
Hash *hashNode;
|
|
List *hjclauses;
|
|
Expr *clause;
|
|
List *qual;
|
|
ScanDirection dir;
|
|
TupleTableSlot *inntuple;
|
|
Var *outerVar;
|
|
ExprContext *econtext;
|
|
|
|
HashJoinTable hashtable;
|
|
int bucketno;
|
|
HashBucket bucket;
|
|
HeapTuple curtuple;
|
|
|
|
bool qualResult;
|
|
|
|
TupleTableSlot *outerTupleSlot;
|
|
TupleTableSlot *innerTupleSlot;
|
|
int nbatch;
|
|
int curbatch;
|
|
File *outerbatches;
|
|
RelativeAddr *outerbatchNames;
|
|
RelativeAddr *outerbatchPos;
|
|
Var *innerhashkey;
|
|
int batch;
|
|
int batchno;
|
|
char *buffer;
|
|
int i;
|
|
bool hashPhaseDone;
|
|
char *pos;
|
|
|
|
/* ----------------
|
|
* get information from HashJoin node
|
|
* ----------------
|
|
*/
|
|
hjstate = node->hashjoinstate;
|
|
hjclauses = node->hashclauses;
|
|
clause = lfirst(hjclauses);
|
|
estate = node->join.state;
|
|
qual = node->join.qual;
|
|
hashNode = (Hash *)innerPlan(node);
|
|
outerNode = outerPlan(node);
|
|
hashPhaseDone = node->hashdone;
|
|
|
|
dir = estate->es_direction;
|
|
|
|
/* -----------------
|
|
* get information from HashJoin state
|
|
* -----------------
|
|
*/
|
|
hashtable = hjstate->hj_HashTable;
|
|
bucket = hjstate->hj_CurBucket;
|
|
curtuple = hjstate->hj_CurTuple;
|
|
|
|
/* --------------------
|
|
* initialize expression context
|
|
* --------------------
|
|
*/
|
|
econtext = hjstate->jstate.cs_ExprContext;
|
|
|
|
if (hjstate->jstate.cs_TupFromTlist) {
|
|
TupleTableSlot *result;
|
|
bool isDone;
|
|
|
|
result = ExecProject(hjstate->jstate.cs_ProjInfo, &isDone);
|
|
if (!isDone)
|
|
return result;
|
|
}
|
|
/* ----------------
|
|
* if this is the first call, build the hash table for inner relation
|
|
* ----------------
|
|
*/
|
|
if (!hashPhaseDone) { /* if the hash phase not completed */
|
|
hashtable = node->hashjointable;
|
|
if (hashtable == NULL) { /* if the hash table has not been created */
|
|
/* ----------------
|
|
* create the hash table
|
|
* ----------------
|
|
*/
|
|
hashtable = ExecHashTableCreate(hashNode);
|
|
hjstate->hj_HashTable = hashtable;
|
|
innerhashkey = hashNode->hashkey;
|
|
hjstate->hj_InnerHashKey = innerhashkey;
|
|
|
|
/* ----------------
|
|
* execute the Hash node, to build the hash table
|
|
* ----------------
|
|
*/
|
|
hashNode->hashtable = hashtable;
|
|
innerTupleSlot = ExecProcNode((Plan *)hashNode, (Plan*) node);
|
|
}
|
|
bucket = NULL;
|
|
curtuple = NULL;
|
|
curbatch = 0;
|
|
node->hashdone = true;
|
|
}
|
|
nbatch = hashtable->nbatch;
|
|
outerbatches = hjstate->hj_OuterBatches;
|
|
if (nbatch > 0 && outerbatches == NULL) { /* if needs hash partition */
|
|
/* -----------------
|
|
* allocate space for file descriptors of outer batch files
|
|
* then open the batch files in the current process
|
|
* -----------------
|
|
*/
|
|
innerhashkey = hashNode->hashkey;
|
|
hjstate->hj_InnerHashKey = innerhashkey;
|
|
outerbatchNames = (RelativeAddr*)
|
|
ABSADDR(hashtable->outerbatchNames);
|
|
outerbatches = (File*)
|
|
palloc(nbatch * sizeof(File));
|
|
for (i=0; i<nbatch; i++) {
|
|
outerbatches[i] = FileNameOpenFile(
|
|
ABSADDR(outerbatchNames[i]),
|
|
O_CREAT | O_RDWR, 0600);
|
|
}
|
|
hjstate->hj_OuterBatches = outerbatches;
|
|
|
|
/* ------------------
|
|
* get the inner batch file descriptors from the
|
|
* hash node
|
|
* ------------------
|
|
*/
|
|
hjstate->hj_InnerBatches =
|
|
hashNode->hashstate->hashBatches;
|
|
}
|
|
outerbatchPos = (RelativeAddr*)ABSADDR(hashtable->outerbatchPos);
|
|
curbatch = hashtable->curbatch;
|
|
outerbatchNames = (RelativeAddr*)ABSADDR(hashtable->outerbatchNames);
|
|
|
|
/* ----------------
|
|
* Now get an outer tuple and probe into the hash table for matches
|
|
* ----------------
|
|
*/
|
|
outerTupleSlot = hjstate->jstate.cs_OuterTupleSlot;
|
|
outerVar = get_leftop(clause);
|
|
|
|
bucketno = -1; /* if bucketno remains -1, means use old outer tuple */
|
|
if (TupIsNull(outerTupleSlot)) {
|
|
/*
|
|
* if the current outer tuple is nil, get a new one
|
|
*/
|
|
outerTupleSlot = (TupleTableSlot*)
|
|
ExecHashJoinOuterGetTuple(outerNode, (Plan*)node, hjstate);
|
|
|
|
while (curbatch <= nbatch && TupIsNull(outerTupleSlot)) {
|
|
/*
|
|
* if the current batch runs out, switch to new batch
|
|
*/
|
|
curbatch = ExecHashJoinNewBatch(hjstate);
|
|
if (curbatch > nbatch) {
|
|
/*
|
|
* when the last batch runs out, clean up
|
|
*/
|
|
ExecHashTableDestroy(hashtable);
|
|
hjstate->hj_HashTable = NULL;
|
|
return NULL;
|
|
}
|
|
else
|
|
outerTupleSlot = (TupleTableSlot*)
|
|
ExecHashJoinOuterGetTuple(outerNode, (Plan*)node, hjstate);
|
|
}
|
|
/*
|
|
* now we get an outer tuple, find the corresponding bucket for
|
|
* this tuple from the hash table
|
|
*/
|
|
econtext->ecxt_outertuple = outerTupleSlot;
|
|
|
|
#ifdef HJDEBUG
|
|
printf("Probing ");
|
|
#endif
|
|
bucketno = ExecHashGetBucket(hashtable, econtext, outerVar);
|
|
bucket=(HashBucket)(ABSADDR(hashtable->top)
|
|
+ bucketno * hashtable->bucketsize);
|
|
}
|
|
|
|
for (;;) {
|
|
/* ----------------
|
|
* Now we've got an outer tuple and the corresponding hash bucket,
|
|
* but this tuple may not belong to the current batch.
|
|
* ----------------
|
|
*/
|
|
if (curbatch == 0 && bucketno != -1) /* if this is the first pass */
|
|
batch = ExecHashJoinGetBatch(bucketno, hashtable, nbatch);
|
|
else
|
|
batch = 0;
|
|
if (batch > 0) {
|
|
/*
|
|
* if the current outer tuple does not belong to
|
|
* the current batch, save to the tmp file for
|
|
* the corresponding batch.
|
|
*/
|
|
buffer = ABSADDR(hashtable->batch) + (batch - 1) * BLCKSZ;
|
|
batchno = batch - 1;
|
|
pos = ExecHashJoinSaveTuple(outerTupleSlot->val,
|
|
buffer,
|
|
outerbatches[batchno],
|
|
ABSADDR(outerbatchPos[batchno]));
|
|
|
|
outerbatchPos[batchno] = RELADDR(pos);
|
|
}
|
|
else if (bucket != NULL) {
|
|
do {
|
|
/*
|
|
* scan the hash bucket for matches
|
|
*/
|
|
curtuple = ExecScanHashBucket(hjstate,
|
|
bucket,
|
|
curtuple,
|
|
hjclauses,
|
|
econtext);
|
|
|
|
if (curtuple != NULL) {
|
|
/*
|
|
* we've got a match, but still need to test qpqual
|
|
*/
|
|
inntuple = ExecStoreTuple(curtuple,
|
|
hjstate->hj_HashTupleSlot,
|
|
InvalidBuffer,
|
|
false); /* don't pfree this tuple */
|
|
|
|
econtext->ecxt_innertuple = inntuple;
|
|
|
|
/* ----------------
|
|
* test to see if we pass the qualification
|
|
* ----------------
|
|
*/
|
|
qualResult = ExecQual((List*)qual, econtext);
|
|
|
|
/* ----------------
|
|
* if we pass the qual, then save state for next call and
|
|
* have ExecProject form the projection, store it
|
|
* in the tuple table, and return the slot.
|
|
* ----------------
|
|
*/
|
|
if (qualResult) {
|
|
ProjectionInfo *projInfo;
|
|
TupleTableSlot *result;
|
|
bool isDone;
|
|
|
|
hjstate->hj_CurBucket = bucket;
|
|
hjstate->hj_CurTuple = curtuple;
|
|
hashtable->curbatch = curbatch;
|
|
hjstate->jstate.cs_OuterTupleSlot = outerTupleSlot;
|
|
|
|
projInfo = hjstate->jstate.cs_ProjInfo;
|
|
result = ExecProject(projInfo, &isDone);
|
|
hjstate->jstate.cs_TupFromTlist = !isDone;
|
|
return result;
|
|
}
|
|
}
|
|
}
|
|
while (curtuple != NULL);
|
|
}
|
|
|
|
/* ----------------
|
|
* Now the current outer tuple has run out of matches,
|
|
* so we free it and get a new outer tuple.
|
|
* ----------------
|
|
*/
|
|
outerTupleSlot = (TupleTableSlot*)
|
|
ExecHashJoinOuterGetTuple(outerNode, (Plan*) node, hjstate);
|
|
|
|
while (curbatch <= nbatch && TupIsNull(outerTupleSlot)) {
|
|
/*
|
|
* if the current batch runs out, switch to new batch
|
|
*/
|
|
curbatch = ExecHashJoinNewBatch(hjstate);
|
|
if (curbatch > nbatch) {
|
|
/*
|
|
* when the last batch runs out, clean up
|
|
*/
|
|
ExecHashTableDestroy(hashtable);
|
|
hjstate->hj_HashTable = NULL;
|
|
return NULL;
|
|
}
|
|
else
|
|
outerTupleSlot = (TupleTableSlot*)
|
|
ExecHashJoinOuterGetTuple(outerNode, (Plan*)node, hjstate);
|
|
}
|
|
|
|
/* ----------------
|
|
* Now get the corresponding hash bucket for the new
|
|
* outer tuple.
|
|
* ----------------
|
|
*/
|
|
econtext->ecxt_outertuple = outerTupleSlot;
|
|
#ifdef HJDEBUG
|
|
printf("Probing ");
|
|
#endif
|
|
bucketno = ExecHashGetBucket(hashtable, econtext, outerVar);
|
|
bucket=(HashBucket)(ABSADDR(hashtable->top)
|
|
+ bucketno * hashtable->bucketsize);
|
|
curtuple = NULL;
|
|
}
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecInitHashJoin
|
|
*
|
|
* Init routine for HashJoin node.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
bool /* return: initialization status */
|
|
ExecInitHashJoin(HashJoin *node, EState *estate, Plan *parent)
|
|
{
|
|
HashJoinState *hjstate;
|
|
Plan *outerNode;
|
|
Hash *hashNode;
|
|
|
|
/* ----------------
|
|
* assign the node's execution state
|
|
* ----------------
|
|
*/
|
|
node->join.state = estate;
|
|
|
|
/* ----------------
|
|
* create state structure
|
|
* ----------------
|
|
*/
|
|
hjstate = makeNode(HashJoinState);
|
|
|
|
node->hashjoinstate = hjstate;
|
|
|
|
/* ----------------
|
|
* Miscellanious initialization
|
|
*
|
|
* + assign node's base_id
|
|
* + assign debugging hooks and
|
|
* + create expression context for node
|
|
* ----------------
|
|
*/
|
|
ExecAssignNodeBaseInfo(estate, &hjstate->jstate, parent);
|
|
ExecAssignExprContext(estate, &hjstate->jstate);
|
|
|
|
#define HASHJOIN_NSLOTS 2
|
|
/* ----------------
|
|
* tuple table initialization
|
|
* ----------------
|
|
*/
|
|
ExecInitResultTupleSlot(estate, &hjstate->jstate);
|
|
ExecInitOuterTupleSlot(estate, hjstate);
|
|
|
|
/* ----------------
|
|
* initializes child nodes
|
|
* ----------------
|
|
*/
|
|
outerNode = outerPlan((Plan *)node);
|
|
hashNode = (Hash*)innerPlan((Plan *)node);
|
|
|
|
ExecInitNode(outerNode, estate, (Plan *) node);
|
|
ExecInitNode((Plan*)hashNode, estate, (Plan *) node);
|
|
|
|
/* ----------------
|
|
* now for some voodoo. our temporary tuple slot
|
|
* is actually the result tuple slot of the Hash node
|
|
* (which is our inner plan). we do this because Hash
|
|
* nodes don't return tuples via ExecProcNode() -- instead
|
|
* the hash join node uses ExecScanHashBucket() to get
|
|
* at the contents of the hash table. -cim 6/9/91
|
|
* ----------------
|
|
*/
|
|
{
|
|
HashState *hashstate = hashNode->hashstate;
|
|
TupleTableSlot *slot =
|
|
hashstate->cstate.cs_ResultTupleSlot;
|
|
hjstate->hj_HashTupleSlot = slot;
|
|
}
|
|
hjstate->hj_OuterTupleSlot->ttc_tupleDescriptor =
|
|
ExecGetTupType(outerNode);
|
|
|
|
/*
|
|
hjstate->hj_OuterTupleSlot->ttc_execTupDescriptor =
|
|
ExecGetExecTupDesc(outerNode);
|
|
*/
|
|
|
|
/* ----------------
|
|
* initialize tuple type and projection info
|
|
* ----------------
|
|
*/
|
|
ExecAssignResultTypeFromTL((Plan*) node, &hjstate->jstate);
|
|
ExecAssignProjectionInfo((Plan*) node, &hjstate->jstate);
|
|
|
|
/* ----------------
|
|
* XXX comment me
|
|
* ----------------
|
|
*/
|
|
|
|
node->hashdone = false;
|
|
|
|
hjstate->hj_HashTable = (HashJoinTable)NULL;
|
|
hjstate->hj_HashTableShmId = (IpcMemoryId)0;
|
|
hjstate->hj_CurBucket = (HashBucket )NULL;
|
|
hjstate->hj_CurTuple = (HeapTuple )NULL;
|
|
hjstate->hj_CurOTuple = (OverflowTuple )NULL;
|
|
hjstate->hj_InnerHashKey = (Var*)NULL;
|
|
hjstate->hj_OuterBatches = (File*)NULL;
|
|
hjstate->hj_InnerBatches = (File*)NULL;
|
|
hjstate->hj_OuterReadPos = (char*)NULL;
|
|
hjstate->hj_OuterReadBlk = (int)0;
|
|
|
|
hjstate->jstate.cs_OuterTupleSlot = (TupleTableSlot*) NULL;
|
|
hjstate->jstate.cs_TupFromTlist = (bool) false;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
int
|
|
ExecCountSlotsHashJoin(HashJoin *node)
|
|
{
|
|
return ExecCountSlotsNode(outerPlan(node)) +
|
|
ExecCountSlotsNode(innerPlan(node)) +
|
|
HASHJOIN_NSLOTS;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecEndHashJoin
|
|
*
|
|
* clean up routine for HashJoin node
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
void
|
|
ExecEndHashJoin(HashJoin *node)
|
|
{
|
|
HashJoinState *hjstate;
|
|
|
|
/* ----------------
|
|
* get info from the HashJoin state
|
|
* ----------------
|
|
*/
|
|
hjstate = node->hashjoinstate;
|
|
|
|
/* ----------------
|
|
* free hash table in case we end plan before all tuples are retrieved
|
|
* ---------------
|
|
*/
|
|
if (hjstate->hj_HashTable) {
|
|
ExecHashTableDestroy(hjstate->hj_HashTable);
|
|
hjstate->hj_HashTable = NULL;
|
|
}
|
|
|
|
/* ----------------
|
|
* Free the projection info and the scan attribute info
|
|
*
|
|
* Note: we don't ExecFreeResultType(hjstate)
|
|
* because the rule manager depends on the tupType
|
|
* returned by ExecMain(). So for now, this
|
|
* is freed at end-transaction time. -cim 6/2/91
|
|
* ----------------
|
|
*/
|
|
ExecFreeProjectionInfo(&hjstate->jstate);
|
|
|
|
/* ----------------
|
|
* clean up subtrees
|
|
* ----------------
|
|
*/
|
|
ExecEndNode(outerPlan((Plan *) node), (Plan*)node);
|
|
ExecEndNode(innerPlan((Plan *) node), (Plan*)node);
|
|
|
|
/* ----------------
|
|
* clean out the tuple table
|
|
* ----------------
|
|
*/
|
|
ExecClearTuple(hjstate->jstate.cs_ResultTupleSlot);
|
|
ExecClearTuple(hjstate->hj_OuterTupleSlot);
|
|
ExecClearTuple(hjstate->hj_HashTupleSlot);
|
|
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecHashJoinOuterGetTuple
|
|
*
|
|
* get the next outer tuple for hashjoin: either by
|
|
* executing a plan node as in the first pass, or from
|
|
* the tmp files for the hashjoin batches.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
|
|
static TupleTableSlot *
|
|
ExecHashJoinOuterGetTuple(Plan *node, Plan* parent, HashJoinState *hjstate)
|
|
{
|
|
TupleTableSlot *slot;
|
|
HashJoinTable hashtable;
|
|
int curbatch;
|
|
File *outerbatches;
|
|
char *outerreadPos;
|
|
int batchno;
|
|
char *outerreadBuf;
|
|
int outerreadBlk;
|
|
|
|
hashtable = hjstate->hj_HashTable;
|
|
curbatch = hashtable->curbatch;
|
|
|
|
if (curbatch == 0) { /* if it is the first pass */
|
|
slot = ExecProcNode(node, parent);
|
|
return slot;
|
|
}
|
|
|
|
/*
|
|
* otherwise, read from the tmp files
|
|
*/
|
|
outerbatches = hjstate->hj_OuterBatches;
|
|
outerreadPos = hjstate->hj_OuterReadPos;
|
|
outerreadBlk = hjstate->hj_OuterReadBlk;
|
|
outerreadBuf = ABSADDR(hashtable->readbuf);
|
|
batchno = curbatch - 1;
|
|
|
|
slot = ExecHashJoinGetSavedTuple(hjstate,
|
|
outerreadBuf,
|
|
outerbatches[batchno],
|
|
hjstate->hj_OuterTupleSlot,
|
|
&outerreadBlk,
|
|
&outerreadPos);
|
|
|
|
hjstate->hj_OuterReadPos = outerreadPos;
|
|
hjstate->hj_OuterReadBlk = outerreadBlk;
|
|
|
|
return slot;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecHashJoinGetSavedTuple
|
|
*
|
|
* read the next tuple from a tmp file using a certain buffer
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
|
|
static TupleTableSlot *
|
|
ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
|
|
char *buffer,
|
|
File file,
|
|
TupleTableSlot *tupleSlot,
|
|
int *block, /* return parameter */
|
|
char **position) /* return parameter */
|
|
{
|
|
char *bufstart;
|
|
char *bufend;
|
|
int cc;
|
|
HeapTuple heapTuple;
|
|
HashJoinTable hashtable;
|
|
|
|
hashtable = hjstate->hj_HashTable;
|
|
bufend = buffer + *(long*)buffer;
|
|
bufstart = (char*)(buffer + sizeof(long));
|
|
if ((*position == NULL) || (*position >= bufend)) {
|
|
if (*position == NULL)
|
|
(*block) = 0;
|
|
else
|
|
(*block)++;
|
|
FileSeek(file, *block * BLCKSZ, SEEK_SET);
|
|
cc = FileRead(file, buffer, BLCKSZ);
|
|
NDirectFileRead++;
|
|
if (cc < 0)
|
|
perror("FileRead");
|
|
if (cc == 0) /* end of file */
|
|
return NULL;
|
|
else
|
|
(*position) = bufstart;
|
|
}
|
|
heapTuple = (HeapTuple) (*position);
|
|
(*position) = (char*)LONGALIGN(*position + heapTuple->t_len);
|
|
|
|
return ExecStoreTuple(heapTuple,tupleSlot,InvalidBuffer,false);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecHashJoinNewBatch
|
|
*
|
|
* switch to a new hashjoin batch
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
int
|
|
ExecHashJoinNewBatch(HashJoinState *hjstate)
|
|
{
|
|
File *innerBatches;
|
|
File *outerBatches;
|
|
int *innerBatchSizes;
|
|
Var *innerhashkey;
|
|
HashJoinTable hashtable;
|
|
int nbatch;
|
|
char *readPos;
|
|
int readBlk;
|
|
char *readBuf;
|
|
TupleTableSlot *slot;
|
|
ExprContext *econtext;
|
|
int i;
|
|
int cc;
|
|
int newbatch;
|
|
|
|
hashtable = hjstate->hj_HashTable;
|
|
outerBatches = hjstate->hj_OuterBatches;
|
|
innerBatches = hjstate->hj_InnerBatches;
|
|
nbatch = hashtable->nbatch;
|
|
newbatch = hashtable->curbatch + 1;
|
|
|
|
/* ------------------
|
|
* this is the last process, so it will do the cleanup and
|
|
* batch-switching.
|
|
* ------------------
|
|
*/
|
|
if (newbatch == 1) {
|
|
/*
|
|
* if it is end of the first pass, flush all the last pages for
|
|
* the batches.
|
|
*/
|
|
outerBatches = hjstate->hj_OuterBatches;
|
|
for (i=0; i<nbatch; i++) {
|
|
cc = FileSeek(outerBatches[i], 0L, SEEK_END);
|
|
if (cc < 0)
|
|
perror("FileSeek");
|
|
cc = FileWrite(outerBatches[i],
|
|
ABSADDR(hashtable->batch) + i * BLCKSZ, BLCKSZ);
|
|
NDirectFileWrite++;
|
|
if (cc < 0)
|
|
perror("FileWrite");
|
|
}
|
|
}
|
|
if (newbatch > 1) {
|
|
/*
|
|
* remove the previous outer batch
|
|
*/
|
|
FileUnlink(outerBatches[newbatch - 2]);
|
|
}
|
|
/*
|
|
* rebuild the hash table for the new inner batch
|
|
*/
|
|
innerBatchSizes = (int*)ABSADDR(hashtable->innerbatchSizes);
|
|
/* --------------
|
|
* skip over empty inner batches
|
|
* --------------
|
|
*/
|
|
while (newbatch <= nbatch && innerBatchSizes[newbatch - 1] == 0) {
|
|
FileUnlink(outerBatches[newbatch-1]);
|
|
FileUnlink(innerBatches[newbatch-1]);
|
|
newbatch++;
|
|
}
|
|
if (newbatch > nbatch) {
|
|
hashtable->pcount = hashtable->nprocess;
|
|
|
|
return newbatch;
|
|
}
|
|
ExecHashTableReset(hashtable, innerBatchSizes[newbatch - 1]);
|
|
|
|
|
|
econtext = hjstate->jstate.cs_ExprContext;
|
|
innerhashkey = hjstate->hj_InnerHashKey;
|
|
readPos = NULL;
|
|
readBlk = 0;
|
|
readBuf = ABSADDR(hashtable->readbuf);
|
|
|
|
while ((slot = ExecHashJoinGetSavedTuple(hjstate,
|
|
readBuf,
|
|
innerBatches[newbatch-1],
|
|
hjstate->hj_HashTupleSlot,
|
|
&readBlk,
|
|
&readPos))
|
|
&& ! TupIsNull(slot)) {
|
|
econtext->ecxt_innertuple = slot;
|
|
ExecHashTableInsert(hashtable, econtext, innerhashkey,NULL);
|
|
/* possible bug - glass */
|
|
}
|
|
|
|
|
|
/* -----------------
|
|
* only the last process comes to this branch
|
|
* now all the processes have finished the build phase
|
|
* ----------------
|
|
*/
|
|
|
|
/*
|
|
* after we build the hash table, the inner batch is no longer needed
|
|
*/
|
|
FileUnlink(innerBatches[newbatch - 1]);
|
|
hjstate->hj_OuterReadPos = NULL;
|
|
hashtable->pcount = hashtable->nprocess;
|
|
|
|
hashtable->curbatch = newbatch;
|
|
return newbatch;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecHashJoinGetBatch
|
|
*
|
|
* determine the batch number for a bucketno
|
|
* +----------------+-------+-------+ ... +-------+
|
|
* 0 nbuckets totalbuckets
|
|
* batch 0 1 2 ...
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
int
|
|
ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable, int nbatch)
|
|
{
|
|
int b;
|
|
if (bucketno < hashtable->nbuckets || nbatch == 0)
|
|
return 0;
|
|
|
|
b = (float)(bucketno - hashtable->nbuckets) /
|
|
(float)(hashtable->totalbuckets - hashtable->nbuckets) *
|
|
nbatch;
|
|
return b+1;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------
|
|
* ExecHashJoinSaveTuple
|
|
*
|
|
* save a tuple to a tmp file using a buffer.
|
|
* the first few bytes in a page is an offset to the end
|
|
* of the page.
|
|
* ----------------------------------------------------------------
|
|
*/
|
|
|
|
char *
|
|
ExecHashJoinSaveTuple(HeapTuple heapTuple,
|
|
char *buffer,
|
|
File file,
|
|
char *position)
|
|
{
|
|
long *pageend;
|
|
char *pagestart;
|
|
char *pagebound;
|
|
int cc;
|
|
|
|
pageend = (long*)buffer;
|
|
pagestart = (char*)(buffer + sizeof(long));
|
|
pagebound = buffer + BLCKSZ;
|
|
if (position == NULL)
|
|
position = pagestart;
|
|
|
|
if (position + heapTuple->t_len >= pagebound) {
|
|
cc = FileSeek(file, 0L, SEEK_END);
|
|
if (cc < 0)
|
|
perror("FileSeek");
|
|
cc = FileWrite(file, buffer, BLCKSZ);
|
|
NDirectFileWrite++;
|
|
if (cc < 0)
|
|
perror("FileWrite");
|
|
position = pagestart;
|
|
*pageend = 0;
|
|
}
|
|
memmove(position, heapTuple, heapTuple->t_len);
|
|
position = (char*)LONGALIGN(position + heapTuple->t_len);
|
|
*pageend = position - buffer;
|
|
|
|
return position;
|
|
}
|