1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-31 22:04:40 +03:00

Add a "row processor" API to libpq for better handling of large results.

Traditionally libpq has collected an entire query result before passing
it back to the application.  That provides a simple and transactional API,
but it's pretty inefficient for large result sets.  This patch allows the
application to process each row on-the-fly instead of accumulating the
rows into the PGresult.  Error recovery becomes a bit more complex, but
often that tradeoff is well worth making.

Kyotaro Horiguchi, reviewed by Marko Kreen and Tom Lane
This commit is contained in:
Tom Lane
2012-04-04 18:27:56 -04:00
parent cb917e1544
commit 92785dac2e
10 changed files with 994 additions and 204 deletions

View File

@ -50,6 +50,9 @@ static bool static_std_strings = false;
static PGEvent *dupEvents(PGEvent *events, int count);
static bool pqAddTuple(PGresult *res, PGresAttValue *tup);
static int pqStdRowProcessor(PGresult *res, const PGdataValue *columns,
const char **errmsgp, void *param);
static bool PQsendQueryStart(PGconn *conn);
static int PQsendQueryGuts(PGconn *conn,
const char *command,
@ -61,6 +64,8 @@ static int PQsendQueryGuts(PGconn *conn,
const int *paramFormats,
int resultFormat);
static void parseInput(PGconn *conn);
static int dummyRowProcessor(PGresult *res, const PGdataValue *columns,
const char **errmsgp, void *param);
static bool PQexecStart(PGconn *conn);
static PGresult *PQexecFinish(PGconn *conn);
static int PQsendDescribe(PGconn *conn, char desc_type,
@ -694,14 +699,12 @@ PQclear(PGresult *res)
/*
* Handy subroutine to deallocate any partially constructed async result.
*/
void
pqClearAsyncResult(PGconn *conn)
{
if (conn->result)
PQclear(conn->result);
conn->result = NULL;
conn->curTuple = NULL;
}
/*
@ -756,7 +759,6 @@ pqPrepareAsyncResult(PGconn *conn)
*/
res = conn->result;
conn->result = NULL; /* handing over ownership to caller */
conn->curTuple = NULL; /* just in case */
if (!res)
res = PQmakeEmptyPGresult(conn, PGRES_FATAL_ERROR);
else
@ -832,7 +834,7 @@ pqInternalNotice(const PGNoticeHooks *hooks, const char *fmt,...)
* add a row pointer to the PGresult structure, growing it if necessary
* Returns TRUE if OK, FALSE if not enough memory to add the row
*/
int
static bool
pqAddTuple(PGresult *res, PGresAttValue *tup)
{
if (res->ntups >= res->tupArrSize)
@ -978,6 +980,124 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value)
}
/*
* PQsetRowProcessor
* Set function that copies row data out from the network buffer,
* along with a passthrough parameter for it.
*/
void
PQsetRowProcessor(PGconn *conn, PQrowProcessor func, void *param)
{
if (!conn)
return;
if (func)
{
/* set custom row processor */
conn->rowProcessor = func;
conn->rowProcessorParam = param;
}
else
{
/* set default row processor */
conn->rowProcessor = pqStdRowProcessor;
conn->rowProcessorParam = conn;
}
}
/*
* PQgetRowProcessor
* Get current row processor of PGconn.
* If param is not NULL, also store the passthrough parameter at *param.
*/
PQrowProcessor
PQgetRowProcessor(const PGconn *conn, void **param)
{
if (!conn)
{
if (param)
*param = NULL;
return NULL;
}
if (param)
*param = conn->rowProcessorParam;
return conn->rowProcessor;
}
/*
* pqStdRowProcessor
* Add the received row to the PGresult structure
* Returns 1 if OK, -1 if error occurred.
*
* Note: "param" should point to the PGconn, but we don't actually need that
* as of the current coding.
*/
static int
pqStdRowProcessor(PGresult *res, const PGdataValue *columns,
const char **errmsgp, void *param)
{
int nfields = res->numAttributes;
PGresAttValue *tup;
int i;
if (columns == NULL)
{
/* New result set ... we have nothing to do in this function. */
return 1;
}
/*
* Basically we just allocate space in the PGresult for each field and
* copy the data over.
*
* Note: on malloc failure, we return -1 leaving *errmsgp still NULL,
* which caller will take to mean "out of memory". This is preferable to
* trying to set up such a message here, because evidently there's not
* enough memory for gettext() to do anything.
*/
tup = (PGresAttValue *)
pqResultAlloc(res, nfields * sizeof(PGresAttValue), TRUE);
if (tup == NULL)
return -1;
for (i = 0; i < nfields; i++)
{
int clen = columns[i].len;
if (clen < 0)
{
/* null field */
tup[i].len = NULL_LEN;
tup[i].value = res->null_field;
}
else
{
bool isbinary = (res->attDescs[i].format != 0);
char *val;
val = (char *) pqResultAlloc(res, clen + 1, isbinary);
if (val == NULL)
return -1;
/* copy and zero-terminate the data (even if it's binary) */
memcpy(val, columns[i].value, clen);
val[clen] = '\0';
tup[i].len = clen;
tup[i].value = val;
}
}
/* And add the tuple to the PGresult's tuple array */
if (!pqAddTuple(res, tup))
return -1;
/* Success */
return 1;
}
/*
* PQsendQuery
* Submit a query, but don't wait for it to finish
@ -1223,7 +1343,6 @@ PQsendQueryStart(PGconn *conn)
/* initialize async result-accumulation state */
conn->result = NULL;
conn->curTuple = NULL;
/* ready to send command message */
return true;
@ -1468,6 +1587,9 @@ PQconsumeInput(PGconn *conn)
* parseInput: if appropriate, parse input data from backend
* until input is exhausted or a stopping state is reached.
* Note that this function will NOT attempt to read more data from the backend.
*
* Note: callers of parseInput must be prepared for a longjmp exit when we are
* in PGASYNC_BUSY state, since an external row processor might do that.
*/
static void
parseInput(PGconn *conn)
@ -1615,6 +1737,49 @@ PQgetResult(PGconn *conn)
return res;
}
/*
* PQskipResult
* Get the next PGresult produced by a query, but discard any data rows.
*
* This is mainly useful for cleaning up after a longjmp out of a row
* processor, when resuming processing of the current query result isn't
* wanted. Note that this is of little value in an async-style application,
* since any preceding calls to PQisBusy would have already called the regular
* row processor.
*/
PGresult *
PQskipResult(PGconn *conn)
{
PGresult *res;
PQrowProcessor savedRowProcessor;
if (!conn)
return NULL;
/* temporarily install dummy row processor */
savedRowProcessor = conn->rowProcessor;
conn->rowProcessor = dummyRowProcessor;
/* no need to save/change rowProcessorParam */
/* fetch the next result */
res = PQgetResult(conn);
/* restore previous row processor */
conn->rowProcessor = savedRowProcessor;
return res;
}
/*
* Do-nothing row processor for PQskipResult
*/
static int
dummyRowProcessor(PGresult *res, const PGdataValue *columns,
const char **errmsgp, void *param)
{
return 1;
}
/*
* PQexec
@ -1721,7 +1886,7 @@ PQexecStart(PGconn *conn)
* Silently discard any prior query result that application didn't eat.
* This is probably poor design, but it's here for backward compatibility.
*/
while ((result = PQgetResult(conn)) != NULL)
while ((result = PQskipResult(conn)) != NULL)
{
ExecStatusType resultStatus = result->resultStatus;