mirror of
https://github.com/postgres/postgres.git
synced 2025-04-27 22:56:53 +03:00
Improve worst-case performance of text_position_get_match_pos()
This function converts a byte position to a character position after a successful string match. Rather than calling pg_mblen() in a loop, use pg_mbstrlen_with_len() since the latter can inline its own call to pg_mblen().

When the string match is at the end of the haystack text, this change results in a 10-20% performance improvement, depending on platform and typical character length in bytes. This also simplifies the code a little.

Specializing for UTF-8 could result in further improvement, but the performance gain was not found to be reliable between platforms. The modest gain in this commit is stable between platforms and usable by all server encodings.

Discussion: https://www.postgresql.org/message-id/CAFBsxsH1Yutrmu+6LLHKK8iXY+vG--Do6zN+2900spHXQNNQKQ@mail.gmail.com
This commit is contained in:
parent
807fee1a39
commit
b31e3f5613
@ -51,7 +51,6 @@ typedef struct varlena VarString;
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
bool is_multibyte; /* T if multibyte encoding */
|
||||
bool is_multibyte_char_in_char; /* need to check char boundaries? */
|
||||
|
||||
char *str1; /* haystack string */
|
||||
@ -1221,20 +1220,11 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
|
||||
* and continue the search if it was a false match.
|
||||
*/
|
||||
if (pg_database_encoding_max_length() == 1)
|
||||
{
|
||||
state->is_multibyte = false;
|
||||
state->is_multibyte_char_in_char = false;
|
||||
}
|
||||
else if (GetDatabaseEncoding() == PG_UTF8)
|
||||
{
|
||||
state->is_multibyte = true;
|
||||
state->is_multibyte_char_in_char = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
state->is_multibyte = true;
|
||||
state->is_multibyte_char_in_char = true;
|
||||
}
|
||||
|
||||
state->str1 = VARDATA_ANY(t1);
|
||||
state->str2 = VARDATA_ANY(t2);
|
||||
@ -1466,19 +1456,11 @@ text_position_get_match_ptr(TextPositionState *state)
|
||||
static int
|
||||
text_position_get_match_pos(TextPositionState *state)
|
||||
{
|
||||
if (!state->is_multibyte)
|
||||
return state->last_match - state->str1 + 1;
|
||||
else
|
||||
{
|
||||
/* Convert the byte position to char position. */
|
||||
while (state->refpoint < state->last_match)
|
||||
{
|
||||
state->refpoint += pg_mblen(state->refpoint);
|
||||
state->refpos++;
|
||||
}
|
||||
Assert(state->refpoint == state->last_match);
|
||||
return state->refpos + 1;
|
||||
}
|
||||
/* Convert the byte position to char position. */
|
||||
state->refpos += pg_mbstrlen_with_len(state->refpoint,
|
||||
state->last_match - state->refpoint);
|
||||
state->refpoint = state->last_match;
|
||||
return state->refpos + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user