1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-13 07:41:39 +03:00

Revert "Change mbbisearch to return the character range"

This reverts commit 78ab944cd4.

After I had committed eb0d0d2c7 and 78ab944cd, I decided to add
a sanity check for a "can't happen" scenario just to be cautious.
It turned out that this scenario already occurs in the official Unicode source
data, namely that a character can be both wide and a combining
character. This fact renders the aforementioned commits unnecessary,
so revert both of them.

Discussion:
https://www.postgresql.org/message-id/CAFBsxsH5ejH4-1xaTLpSK8vWoK1m6fA1JBtTM6jmBsLfmDki1g%40mail.gmail.com
This commit is contained in:
John Naylor
2021-08-26 09:58:28 -04:00
parent 0d906b2c0b
commit f8c8a8bccc
3 changed files with 203 additions and 208 deletions

View File

@ -17,7 +17,7 @@ my $count = 0;
print
"/* generated by src/common/unicode/generate-unicode_width_table.pl, do not edit */\n\n";
print "static const struct mbinterval wcwidth[] = {\n";
print "static const struct mbinterval combining[] = {\n";
foreach my $line (<ARGV>)
{
@ -40,7 +40,7 @@ foreach my $line (<ARGV>)
# not a combining character, print out previous range if any
if (defined($range_start))
{
printf "\t{0x%04X, 0x%04X, 0},\n", $range_start, $prev_codepoint;
printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
$range_start = undef;
}
}

View File

@ -585,18 +585,17 @@ struct mbinterval
{
unsigned short first;
unsigned short last;
signed int width;
};
/* auxiliary function for binary search in interval table */
static const struct mbinterval *
static int
mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
{
int min = 0;
int mid;
if (ucs < table[0].first || ucs > table[max].last)
return NULL;
return 0;
while (max >= min)
{
mid = (min + max) / 2;
@ -605,10 +604,10 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
else if (ucs < table[mid].first)
max = mid - 1;
else
return &table[mid];
return 1;
}
return NULL;
return 0;
}
@ -647,8 +646,6 @@ ucs_wcwidth(pg_wchar ucs)
{
#include "common/unicode_width_table.h"
const struct mbinterval *range;
/* test for 8-bit control characters */
if (ucs == 0)
return 0;
@ -656,12 +653,10 @@ ucs_wcwidth(pg_wchar ucs)
if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
return -1;
/* binary search in table of character widths */
range = mbbisearch(ucs, wcwidth,
sizeof(wcwidth) / sizeof(struct mbinterval) - 1);
if (range != NULL)
return range->width;
/* binary search in table of non-spacing characters */
if (mbbisearch(ucs, combining,
sizeof(combining) / sizeof(struct mbinterval) - 1))
return 0;
/*
* if we arrive here, ucs is not a combining or C0/C1 control character