mirror of
https://github.com/postgres/postgres.git
synced 2025-06-13 07:41:39 +03:00
Revert "Change mbbisearch to return the character range"
This reverts commit 78ab944cd4. After I had committed eb0d0d2c7 and 78ab944cd, I decided to add a sanity check for a "can't happen" scenario just to be cautious. It turned out that it already happened in the official Unicode source data, namely that a character can be both wide and a combining character. This fact renders the aforementioned commits unnecessary, so revert both of them.
Discussion: https://www.postgresql.org/message-id/CAFBsxsH5ejH4-1xaTLpSK8vWoK1m6fA1JBtTM6jmBsLfmDki1g%40mail.gmail.com
This commit is contained in:
@ -17,7 +17,7 @@ my $count = 0;
|
||||
print
|
||||
"/* generated by src/common/unicode/generate-unicode_width_table.pl, do not edit */\n\n";
|
||||
|
||||
print "static const struct mbinterval wcwidth[] = {\n";
|
||||
print "static const struct mbinterval combining[] = {\n";
|
||||
|
||||
foreach my $line (<ARGV>)
|
||||
{
|
||||
@ -40,7 +40,7 @@ foreach my $line (<ARGV>)
|
||||
# not a combining character, print out previous range if any
|
||||
if (defined($range_start))
|
||||
{
|
||||
printf "\t{0x%04X, 0x%04X, 0},\n", $range_start, $prev_codepoint;
|
||||
printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
|
||||
$range_start = undef;
|
||||
}
|
||||
}
|
||||
|
@ -585,18 +585,17 @@ struct mbinterval
|
||||
{
|
||||
unsigned short first;
|
||||
unsigned short last;
|
||||
signed int width;
|
||||
};
|
||||
|
||||
/* auxiliary function for binary search in interval table */
|
||||
static const struct mbinterval *
|
||||
static int
|
||||
mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
|
||||
{
|
||||
int min = 0;
|
||||
int mid;
|
||||
|
||||
if (ucs < table[0].first || ucs > table[max].last)
|
||||
return NULL;
|
||||
return 0;
|
||||
while (max >= min)
|
||||
{
|
||||
mid = (min + max) / 2;
|
||||
@ -605,10 +604,10 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
|
||||
else if (ucs < table[mid].first)
|
||||
max = mid - 1;
|
||||
else
|
||||
return &table[mid];
|
||||
return 1;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -647,8 +646,6 @@ ucs_wcwidth(pg_wchar ucs)
|
||||
{
|
||||
#include "common/unicode_width_table.h"
|
||||
|
||||
const struct mbinterval *range;
|
||||
|
||||
/* test for 8-bit control characters */
|
||||
if (ucs == 0)
|
||||
return 0;
|
||||
@ -656,12 +653,10 @@ ucs_wcwidth(pg_wchar ucs)
|
||||
if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
|
||||
return -1;
|
||||
|
||||
/* binary search in table of character widths */
|
||||
range = mbbisearch(ucs, wcwidth,
|
||||
sizeof(wcwidth) / sizeof(struct mbinterval) - 1);
|
||||
|
||||
if (range != NULL)
|
||||
return range->width;
|
||||
/* binary search in table of non-spacing characters */
|
||||
if (mbbisearch(ucs, combining,
|
||||
sizeof(combining) / sizeof(struct mbinterval) - 1))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* if we arrive here, ucs is not a combining or C0/C1 control character
|
||||
|
Reference in New Issue
Block a user