mirror of
https://github.com/postgres/postgres.git
synced 2025-07-03 20:02:46 +03:00
Add SQL functions for Unicode normalization
This adds SQL expressions NORMALIZE() and IS NORMALIZED to convert and check Unicode normal forms, per the SQL standard. To support fast IS NORMALIZED tests, we pull in a new data file DerivedNormalizationProps.txt from Unicode and build a lookup table from that, using techniques similar to ones already used for other Unicode data. make update-unicode will keep it up to date. We only build and use these tables for the NFC and NFKC forms, because the tables for NFD and NFKD would be too big and the improvement is not significant enough there. Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Andreas Karlsson <andreas@proxel.se> Discussion: https://www.postgresql.org/message-id/flat/c1909f27-c269-2ed9-12f8-3ab72c8caf7a@2ndquadrant.com
This commit is contained in:
@ -444,6 +444,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
|
||||
%type <list> substr_list trim_list
|
||||
%type <list> opt_interval interval_second
|
||||
%type <node> overlay_placing substr_from substr_for
|
||||
%type <str> unicode_normal_form
|
||||
|
||||
%type <boolean> opt_instead
|
||||
%type <boolean> opt_unique opt_concurrently opt_verbose opt_full
|
||||
@ -664,7 +665,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
|
||||
|
||||
MAPPING MATCH MATERIALIZED MAXVALUE METHOD MINUTE_P MINVALUE MODE MONTH_P MOVE
|
||||
|
||||
NAME_P NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NONE
|
||||
NAME_P NAMES NATIONAL NATURAL NCHAR NEW NEXT NFC NFD NFKC NFKD NO NONE
|
||||
NORMALIZE NORMALIZED
|
||||
NOT NOTHING NOTIFY NOTNULL NOWAIT NULL_P NULLIF
|
||||
NULLS_P NUMERIC
|
||||
|
||||
@ -13491,6 +13493,22 @@ a_expr: c_expr { $$ = $1; }
|
||||
list_make1($1), @2),
|
||||
@2);
|
||||
}
|
||||
| a_expr IS NORMALIZED %prec IS
|
||||
{
|
||||
$$ = (Node *) makeFuncCall(SystemFuncName("is_normalized"), list_make1($1), @2);
|
||||
}
|
||||
| a_expr IS unicode_normal_form NORMALIZED %prec IS
|
||||
{
|
||||
$$ = (Node *) makeFuncCall(SystemFuncName("is_normalized"), list_make2($1, makeStringConst($3, @3)), @2);
|
||||
}
|
||||
| a_expr IS NOT NORMALIZED %prec IS
|
||||
{
|
||||
$$ = makeNotExpr((Node *) makeFuncCall(SystemFuncName("is_normalized"), list_make1($1), @2), @2);
|
||||
}
|
||||
| a_expr IS NOT unicode_normal_form NORMALIZED %prec IS
|
||||
{
|
||||
$$ = makeNotExpr((Node *) makeFuncCall(SystemFuncName("is_normalized"), list_make2($1, makeStringConst($4, @4)), @2), @2);
|
||||
}
|
||||
| DEFAULT
|
||||
{
|
||||
/*
|
||||
@ -13934,6 +13952,14 @@ func_expr_common_subexpr:
|
||||
{
|
||||
$$ = (Node *) makeFuncCall(SystemFuncName("date_part"), $3, @1);
|
||||
}
|
||||
| NORMALIZE '(' a_expr ')'
|
||||
{
|
||||
$$ = (Node *) makeFuncCall(SystemFuncName("normalize"), list_make1($3), @1);
|
||||
}
|
||||
| NORMALIZE '(' a_expr ',' unicode_normal_form ')'
|
||||
{
|
||||
$$ = (Node *) makeFuncCall(SystemFuncName("normalize"), list_make2($3, makeStringConst($5, @5)), @1);
|
||||
}
|
||||
| OVERLAY '(' overlay_list ')'
|
||||
{
|
||||
/* overlay(A PLACING B FROM C FOR D) is converted to
|
||||
@ -14569,6 +14595,13 @@ extract_arg:
|
||||
| Sconst { $$ = $1; }
|
||||
;
|
||||
|
||||
/*
 * unicode_normal_form: one of the keywords NFC, NFD, NFKC or NFKD.
 *
 * The semantic value (declared as <str>) is the lower-case spelling of the
 * keyword. The NORMALIZE(expr, form) and "expr IS [NOT] form NORMALIZED"
 * productions wrap it with makeStringConst() and pass it as the second
 * argument of the normalize / is_normalized function call.
 */
unicode_normal_form:
|
||||
NFC { $$ = "nfc"; }
|
||||
| NFD { $$ = "nfd"; }
|
||||
| NFKC { $$ = "nfkc"; }
|
||||
| NFKD { $$ = "nfkd"; }
|
||||
;
|
||||
|
||||
/* OVERLAY() arguments
|
||||
* SQL99 defines the OVERLAY() function:
|
||||
* o overlay(text placing text from int for int)
|
||||
@ -15315,7 +15348,12 @@ unreserved_keyword:
|
||||
| NAMES
|
||||
| NEW
|
||||
| NEXT
|
||||
| NFC
|
||||
| NFD
|
||||
| NFKC
|
||||
| NFKD
|
||||
| NO
|
||||
| NORMALIZED
|
||||
| NOTHING
|
||||
| NOTIFY
|
||||
| NOWAIT
|
||||
@ -15494,6 +15532,7 @@ col_name_keyword:
|
||||
| NATIONAL
|
||||
| NCHAR
|
||||
| NONE
|
||||
| NORMALIZE
|
||||
| NULLIF
|
||||
| NUMERIC
|
||||
| OUT_P
|
||||
|
Reference in New Issue
Block a user