mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Docs review for unaccent: fix grammar, markup, etc.
This commit is contained in:
@ -1,3 +1,5 @@
|
|||||||
|
<!-- $PostgreSQL: pgsql/doc/src/sgml/unaccent.sgml,v 1.6 2010/08/25 02:12:00 tgl Exp $ -->
|
||||||
|
|
||||||
<sect1 id="unaccent">
|
<sect1 id="unaccent">
|
||||||
<title>unaccent</title>
|
<title>unaccent</title>
|
||||||
|
|
||||||
@ -6,24 +8,24 @@
|
|||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
<filename>unaccent</> removes accents (diacritic signs) from a lexeme.
|
<filename>unaccent</> is a text search dictionary that removes accents
|
||||||
It's a filtering dictionary, that means its output is
|
(diacritic signs) from lexemes.
|
||||||
always passed to the next dictionary (if any), contrary to the standard
|
It's a filtering dictionary, which means its output is
|
||||||
behavior. Currently, it supports most important accents from European
|
always passed to the next dictionary (if any), unlike the normal
|
||||||
languages.
|
behavior of dictionaries. This allows accent-insensitive processing
|
||||||
|
for full text search.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Limitation: Current implementation of <filename>unaccent</>
|
The current implementation of <filename>unaccent</> cannot be used as a
|
||||||
dictionary cannot be used as a normalizing dictionary for
|
normalizing dictionary for the <filename>thesaurus</filename> dictionary.
|
||||||
<filename>thesaurus</filename> dictionary.
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<sect2>
|
<sect2>
|
||||||
<title>Configuration</title>
|
<title>Configuration</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
A <literal>unaccent</> dictionary accepts the following options:
|
An <literal>unaccent</> dictionary accepts the following options:
|
||||||
</para>
|
</para>
|
||||||
<itemizedlist>
|
<itemizedlist>
|
||||||
<listitem>
|
<listitem>
|
||||||
@ -43,23 +45,27 @@
|
|||||||
<itemizedlist>
|
<itemizedlist>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Each line represents pair: character_with_accent character_without_accent
|
Each line represents a pair, consisting of a character with an accent
|
||||||
|
followed by a character without an accent. The first is translated into
|
||||||
|
the second. For example,
|
||||||
<programlisting>
|
<programlisting>
|
||||||
À A
|
À A
|
||||||
Á A
|
Á A
|
||||||
 A
|
 A
|
||||||
à A
|
à A
|
||||||
Ä A
|
Ä A
|
||||||
Å A
|
Å A
|
||||||
Æ A
|
Æ A
|
||||||
</programlisting>
|
</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Look at <filename>unaccent.rules</>, which is installed in
|
A more complete example, which is directly useful for most European
|
||||||
<filename>$SHAREDIR/tsearch_data/</>, for an example.
|
languages, can be found in <filename>unaccent.rules</>, which is installed
|
||||||
|
in <filename>$SHAREDIR/tsearch_data/</> when the <filename>unaccent</>
|
||||||
|
module is installed.
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
@ -67,23 +73,22 @@
|
|||||||
<title>Usage</title>
|
<title>Usage</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Running the installation script creates a text search template
|
Running the installation script <filename>unaccent.sql</> creates a text
|
||||||
<literal>unaccent</> and a dictionary <literal>unaccent</>
|
search template <literal>unaccent</> and a dictionary <literal>unaccent</>
|
||||||
based on it, with default parameters. You can alter the
|
based on it, with default parameters. You can alter the
|
||||||
parameters, for example
|
parameters, for example
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
=# ALTER TEXT SEARCH DICTIONARY unaccent (RULES='my_rules');
|
mydb=# ALTER TEXT SEARCH DICTIONARY unaccent (RULES='my_rules');
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
or create new dictionaries based on the template.
|
or create new dictionaries based on the template.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
To test the dictionary, you can try
|
To test the dictionary, you can try:
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
=# select ts_lexize('unaccent','Hôtel');
|
mydb=# select ts_lexize('unaccent','Hôtel');
|
||||||
ts_lexize
|
ts_lexize
|
||||||
-----------
|
-----------
|
||||||
{Hotel}
|
{Hotel}
|
||||||
@ -92,41 +97,42 @@
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Filtering dictionary are useful for correct work of
|
Here is an example showing how to insert the
|
||||||
<function>ts_headline</function> function.
|
<filename>unaccent</> dictionary into a text search configuration:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
=# CREATE TEXT SEARCH CONFIGURATION fr ( COPY = french );
|
mydb=# CREATE TEXT SEARCH CONFIGURATION fr ( COPY = french );
|
||||||
=# ALTER TEXT SEARCH CONFIGURATION fr
|
mydb=# ALTER TEXT SEARCH CONFIGURATION fr
|
||||||
ALTER MAPPING FOR hword, hword_part, word
|
ALTER MAPPING FOR hword, hword_part, word
|
||||||
WITH unaccent, french_stem;
|
WITH unaccent, french_stem;
|
||||||
=# select to_tsvector('fr','Hôtels de la Mer');
|
mydb=# select to_tsvector('fr','Hôtels de la Mer');
|
||||||
to_tsvector
|
to_tsvector
|
||||||
-------------------
|
-------------------
|
||||||
'hotel':1 'mer':4
|
'hotel':1 'mer':4
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
=# select to_tsvector('fr','Hôtel de la Mer') @@ to_tsquery('fr','Hotels');
|
mydb=# select to_tsvector('fr','Hôtel de la Mer') @@ to_tsquery('fr','Hotels');
|
||||||
?column?
|
?column?
|
||||||
----------
|
----------
|
||||||
t
|
t
|
||||||
(1 row)
|
(1 row)
|
||||||
=# select ts_headline('fr','Hôtel de la Mer',to_tsquery('fr','Hotels'));
|
|
||||||
|
mydb=# select ts_headline('fr','Hôtel de la Mer',to_tsquery('fr','Hotels'));
|
||||||
ts_headline
|
ts_headline
|
||||||
------------------------
|
------------------------
|
||||||
<b>Hôtel</b>de la Mer
|
<b>Hôtel</b> de la Mer
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
</programlisting>
|
</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
<sect2>
|
<sect2>
|
||||||
<title>Function</title>
|
<title>Functions</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
<function>unaccent</> function removes accents (diacritic signs) from
|
The <function>unaccent()</> function removes accents (diacritic signs) from
|
||||||
argument string. Basically, it's a wrapper around
|
a given string. Basically, it's a wrapper around the
|
||||||
<filename>unaccent</> dictionary.
|
<filename>unaccent</> dictionary, but it can be used outside normal
|
||||||
|
text search contexts.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<indexterm>
|
<indexterm>
|
||||||
@ -134,14 +140,14 @@
|
|||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<synopsis>
|
<synopsis>
|
||||||
unaccent(<optional><replaceable class="PARAMETER">dictionary</replaceable>, </optional> <replaceable class="PARAMETER">string</replaceable>)
|
unaccent(<optional><replaceable class="PARAMETER">dictionary</replaceable>, </optional> <replaceable class="PARAMETER">string</replaceable>) returns <type>text</type>
|
||||||
returns <type>text</type>
|
|
||||||
</synopsis>
|
</synopsis>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
|
For example:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
SELECT unaccent('unaccent', 'Hôtel');
|
SELECT unaccent('unaccent', 'Hôtel');
|
||||||
SELECT unaccent('Hôtel');
|
SELECT unaccent('Hôtel');
|
||||||
</programlisting>
|
</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
Reference in New Issue
Block a user