mirror of
https://github.com/postgres/postgres.git
synced 2025-05-02 11:44:50 +03:00
444 lines
16 KiB
Plaintext
444 lines
16 KiB
Plaintext
<!-- doc/src/sgml/xml2.sgml -->
|
|
|
|
<sect1 id="xml2" xreflabel="xml2">
|
|
<title>xml2</title>
|
|
|
|
<indexterm zone="xml2">
|
|
<primary>xml2</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <filename>xml2</filename> module provides XPath querying and
|
|
XSLT functionality.
|
|
</para>
|
|
|
|
<sect2>
|
|
<title>Deprecation Notice</title>
|
|
|
|
<para>
|
|
From <productname>PostgreSQL</productname> 8.3 on, there is XML-related
|
|
functionality based on the SQL/XML standard in the core server.
|
|
That functionality covers XML syntax checking and XPath queries,
|
|
which is what this module does, and more, but the API is
|
|
not at all compatible. It is planned that this module will be
|
|
removed in a future version of PostgreSQL in favor of the newer standard API, so
|
|
you are encouraged to try converting your applications. If you
|
|
find that some of the functionality of this module is not
|
|
available in an adequate form with the newer API, please explain
|
|
your issue to <email>pgsql-hackers@lists.postgresql.org</email> so that the deficiency
|
|
can be addressed.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>Description of Functions</title>
|
|
|
|
<para>
|
|
<xref linkend="xml2-functions-table"/> shows the functions provided by this module.
|
|
These functions provide straightforward XML parsing and XPath queries.
|
|
</para>
|
|
|
|
<table id="xml2-functions-table">
|
|
<title><filename>xml2</filename> Functions</title>
|
|
<tgroup cols="1">
|
|
<thead>
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
Function
|
|
</para>
|
|
<para>
|
|
Description
|
|
</para></entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>xml_valid</function> ( <parameter>document</parameter> <type>text</type> )
|
|
<returnvalue>boolean</returnvalue>
|
|
</para>
|
|
<para>
|
|
Parses the given document and returns true if the
|
|
document is well-formed XML. (Note: this is an alias for the standard
|
|
PostgreSQL function <function>xml_is_well_formed()</function>. The
|
|
name <function>xml_valid()</function> is technically incorrect since validity
|
|
and well-formedness have different meanings in XML.)
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>xpath_string</function> ( <parameter>document</parameter> <type>text</type>, <parameter>query</parameter> <type>text</type> )
|
|
<returnvalue>text</returnvalue>
|
|
</para>
|
|
<para>
|
|
Evaluates the XPath query on the supplied document, and
|
|
casts the result to <type>text</type>.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>xpath_number</function> ( <parameter>document</parameter> <type>text</type>, <parameter>query</parameter> <type>text</type> )
|
|
<returnvalue>real</returnvalue>
|
|
</para>
|
|
<para>
|
|
Evaluates the XPath query on the supplied document, and
|
|
casts the result to <type>real</type>.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>xpath_bool</function> ( <parameter>document</parameter> <type>text</type>, <parameter>query</parameter> <type>text</type> )
|
|
<returnvalue>boolean</returnvalue>
|
|
</para>
|
|
<para>
|
|
Evaluates the XPath query on the supplied document, and
|
|
casts the result to <type>boolean</type>.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>xpath_nodeset</function> ( <parameter>document</parameter> <type>text</type>, <parameter>query</parameter> <type>text</type>, <parameter>toptag</parameter> <type>text</type>, <parameter>itemtag</parameter> <type>text</type> )
|
|
<returnvalue>text</returnvalue>
|
|
</para>
|
|
<para>
|
|
Evaluates the query on the document and wraps the result in XML
|
|
tags. If the result is multivalued, the output will look like:
|
|
<synopsis>
|
|
<toptag>
|
|
<itemtag>Value 1 which could be an XML fragment</itemtag>
|
|
<itemtag>Value 2....</itemtag>
|
|
</toptag>
|
|
</synopsis>
|
|
If either <parameter>toptag</parameter>
|
|
or <parameter>itemtag</parameter> is an empty string, the relevant tag
|
|
is omitted.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>xpath_nodeset</function> ( <parameter>document</parameter> <type>text</type>, <parameter>query</parameter> <type>text</type>, <parameter>itemtag</parameter> <type>text</type> )
|
|
<returnvalue>text</returnvalue>
|
|
</para>
|
|
<para>
|
|
Like <function>xpath_nodeset(document, query, toptag, itemtag)</function> but result omits <parameter>toptag</parameter>.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>xpath_nodeset</function> ( <parameter>document</parameter> <type>text</type>, <parameter>query</parameter> <type>text</type> )
|
|
<returnvalue>text</returnvalue>
|
|
</para>
|
|
<para>
|
|
Like <function>xpath_nodeset(document, query, toptag, itemtag)</function> but result omits both tags.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>xpath_list</function> ( <parameter>document</parameter> <type>text</type>, <parameter>query</parameter> <type>text</type>, <parameter>separator</parameter> <type>text</type> )
|
|
<returnvalue>text</returnvalue>
|
|
</para>
|
|
<para>
|
|
Evaluates the query on the document and returns multiple values
|
|
separated by the specified separator, for example <literal>Value
|
|
1,Value 2,Value 3</literal> if <parameter>separator</parameter>
|
|
is <literal>,</literal>.
|
|
</para></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry role="func_table_entry"><para role="func_signature">
|
|
<function>xpath_list</function> ( <parameter>document</parameter> <type>text</type>, <parameter>query</parameter> <type>text</type> )
|
|
<returnvalue>text</returnvalue>
|
|
</para>
|
|
<para>
|
|
This is a wrapper for the above function that uses <literal>,</literal>
|
|
as the separator.
|
|
</para></entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title><literal>xpath_table</literal></title>
|
|
|
|
<indexterm>
|
|
<primary>xpath_table</primary>
|
|
</indexterm>
|
|
|
|
<synopsis>
|
|
xpath_table(text key, text document, text relation, text xpaths, text criteria) returns setof record
|
|
</synopsis>
|
|
|
|
<para>
|
|
<function>xpath_table</function> is a table function that evaluates a set of XPath
|
|
queries on each of a set of documents and returns the results as a
|
|
table. The primary key field from the original document table is returned
|
|
as the first column of the result so that the result set
|
|
can readily be used in joins. The parameters are described in
|
|
<xref linkend="xml2-xpath-table-parameters"/>.
|
|
</para>
|
|
|
|
<table id="xml2-xpath-table-parameters">
|
|
<title><function>xpath_table</function> Parameters</title>
|
|
<tgroup cols="2">
|
|
<colspec colname="col1" colwidth="1*"/>
|
|
<colspec colname="col2" colwidth="2*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Parameter</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><parameter>key</parameter></entry>
|
|
<entry>
|
|
<para>
|
|
the name of the <quote>key</quote> field — this is just a field to be used as
|
|
the first column of the output table, i.e., it identifies the record from
|
|
which each output row came (see note below about multiple values)
|
|
</para>
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry><parameter>document</parameter></entry>
|
|
<entry>
|
|
<para>
|
|
the name of the field containing the XML document
|
|
</para>
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry><parameter>relation</parameter></entry>
|
|
<entry>
|
|
<para>
|
|
the name of the table or view containing the documents
|
|
</para>
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry><parameter>xpaths</parameter></entry>
|
|
<entry>
|
|
<para>
|
|
one or more XPath expressions, separated by <literal>|</literal>
|
|
</para>
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry><parameter>criteria</parameter></entry>
|
|
<entry>
|
|
<para>
|
|
the contents of the WHERE clause. This cannot be omitted, so use
|
|
<literal>true</literal> or <literal>1=1</literal> if you want to
|
|
process all the rows in the relation
|
|
</para>
|
|
</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
These parameters (except the XPath strings) are just substituted
|
|
into a plain SQL SELECT statement, so you have some flexibility — the
|
|
statement is
|
|
</para>
|
|
|
|
<para>
|
|
<literal>
|
|
SELECT <key>, <document> FROM <relation> WHERE <criteria>
|
|
</literal>
|
|
</para>
|
|
|
|
<para>
|
|
so those parameters can be <emphasis>anything</emphasis> valid in those particular
|
|
locations. The result from this SELECT needs to return exactly two
|
|
columns (which it will unless you try to list multiple fields for key
|
|
or document). Beware that this simplistic approach requires that you
|
|
validate any user-supplied values to avoid SQL injection attacks.
|
|
</para>
|
|
|
|
<para>
|
|
The function has to be used in a <literal>FROM</literal> expression, with an
|
|
<literal>AS</literal> clause to specify the output columns; for example
|
|
<programlisting>
|
|
SELECT * FROM
|
|
xpath_table('article_id',
|
|
'article_xml',
|
|
'articles',
|
|
'/article/author|/article/pages|/article/title',
|
|
'date_entered > ''2003-01-01'' ')
|
|
AS t(article_id integer, author text, page_count integer, title text);
|
|
</programlisting>
|
|
The <literal>AS</literal> clause defines the names and types of the columns in the
|
|
output table. The first is the <quote>key</quote> field and the rest correspond
|
|
to the XPath queries.
|
|
If there are more XPath queries than result columns,
|
|
the extra queries will be ignored. If there are more result columns
|
|
than XPath queries, the extra columns will be NULL.
|
|
</para>
|
|
|
|
<para>
|
|
Notice that this example defines the <structname>page_count</structname> result
|
|
column as an integer. The function deals internally with string
|
|
representations, so when you say you want an integer in the output, it will
|
|
take the string representation of the XPath result and use PostgreSQL input
|
|
functions to transform it into an integer (or whatever type the <type>AS</type>
|
|
clause requests). An error will result if it can't do this — for
|
|
example if the result is empty — so you may wish to just stick to
|
|
<type>text</type> as the column type if you think your data has any problems.
|
|
</para>
|
|
|
|
<para>
|
|
The calling <command>SELECT</command> statement doesn't necessarily have to be
|
|
just <literal>SELECT *</literal> — it can reference the output
|
|
columns by name or join them to other tables. The function produces a
|
|
virtual table with which you can perform any operation you wish (e.g.,
|
|
aggregation, joining, sorting etc.). So we could also have:
|
|
<programlisting>
|
|
SELECT t.title, p.fullname, p.email
|
|
FROM xpath_table('article_id', 'article_xml', 'articles',
|
|
'/article/title|/article/author/@id',
|
|
'xpath_string(article_xml,''/article/@date'') > ''2003-03-20'' ')
|
|
AS t(article_id integer, title text, author_id integer),
|
|
tblPeopleInfo AS p
|
|
WHERE t.author_id = p.person_id;
|
|
</programlisting>
|
|
as a more complicated example. Of course, you could wrap all
|
|
of this in a view for convenience.
|
|
</para>
|
|
|
|
<sect3>
|
|
<title>Multivalued Results</title>
|
|
|
|
<para>
|
|
The <function>xpath_table</function> function assumes that the results of each XPath query
|
|
might be multivalued, so the number of rows returned by the function
|
|
may not be the same as the number of input documents. The first row
|
|
returned contains the first result from each query, the second row the
|
|
second result from each query. If one of the queries has fewer values
|
|
than the others, null values will be returned instead.
|
|
</para>
|
|
|
|
<para>
|
|
In some cases, a user will know that a given XPath query will return
|
|
only a single result (perhaps a unique document identifier) — if used
|
|
alongside an XPath query returning multiple results, the single-valued
|
|
result will appear only on the first row of the result. The solution
|
|
to this is to use the key field as part of a join against a simpler
|
|
XPath query. As an example:
|
|
|
|
<programlisting>
|
|
CREATE TABLE test (
|
|
id int PRIMARY KEY,
|
|
xml text
|
|
);
|
|
|
|
INSERT INTO test VALUES (1, '<doc num="C1">
|
|
<line num="L1"><a>1</a><b>2</b><c>3</c></line>
|
|
<line num="L2"><a>11</a><b>22</b><c>33</c></line>
|
|
</doc>');
|
|
|
|
INSERT INTO test VALUES (2, '<doc num="C2">
|
|
<line num="L1"><a>111</a><b>222</b><c>333</c></line>
|
|
<line num="L2"><a>111</a><b>222</b><c>333</c></line>
|
|
</doc>');
|
|
|
|
SELECT * FROM
|
|
xpath_table('id','xml','test',
|
|
'/doc/@num|/doc/line/@num|/doc/line/a|/doc/line/b|/doc/line/c',
|
|
'true')
|
|
AS t(id int, doc_num varchar(10), line_num varchar(10), val1 int, val2 int, val3 int)
|
|
WHERE id = 1 ORDER BY doc_num, line_num
|
|
|
|
id | doc_num | line_num | val1 | val2 | val3
|
|
----+---------+----------+------+------+------
|
|
1 | C1 | L1 | 1 | 2 | 3
|
|
1 | | L2 | 11 | 22 | 33
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
To get <literal>doc_num</literal> on every line, the solution is to use two invocations
|
|
of <function>xpath_table</function> and join the results:
|
|
|
|
<programlisting>
|
|
SELECT t.*,i.doc_num FROM
|
|
xpath_table('id', 'xml', 'test',
|
|
'/doc/line/@num|/doc/line/a|/doc/line/b|/doc/line/c',
|
|
'true')
|
|
AS t(id int, line_num varchar(10), val1 int, val2 int, val3 int),
|
|
xpath_table('id', 'xml', 'test', '/doc/@num', 'true')
|
|
AS i(id int, doc_num varchar(10))
|
|
WHERE i.id=t.id AND i.id=1
|
|
ORDER BY doc_num, line_num;
|
|
|
|
id | line_num | val1 | val2 | val3 | doc_num
|
|
----+----------+------+------+------+---------
|
|
1 | L1 | 1 | 2 | 3 | C1
|
|
1 | L2 | 11 | 22 | 33 | C1
|
|
(2 rows)
|
|
</programlisting>
|
|
</para>
|
|
</sect3>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>XSLT Functions</title>
|
|
|
|
<para>
|
|
The following functions are available if libxslt is installed:
|
|
</para>
|
|
|
|
<sect3>
|
|
<title><literal>xslt_process</literal></title>
|
|
|
|
<indexterm>
|
|
<primary>xslt_process</primary>
|
|
</indexterm>
|
|
|
|
<synopsis>
|
|
xslt_process(text document, text stylesheet, text paramlist) returns text
|
|
</synopsis>
|
|
|
|
<para>
|
|
This function applies the XSL stylesheet to the document and returns
|
|
the transformed result. The <literal>paramlist</literal> is a list of parameter
|
|
assignments to be used in the transformation, specified in the form
|
|
<literal>a=1,b=2</literal>. Note that the
|
|
parameter parsing is very simple-minded: parameter values cannot
|
|
contain commas!
|
|
</para>
|
|
|
|
<para>
|
|
There is also a two-parameter version of <function>xslt_process</function> which
|
|
does not pass any parameters to the transformation.
|
|
</para>
|
|
</sect3>
|
|
</sect2>
|
|
|
|
<sect2>
|
|
<title>Author</title>
|
|
|
|
<para>
|
|
John Gray <email>jgray@azuli.co.uk</email>
|
|
</para>
|
|
|
|
<para>
|
|
Development of this module was sponsored by Torchbox Ltd. (www.torchbox.com).
|
|
It has the same BSD license as PostgreSQL.
|
|
</para>
|
|
</sect2>
|
|
|
|
</sect1>
|