mirror of
https://gitlab.gnome.org/GNOME/libxslt
synced 2026-01-07 21:58:22 +03:00
Updated the documentation, merged in the internals.html and
extensions.html pages to the site, cleanups, Daniel
This commit is contained in:
@@ -36,6 +36,8 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
|
||||
@@ -36,6 +36,8 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
|
||||
@@ -36,6 +36,8 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
|
||||
@@ -36,6 +36,8 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
|
||||
@@ -36,6 +36,8 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
|
||||
@@ -1,129 +1,144 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
||||
"http://www.w3.org/TR/html4/loose.dtd">
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>Writing extensions for XSLT C library for Gnome</title>
|
||||
<meta name="GENERATOR" content="amaya V5.0">
|
||||
<meta http-equiv="Content-Type" content="text/html">
|
||||
<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">
|
||||
<style type="text/css"><!--
|
||||
TD {font-size: 10pt; font-family: Verdana,Arial,Helvetica}
|
||||
BODY {font-size: 10pt; font-family: Verdana,Arial,Helvetica; margin-top: 5pt; margin-left: 0pt; margin-right: 0pt}
|
||||
H1 {font-size: 16pt; font-family: Verdana,Arial,Helvetica}
|
||||
H2 {font-size: 14pt; font-family: Verdana,Arial,Helvetica}
|
||||
H3 {font-size: 12pt; font-family: Verdana,Arial,Helvetica}
|
||||
A:link, A:visited, A:active { text-decoration: underline }
|
||||
--></style>
|
||||
<title>Writing extensions</title>
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff">
|
||||
<p><a href="http://www.gnome.org/"><img src="smallfootonly.gif"
|
||||
alt="Gnome Logo"></a><a href="http://www.redhat.com/"><img src="redhat.gif"
|
||||
alt="Red Hat Logo"></a></p>
|
||||
|
||||
<h1 align="center">Writing extensions for XSLT C library for Gnome</h1>
|
||||
|
||||
<p></p>
|
||||
|
||||
<p>Location: <a
|
||||
href="http://xmlsoft.org/XSLT/extensions.html">http://xmlsoft.org/XSLT/extensions.html</a></p>
|
||||
|
||||
<p>Libxslt home page: <a
|
||||
href="http://xmlsoft.org/XSLT/">http://xmlsoft.org/XSLT/</a></p>
|
||||
|
||||
<p>mailing-list archives: <a
|
||||
href="http://mail.gnome.org/archives/xslt/">http://mail.gnome.org/archives/xslt/</a></p>
|
||||
|
||||
<p>Version: $Revision$</p>
|
||||
|
||||
<h2>Table of contents</h2>
|
||||
<body bgcolor="#8b7765" text="#000000" link="#000000" vlink="#000000">
|
||||
<table border="0" width="100%" cellpadding="5" cellspacing="0" align="center"><tr>
|
||||
<td width="100">
|
||||
<a href="http://www.gnome.org/"><img src="smallfootonly.gif" alt="Gnome Logo"></a><a href="http://www.redhat.com"><img src="redhat.gif" alt="Red Hat Logo"></a>
|
||||
</td>
|
||||
<td><table border="0" width="90%" cellpadding="2" cellspacing="0" align="center" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3" bgcolor="#fffacd"><tr><td align="center">
|
||||
<h1>The XSLT C library for Gnome</h1>
|
||||
<h2>Writing extensions</h2>
|
||||
</td></tr></table></td></tr></table></td>
|
||||
</tr></table>
|
||||
<table border="0" cellpadding="4" cellspacing="0" width="100%" align="center"><tr><td bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="2" width="100%"><tr>
|
||||
<td valign="top" width="200" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td>
|
||||
<table width="100%" border="0" cellspacing="1" cellpadding="3">
|
||||
<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Main Menu</b></center></td></tr>
|
||||
<tr><td bgcolor="#fffacd"><ul style="margin-left: -2pt">
|
||||
<li><a href="index.html">Home</a></li>
|
||||
<li><a href="intro.html">Introduction</a></li>
|
||||
<li><a href="docs.html">Documentation</a></li>
|
||||
<li><a href="bugs.html">Reporting bugs and getting help</a></li>
|
||||
<li><a href="help.html">How to help</a></li>
|
||||
<li><a href="downloads.html">Downloads</a></li>
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
</li>
|
||||
</ul></td></tr>
|
||||
</table>
|
||||
<table width="100%" border="0" cellspacing="1" cellpadding="3">
|
||||
<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Related links</b></center></td></tr>
|
||||
<tr><td bgcolor="#fffacd"><ul style="margin-left: -2pt">
|
||||
<li><a href="tutorial/libxslttutorial.html">Tutorial</a></li>
|
||||
<li><a href="xsltproc.html">Man page for xsltproc</a></li>
|
||||
<li><a href="http://mail.gnome.org/archives/xslt/">Mail archive</a></li>
|
||||
<li><a href="http://xmlsoft.org/">XML libxml</a></li>
|
||||
<li><a href="http://www.cs.unibo.it/~casarini/gdome2/">DOM gdome2</a></li>
|
||||
<li><a href="ftp://xmlsoft.org/">FTP</a></li>
|
||||
<li><a href="http://www.fh-frankfurt.de/~igor/projects/libxml/">Windows binaries</a></li>
|
||||
<li><a href="http://pages.eidosnet.co.uk/~garypen/libxml/">Solaris binaries</a></li>
|
||||
<li><a href="http://bugzilla.gnome.org/buglist.cgi?product=libxslt">Bug Tracker</a></li>
|
||||
</ul></td></tr>
|
||||
</table>
|
||||
</td></tr></table></td>
|
||||
<td valign="top" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%"><tr><td><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table border="0" cellpadding="3" cellspacing="1" width="100%"><tr><td bgcolor="#fffacd">
|
||||
<h3>Table of contents</h3>
|
||||
<ul>
|
||||
<li><a href="#Introducti">Introduction</a></li>
|
||||
<li><a href="#Basics">Basics</a></li>
|
||||
<li><a href="#Keep">Extension modules</a></li>
|
||||
<li><a href="#Registerin">Registering a module</a></li>
|
||||
<li><a href="#libxml">Loading a module</a></li>
|
||||
<li><a href="#Registerin1">Registering an extension function</a></li>
|
||||
<li><a href="#Implementi">Implementing an extension function</a></li>
|
||||
<li><a href="#Examples">Examples for extension functions</a></li>
|
||||
<li><a href="#Registerin2">Registering an extension element</a></li>
|
||||
<li><a href="#Implementi1">Implementing an extension element</a></li>
|
||||
<li><a href="#Example">Example for extension elements</a></li>
|
||||
<li><a href="#shutdown">The shutdown of a module</a></li>
|
||||
<li><a href="#Future">Future work</a></li>
|
||||
<li><a href="extensions.html#Introducti">Introduction</a></li>
|
||||
<li><a href="extensions.html#Basics">Basics</a></li>
|
||||
<li><a href="extensions.html#Keep">Extension modules</a></li>
|
||||
<li><a href="extensions.html#Registerin">Registering a module</a></li>
|
||||
<li><a href="extensions.html#module">Loading a module</a></li>
|
||||
<li><a href="extensions.html#Registerin1">Registering an extension
|
||||
function</a></li>
|
||||
<li><a href="extensions.html#Implementi">Implementing an extension
|
||||
function</a></li>
|
||||
<li><a href="extensions.html#Examples">Examples for extension
|
||||
functions</a></li>
|
||||
<li><a href="extensions.html#Registerin2">Registering an extension
|
||||
element</a></li>
|
||||
<li><a href="extensions.html#Implementi1">Implementing an extension
|
||||
element</a></li>
|
||||
<li><a href="extensions.html#Example">Example for extension
|
||||
elements</a></li>
|
||||
<li><a href="extensions.html#shutdown">The shutdown of a module</a></li>
|
||||
<li><a href="extensions.html#Future">Future work</a></li>
|
||||
</ul>
|
||||
|
||||
<h2><a name="Introducti">Introduction</a></h2>
|
||||
|
||||
<h3><a name="Introducti1">Introduction</a></h3>
|
||||
<p>This document describes the work needed to write extensions to the
|
||||
standard XSLT library for use with <a
|
||||
href="http://xmlsoft.org/XSLT/">libxslt</a>, the <a
|
||||
href="http://www.w3.org/TR/xslt">XSLT</a> C library developed for the <a
|
||||
href="http://www.gnome.org/">Gnome</a> project.</p>
|
||||
|
||||
standard XSLT library for use with <a href="http://xmlsoft.org/XSLT/">libxslt</a>, the <a href="http://www.w3.org/TR/xslt">XSLT</a> C library developed for the <a href="http://www.gnome.org/">Gnome</a> project.</p>
|
||||
<p>Before starting reading this document it is highly recommended to get
|
||||
familiar with <a href="internals.html">the libxslt internals</a>.</p>
|
||||
|
||||
<p>Note: this documentation is by definition incomplete and I am not good at
|
||||
spelling, grammar, so patches and suggestions are <a
|
||||
href="mailto:veillard@redhat.com">really welcome</a>.</p>
|
||||
|
||||
<h2><a name="Basics">Basics</a></h2>
|
||||
|
||||
spelling, grammar, so patches and suggestions are <a href="mailto:veillard@redhat.com">really welcome</a>.</p>
|
||||
<h3><a name="Basics">Basics</a></h3>
|
||||
<p>The <a href="http://www.w3.org/TR/xslt">XSLT specification</a> provides
|
||||
two <a href="http://www.w3.org/TR/xslt">ways to extend an XSLT engine</a>:</p>
|
||||
<ul>
|
||||
<li>providing <a href="http://www.w3.org/TR/xslt">new extension
|
||||
<li>providing <a href="http://www.w3.org/TR/xslt">new extension
|
||||
functions</a> which can be called from XPath expressions</li>
|
||||
<li>providing <a href="http://www.w3.org/TR/xslt">new extension
|
||||
<li>providing <a href="http://www.w3.org/TR/xslt">new extension
|
||||
elements</a> which can be inserted in stylesheets</li>
|
||||
</ul>
|
||||
|
||||
<p>In both cases the extensions need to be associated to a new namespace,
|
||||
i.e. a URI used as the name for the extension's namespace (there is no need
|
||||
to have a resource there for this to work).</p>
|
||||
|
||||
<p>libxslt provides a few extensions itself, either in libxslt namespace
|
||||
"http://xmlsoft.org/XSLT/" or in other namespace for well known extensions
|
||||
"http://xmlsoft.org/XSLT/" or in other namespace for well known extensions
|
||||
provided by other XSLT processors like Saxon, Xalan or XT.</p>
|
||||
|
||||
<h2><a name="Keep">Extension modules</a></h2>
|
||||
|
||||
<h3><a name="Keep">Extension modules</a></h3>
|
||||
<p>Since extensions are bound to a namespace name, usually sets of extensions
|
||||
coming from a given source are using the same namespace name defining in
|
||||
practice a group of extensions providing elements, functions or both. From
|
||||
libxslt point of view those are considered as an "extension module", and most
|
||||
libxslt point of view those are considered as an "extension module", and most
|
||||
of the APIs work at a module point of view.</p>
|
||||
|
||||
<p>Registration of new functions or elements is bound to the activation of
|
||||
the module, this is currently done by declaring the namespace as an extension
|
||||
by using the attribute <code>extension-element-prefixes</code> on the
|
||||
<code><a href="http://www.w3.org/TR/xslt">xsl:stylesheet</a></code>
|
||||
element.</p>
|
||||
|
||||
<p>An extension module is defined by 3 objects:</p>
|
||||
<ul>
|
||||
<li>the namespace name associated</li>
|
||||
<li>an initialization function</li>
|
||||
<li>a shutdown function</li>
|
||||
<li>the namespace name associated</li>
|
||||
<li>an initialization function</li>
|
||||
<li>a shutdown function</li>
|
||||
</ul>
|
||||
|
||||
<h2><a name="Registerin">Registering a module</a></h2>
|
||||
|
||||
<h3><a name="Registerin">Registering a module</a></h3>
|
||||
<p>Currently a libxslt module has to be compiled within the application using
|
||||
libxslt, there is no code to load dynamically shared libraries associated to
|
||||
namespace (this may be added but is likely to become a portability
|
||||
nightmare).</p>
|
||||
|
||||
<p>So the current way to register a module is to link the code implementing
|
||||
it with the application and to call a registration function:</p>
|
||||
<pre>int xsltRegisterExtModule(const xmlChar *URI,
|
||||
xsltExtInitFunction initFunc,
|
||||
xsltExtShutdownFunction shutdownFunc);</pre>
|
||||
|
||||
<p>The associated header is read by:</p>
|
||||
<pre>#include &lt;libxslt/extensions.h&gt;</pre>
|
||||
|
||||
<p>which also defines the type for the initialization and shutdown
|
||||
functions</p>
|
||||
|
||||
<h2><a name="libxml">Loading a module</a></h2>
|
||||
|
||||
<h3><a name="module">Loading a module</a></h3>
|
||||
<p>Once the module URI has been registered and if the XSLT processor detects
|
||||
that a given stylesheet needs the functionalities of an extended module, this
|
||||
one is initialized.</p>
|
||||
|
||||
<p>The xsltExtInitFunction type defines the interface for an initialization
|
||||
function:</p>
|
||||
<pre>/**
|
||||
@@ -139,46 +154,39 @@ function:</p>
|
||||
*/
|
||||
typedef void *(*xsltExtInitFunction)(xsltTransformContextPtr ctxt,
|
||||
const xmlChar *URI);</pre>
|
||||
|
||||
<p>There are 3 things to notice:</p>
|
||||
<ul>
|
||||
<li>the function gets passed the namespace name URI as an argument, this
|
||||
<li>the function gets passed the namespace name URI as an argument, this
|
||||
allows a single function to provide the initialization for multiple
|
||||
logical modules</li>
|
||||
<li>it also gets passed a transformation context, the initialization is
|
||||
<li>it also gets passed a transformation context, the initialization is
|
||||
done at run time before any processing occurs on the stylesheet but it
|
||||
will be invoked separately each time for each transformation</li>
|
||||
<li>it returns a pointer, this can be used to store module specific
|
||||
<li>it returns a pointer, this can be used to store module specific
|
||||
information which can be retrieved later when a function or an element
|
||||
from the extension are used, an obvious example is a connection to a
|
||||
database which should be kept and reused along the transformation. NULL
|
||||
is a perfectly valid return, there is no way to indicate a failure at
|
||||
this level</li>
|
||||
</ul>
|
||||
|
||||
<p>What this function is expected to do is:</p>
|
||||
<ul>
|
||||
<li>prepare the context for this module (like opening the database
|
||||
<li>prepare the context for this module (like opening the database
|
||||
connection)</li>
|
||||
<li>register the extensions specific to this module</li>
|
||||
<li>register the extensions specific to this module</li>
|
||||
</ul>
|
||||
|
||||
<h2><a name="Registerin1">Registering an extension function</a></h2>
|
||||
|
||||
<h3><a name="Registerin1">Registering an extension function</a></h3>
|
||||
<p>There is a single call to do this registration:</p>
|
||||
<pre>int xsltRegisterExtFunction(xsltTransformContextPtr ctxt,
|
||||
const xmlChar *name,
|
||||
const xmlChar *URI,
|
||||
xmlXPathEvalFunc function);</pre>
|
||||
|
||||
<p>The registration is bound to a single transformation instance referred by
|
||||
ctxt, name is the UTF8 encoded name for the NCName of the function, and URI
|
||||
is the namespace name for the extension (no checking is done, a module could
|
||||
register functions or elements from a different namespace, but it is not
|
||||
recommended).</p>
|
||||
|
||||
<h2><a name="Implementi">Implementing an extension function</a></h2>
|
||||
|
||||
<h3><a name="Implementi">Implementing an extension function</a></h3>
|
||||
<p>The implementation of the function must have the signature of a libxml
|
||||
XPath function:</p>
|
||||
<pre>/**
|
||||
@@ -192,21 +200,18 @@ XPath function:</p>
|
||||
|
||||
typedef void (*xmlXPathEvalFunc)(xmlXPathParserContextPtr ctxt,
|
||||
int nargs);</pre>
|
||||
|
||||
<p>The context passed to an XPath function is not an XSLT context but an <a
|
||||
href="internals.html#XPath1">XPath context</a>. However it is possible to
|
||||
<p>The context passed to an XPath function is not an XSLT context but an <a href="internals.html#XPath1">XPath context</a>. However it is possible to
|
||||
find one from the other:</p>
|
||||
<ul>
|
||||
<li>The function xsltXPathGetTransformContext provides this lookup facility:
|
||||
<li>The function xsltXPathGetTransformContext provides this lookup facility:
|
||||
<pre>xsltTransformContextPtr
|
||||
xsltXPathGetTransformContext
|
||||
(xmlXPathParserContextPtr ctxt);</pre>
|
||||
</li>
|
||||
<li>The <code>xmlXPathContextPtr</code> associated to an
|
||||
</li>
|
||||
<li>The <code>xmlXPathContextPtr</code> associated to an
|
||||
<code>xsltTransformContext</code> is stored in the <code>xpathCtxt</code>
|
||||
field.</li>
|
||||
</ul>
|
||||
|
||||
<p>The first thing an extension function may want to do is to check the
|
||||
arguments passed on the stack, the <code>nargs</code> parameter specifies how many
|
||||
of them were provided on the XPath expression. The macros valuePop will
|
||||
@@ -215,10 +220,8 @@ extract them from the XPath stack:</p>
|
||||
#include <libxml/xpathInternals.h>
|
||||
|
||||
xmlXPathObjectPtr obj = valuePop(ctxt); </pre>
|
||||
|
||||
<p>Note that <code>ctxt</code> is the XPath context not the XSLT one. It is
|
||||
then possible to examine the content of the value. Check <a
|
||||
href="internals.html#Descriptio">the description of XPath objects</a> if
|
||||
then possible to examine the content of the value. Check <a href="internals.html#Descriptio">the description of XPath objects</a> if
|
||||
necessary. The following is a common sequence checking whether the argument
|
||||
passed is a string and converting it using the built-in XPath
|
||||
<code>string()</code> function if this is not the case:</p>
|
||||
@@ -227,30 +230,26 @@ passed is a string and converting it using the built-in XPath
|
||||
xmlXPathStringFunction(ctxt, 1);
|
||||
obj = valuePop(ctxt);
|
||||
}</pre>
|
||||
|
||||
<p>Most common XPath functions are available directly at the C level and are
|
||||
exported either in <code>&lt;libxml/xpath.h&gt;</code> or in
|
||||
<code>&lt;libxml/xpathInternals.h&gt;</code>.</p>
|
||||
|
||||
<p>The extension function may also need to retrieve the data associated to
|
||||
this module instance (the database connection in the previous example) this
|
||||
can be done using the xsltGetExtData:</p>
|
||||
<pre>void * xsltGetExtData(xsltTransformContextPtr ctxt,
|
||||
const xmlChar *URI);</pre>
|
||||
|
||||
<p>Again, the URI to be provided is the one which was used when
|
||||
registering the module.</p>
|
||||
|
||||
<p>Once the function finishes, don't forget to:</p>
|
||||
<ul>
|
||||
<li>push the return value on the stack using <code>valuePush(ctxt,
|
||||
obj)</code></li>
|
||||
<li>deallocate the parameters passed to the function using
|
||||
<code>xmlXPathFreeObject(obj)</code></li>
|
||||
<li>push the return value on the stack using <code>valuePush(ctxt,
|
||||
obj)</code>
|
||||
</li>
|
||||
<li>deallocate the parameters passed to the function using
|
||||
<code>xmlXPathFreeObject(obj)</code>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h2><a name="Examples">Examples for extension functions</a></h2>
|
||||
|
||||
<h3><a name="Examples">Examples for extension functions</a></h3>
|
||||
<p>The module libxslt/functions.c contains the sources of the XSLT built-in
|
||||
functions, including document(), key(), generate-id(), etc. as well as a full
|
||||
example module at the end. Here is the test function implementation for the
|
||||
@@ -271,40 +270,34 @@ xsltExtFunctionTest(xmlXPathParserContextPtr ctxt, int nargs)
|
||||
tctxt = xsltXPathGetTransformContext(ctxt);
|
||||
if (tctxt == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtFunctionTest: failed to get the transformation context\n");
|
||||
"xsltExtFunctionTest: failed to get the transformation context\n");
|
||||
return;
|
||||
}
|
||||
data = xsltGetExtData(tctxt, (const xmlChar *) XSLT_DEFAULT_URL);
|
||||
if (data == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtFunctionTest: failed to get module data\n");
|
||||
"xsltExtFunctionTest: failed to get module data\n");
|
||||
return;
|
||||
}
|
||||
#ifdef WITH_XSLT_DEBUG_FUNCTION
|
||||
xsltGenericDebug(xsltGenericDebugContext,
|
||||
"libxslt:test() called with %d args\n", nargs);
|
||||
"libxslt:test() called with %d args\n", nargs);
|
||||
#endif
|
||||
}</pre>
|
||||
|
||||
<h2><a name="Registerin2">Registering an extension element</a></h2>
|
||||
|
||||
<h3><a name="Registerin2">Registering an extension element</a></h3>
|
||||
<p>There is a single call to do this registration:</p>
|
||||
<pre>int xsltRegisterExtElement(xsltTransformContextPtr ctxt,
|
||||
const xmlChar *name,
|
||||
const xmlChar *URI,
|
||||
xsltTransformFunction function);</pre>
|
||||
|
||||
<p>It is similar to the mechanism used to register an extension function,
|
||||
except that the signature of an extension element implementation is
|
||||
different.</p>
|
||||
|
||||
<p>The registration is bound to a single transformation instance referred by
|
||||
ctxt, name is the UTF8 encoded name for the NCName of the element, and URI is
|
||||
the namespace name for the extension (no checking is done, a module could
|
||||
register elements for a different namespace, but it is not recommended).</p>
|
||||
|
||||
<h2><a name="Implementi1">Implementing an extension element</a></h2>
|
||||
|
||||
<h3><a name="Implementi1">Implementing an extension element</a></h3>
|
||||
<p>The implementation of the element must have the signature of an XSLT
|
||||
transformation function:</p>
|
||||
<pre>/**
|
||||
@@ -322,34 +315,27 @@ typedef void (*xsltTransformFunction)
|
||||
xmlNodePtr node,
|
||||
xmlNodePtr inst,
|
||||
xsltStylePreCompPtr comp);</pre>
|
||||
|
||||
<p>The first argument is the XSLT transformation context. The second and
|
||||
third arguments are xmlNodePtr i.e. internal memory <a
|
||||
href="internals.html#libxml">representation of XML nodes</a>. They are
|
||||
third arguments are xmlNodePtr i.e. internal memory <a href="internals.html#libxml">representation of XML nodes</a>. They are
|
||||
respectively <code>node</code> from the input document being transformed
|
||||
by the stylesheet and <code>inst</code> the extension element in the
|
||||
stylesheet. The last argument is <code>comp</code> a pointer to a precompiled
|
||||
representation of <code>inst</code> but usually for extension function this
|
||||
value is <code>NULL</code> by default (it could be added and associated to
|
||||
the instruction in <code>inst->_private</code>).</p>
|
||||
|
||||
<p>The same functions are available from a function implementing an extension
|
||||
element as in an extension function, including
|
||||
<code>xsltGetExtData()</code>.</p>
|
||||
|
||||
<p>The goal of extension element being usually to enrich the generated
|
||||
output, it is expected that they will grow the currently generated output
|
||||
tree, this can be done by grabbing ctxt->insert which is the current
|
||||
libxml node being generated (Note this can also be the intermediate value
|
||||
tree being built for example to initialize a variable, the processing should
|
||||
be similar). The functions for libxml tree manipulation from <a
|
||||
href="http://xmlsoft.org/html/libxml-tree.html"><libxml/tree.h></a> can
|
||||
be similar). The functions for libxml tree manipulation from <a href="http://xmlsoft.org/html/libxml-tree.html"><libxml/tree.h></a> can
|
||||
be employed to extend or modify the tree, but it is required to preserve the
|
||||
insertion node and its ancestors since there are existing pointers to those
|
||||
elements still in use in the XSLT template execution stack.</p>
|
||||
|
||||
<h2><a name="Example">Example for extension elements</a></h2>
|
||||
|
||||
<h3><a name="Example">Example for extension elements</a></h3>
|
||||
<p>The module libxslt/transform.c contains the sources of the XSLT built-in
|
||||
elements, including xsl:element, xsl:attribute, xsl:if, etc. There is a small
|
||||
but full example in functions.c providing the implementation for the
|
||||
@@ -372,32 +358,30 @@ xsltExtElementTest(xsltTransformContextPtr ctxt, xmlNodePtr node,
|
||||
|
||||
if (ctxt == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtElementTest: no transformation context\n");
|
||||
"xsltExtElementTest: no transformation context\n");
|
||||
return;
|
||||
}
|
||||
if (node == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtElementTest: no current node\n");
|
||||
"xsltExtElementTest: no current node\n");
|
||||
return;
|
||||
}
|
||||
if (inst == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtElementTest: no instruction\n");
|
||||
"xsltExtElementTest: no instruction\n");
|
||||
return;
|
||||
}
|
||||
if (ctxt->insert == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtElementTest: no insertion point\n");
|
||||
"xsltExtElementTest: no insertion point\n");
|
||||
return;
|
||||
}
|
||||
comment =
|
||||
xmlNewComment((const xmlChar *)
|
||||
"libxslt:test element test worked");
|
||||
"libxslt:test element test worked");
|
||||
xmlAddChild(ctxt->insert, comment);
|
||||
}</pre>
|
||||
|
||||
<h2><a name="shutdown">The shutdown of a module</a></h2>
|
||||
|
||||
<h3><a name="shutdown">The shutdown of a module</a></h3>
|
||||
<p>When the XSLT processor ends a transformation, the shutdown function (if
|
||||
it exists) of each initialized module is called. The
|
||||
xsltExtShutdownFunction type defines the interface for a shutdown
|
||||
@@ -413,30 +397,24 @@ function:</p>
|
||||
typedef void (*xsltExtShutdownFunction) (xsltTransformContextPtr ctxt,
|
||||
const xmlChar *URI,
|
||||
void *data);</pre>
|
||||
|
||||
<p>this is really similar to a module initialization function except a third
|
||||
argument is passed, it's the value that was returned by the initialization
|
||||
function. This allows deallocating resources from the module, for example
|
||||
close the connection to the database to keep the same example.</p>
|
||||
|
||||
<h2><a name="Future">Future work</a></h2>
|
||||
|
||||
<h3><a name="Future">Future work</a></h3>
|
||||
<p>Well some of the pieces missing:</p>
|
||||
<ul>
|
||||
<li>a way to load shared libraries to instantiate new modules</li>
|
||||
<li>a better detection of extension function usage and their registration
|
||||
<li>a way to load shared libraries to instantiate new modules</li>
|
||||
<li>a better detection of extension function usage and their registration
|
||||
without having to use the extension prefix which ought to be reserved to
|
||||
element extensions.</li>
|
||||
<li>more examples</li>
|
||||
<li>implementations of the <a href="http://www.exslt.org/">EXSLT</a> common
|
||||
extension libraries, I probably won't have the time needed to do this but
|
||||
this would be a great contribution.
|
||||
<p></p>
|
||||
</li>
|
||||
<li>more examples</li>
|
||||
<li>implementations of the <a href="http://www.exslt.org/">EXSLT</a> common
|
||||
extension libraries, Thomas Broyer nearly finished implementing them.</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
<p><a href="mailto:daniel@veillard.com">Daniel Veillard</a></p>
|
||||
|
||||
<p>$Id$</p>
|
||||
</td></tr></table></td></tr></table></td></tr></table></td>
|
||||
</tr></table></td></tr></table>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -36,6 +36,8 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
|
||||
@@ -36,6 +36,8 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
@@ -58,19 +60,19 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
</table>
|
||||
</td></tr></table></td>
|
||||
<td valign="top" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%"><tr><td><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table border="0" cellpadding="3" cellspacing="1" width="100%"><tr><td bgcolor="#fffacd">
|
||||
<p>Libxslt is the XSLT C library developed for the Gnome project. XSLT
|
||||
itself is an XML language to define transformations for XML. Libxslt is
|
||||
based on libxml2 the XML C library developed for the Gnome project.</p>
|
||||
<p>Libxslt is the <a href="http://www.w3.org/TR/xslt">XSLT</a> C library
|
||||
developed for the Gnome project. XSLT itself is an XML language to define
|
||||
transformations for XML. Libxslt is based on <a href="http://xmlsoft.org/">libxml2</a> the XML C library developed for the
|
||||
Gnome project. It also implements most of the EXSLT set of extension
|
||||
functions and some of Saxon's evaluate and expressions extensions.</p>
|
||||
<p>People can either embed the library in their application or use xsltproc
|
||||
the command line processing tool.</p>
|
||||
the command line processing tool. This library is free software and can be
|
||||
reused in commercial applications (see the <a href="intro.html">intro</a>)</p>
|
||||
<p>External documents:</p>
|
||||
<ul>
|
||||
<li>John Fleck wrote <a href="tutorial/libxslttutorial.html">a tutorial for
|
||||
libxslt</a>
|
||||
</li>
|
||||
<li><a href="internals.html">white paper on libxslt internals</a></li>
|
||||
<li><a href="extensions.html">documentation on writing extensions for
|
||||
libxslt</a></li>
|
||||
<li><a href="xsltproc.html">xsltproc user manual</a></li>
|
||||
<li><a href="http://xmlsoft.org/">the libxml documentation</a></li>
|
||||
</ul>
|
||||
|
||||
@@ -1,186 +1,188 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>The XSLT C library for Gnome explained</title>
|
||||
<meta name="GENERATOR" content="amaya V4.1">
|
||||
<meta http-equiv="Content-Type" content="text/html">
|
||||
<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">
|
||||
<style type="text/css"><!--
|
||||
TD {font-size: 10pt; font-family: Verdana,Arial,Helvetica}
|
||||
BODY {font-size: 10pt; font-family: Verdana,Arial,Helvetica; margin-top: 5pt; margin-left: 0pt; margin-right: 0pt}
|
||||
H1 {font-size: 16pt; font-family: Verdana,Arial,Helvetica}
|
||||
H2 {font-size: 14pt; font-family: Verdana,Arial,Helvetica}
|
||||
H3 {font-size: 12pt; font-family: Verdana,Arial,Helvetica}
|
||||
A:link, A:visited, A:active { text-decoration: underline }
|
||||
--></style>
|
||||
<title>Library internals</title>
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff">
|
||||
<p><a href="http://www.gnome.org/"><img src="smallfootonly.gif"
|
||||
alt="Gnome Logo"></a><a href="http://www.redhat.com/"><img src="redhat.gif"
|
||||
alt="Red Hat Logo"></a></p>
|
||||
|
||||
<h1 align="center">The XSLT C library for Gnome explained</h1>
|
||||
|
||||
<h1 style="text-align: center">How does it work ?</h1>
|
||||
|
||||
<p></p>
|
||||
|
||||
<p>Location: <a
|
||||
href="http://xmlsoft.org/XSLT/internals.html">http://xmlsoft.org/XSLT/internals.html</a></p>
|
||||
|
||||
<p>Libxslt home page: <a
|
||||
href="http://xmlsoft.org/XSLT/">http://xmlsoft.org/XSLT/</a></p>
|
||||
|
||||
<p>mailing-list archives: <a
|
||||
href="http://mail.gnome.org/archives/xslt/">http://mail.gnome.org/archives/xslt/</a></p>
|
||||
|
||||
<p>Version: $Revision$</p>
|
||||
|
||||
<h2>Table of contents</h2>
|
||||
<body bgcolor="#8b7765" text="#000000" link="#000000" vlink="#000000">
|
||||
<table border="0" width="100%" cellpadding="5" cellspacing="0" align="center"><tr>
|
||||
<td width="100">
|
||||
<a href="http://www.gnome.org/"><img src="smallfootonly.gif" alt="Gnome Logo"></a><a href="http://www.redhat.com"><img src="redhat.gif" alt="Red Hat Logo"></a>
|
||||
</td>
|
||||
<td><table border="0" width="90%" cellpadding="2" cellspacing="0" align="center" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3" bgcolor="#fffacd"><tr><td align="center">
|
||||
<h1>The XSLT C library for Gnome</h1>
|
||||
<h2>Library internals</h2>
|
||||
</td></tr></table></td></tr></table></td>
|
||||
</tr></table>
|
||||
<table border="0" cellpadding="4" cellspacing="0" width="100%" align="center"><tr><td bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="2" width="100%"><tr>
|
||||
<td valign="top" width="200" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td>
|
||||
<table width="100%" border="0" cellspacing="1" cellpadding="3">
|
||||
<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Main Menu</b></center></td></tr>
|
||||
<tr><td bgcolor="#fffacd"><ul style="margin-left: -2pt">
|
||||
<li><a href="index.html">Home</a></li>
|
||||
<li><a href="intro.html">Introduction</a></li>
|
||||
<li><a href="docs.html">Documentation</a></li>
|
||||
<li><a href="bugs.html">Reporting bugs and getting help</a></li>
|
||||
<li><a href="help.html">How to help</a></li>
|
||||
<li><a href="downloads.html">Downloads</a></li>
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
</li>
|
||||
</ul></td></tr>
|
||||
</table>
|
||||
<table width="100%" border="0" cellspacing="1" cellpadding="3">
|
||||
<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Related links</b></center></td></tr>
|
||||
<tr><td bgcolor="#fffacd"><ul style="margin-left: -2pt">
|
||||
<li><a href="tutorial/libxslttutorial.html">Tutorial</a></li>
|
||||
<li><a href="xsltproc.html">Man page for xsltproc</a></li>
|
||||
<li><a href="http://mail.gnome.org/archives/xslt/">Mail archive</a></li>
|
||||
<li><a href="http://xmlsoft.org/">XML libxml</a></li>
|
||||
<li><a href="http://www.cs.unibo.it/~casarini/gdome2/">DOM gdome2</a></li>
|
||||
<li><a href="ftp://xmlsoft.org/">FTP</a></li>
|
||||
<li><a href="http://www.fh-frankfurt.de/~igor/projects/libxml/">Windows binaries</a></li>
|
||||
<li><a href="http://pages.eidosnet.co.uk/~garypen/libxml/">Solaris binaries</a></li>
|
||||
<li><a href="http://bugzilla.gnome.org/buglist.cgi?product=libxslt">Bug Tracker</a></li>
|
||||
</ul></td></tr>
|
||||
</table>
|
||||
</td></tr></table></td>
|
||||
<td valign="top" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%"><tr><td><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table border="0" cellpadding="3" cellspacing="1" width="100%"><tr><td bgcolor="#fffacd">
|
||||
<h3>Table of contents</h3>
|
||||
<ul>
|
||||
<li><a href="#Introducti">Introduction</a></li>
|
||||
<li><a href="#Basics">Basics</a></li>
|
||||
<li><a href="#Keep">Keep it simple stupid</a></li>
|
||||
<li><a href="#libxml">The libxml nodes</a></li>
|
||||
<li><a href="#XSLT">The XSLT processing steps</a></li>
|
||||
<li><a href="#XSLT1">The XSLT stylesheet compilation</a></li>
|
||||
<li><a href="#XSLT2">The XSLT template compilation</a></li>
|
||||
<li><a href="#processing">The processing itself</a></li>
|
||||
<li><a href="#XPath">XPath expressions compilation</a></li>
|
||||
<li><a href="#XPath1">XPath interpretation</a></li>
|
||||
<li><a href="#Descriptio">Description of XPath Objects</a></li>
|
||||
<li><a href="#XPath2">XPath functions</a></li>
|
||||
<li><a href="#stack">The variables stack frame</a></li>
|
||||
<li><a href="#Extension">Extension support</a></li>
|
||||
<li><a href="#Futher">Further reading</a></li>
|
||||
<li><a href="#TODOs">TODOs</a></li>
|
||||
<li><a href="internals.html#Introducti">Introduction</a></li>
|
||||
<li><a href="internals.html#Basics">Basics</a></li>
|
||||
<li><a href="internals.html#Keep">Keep it simple stupid</a></li>
|
||||
<li><a href="internals.html#libxml">The libxml nodes</a></li>
|
||||
<li><a href="internals.html#XSLT">The XSLT processing steps</a></li>
|
||||
<li><a href="internals.html#XSLT1">The XSLT stylesheet compilation</a></li>
|
||||
<li><a href="internals.html#XSLT2">The XSLT template compilation</a></li>
|
||||
<li><a href="internals.html#processing">The processing itself</a></li>
|
||||
<li><a href="internals.html#XPath">XPath expressions compilation</a></li>
|
||||
<li><a href="internals.html#XPath1">XPath interpretation</a></li>
|
||||
<li><a href="internals.html#Descriptio">Description of XPath
|
||||
Objects</a></li>
|
||||
<li><a href="internals.html#XPath3">XPath functions</a></li>
|
||||
<li><a href="internals.html#stack">The variables stack frame</a></li>
|
||||
<li><a href="internals.html#Extension">Extension support</a></li>
|
||||
<li><a href="internals.html#Futher">Further reading</a></li>
|
||||
<li><a href="internals.html#TODOs">TODOs</a></li>
|
||||
</ul>
|
||||
|
||||
<h2><a name="Introducti">Introduction</a></h2>
|
||||
|
||||
<p>This document describes the processing of <a
|
||||
href="http://xmlsoft.org/XSLT/">libxslt</a>, the <a
|
||||
href="http://www.w3.org/TR/xslt">XSLT</a> C library developed for the <a
|
||||
href="http://www.gnome.org/">Gnome</a> project.</p>
|
||||
|
||||
<h3><a name="Introducti2">Introduction</a></h3>
|
||||
<p>This document describes the processing of <a href="http://xmlsoft.org/XSLT/">libxslt</a>, the <a href="http://www.w3.org/TR/xslt">XSLT</a> C library developed for the <a href="http://www.gnome.org/">Gnome</a> project.</p>
|
||||
<p>Note: this documentation is by definition incomplete and I am not good at
|
||||
spelling, grammar, so patches and suggestions are <a
|
||||
href="mailto:veillard@redhat.com">really welcome</a>.</p>
|
||||
|
||||
<h2><a name="Basics">Basics</a></h2>
|
||||
|
||||
spelling, grammar, so patches and suggestions are <a href="mailto:veillard@redhat.com">really welcome</a>.</p>
|
||||
<h3><a name="Basics1">Basics</a></h3>
|
||||
<p>XSLT is a transformation language. It takes an input document and a
|
||||
stylesheet document and generates an output document:</p>
|
||||
|
||||
<p align="center"><img src="processing.gif"
|
||||
alt="the XSLT processing model"></p>
|
||||
|
||||
<p>Libxslt is written in C. It relies on <a href="http://www.xmlsoft.org/">libxml</a>,
|
||||
the XML C library for Gnome, for the following operations:</p>
|
||||
<p align="center"><img src="processing.gif" alt="the XSLT processing model"></p>
|
||||
<p>Libxslt is written in C. It relies on <a href="http://www.xmlsoft.org/">libxml</a>, the XML C library for Gnome, for
|
||||
the following operations:</p>
|
||||
<ul>
|
||||
<li>parsing files</li>
|
||||
<li>building the in-memory DOM structure associated with the documents
|
||||
handled</li>
|
||||
<li>the XPath implementation</li>
|
||||
<li>serializing back the result document to XML and HTML. (Text is handled
|
||||
<li>parsing files</li>
|
||||
<li>building the in-memory DOM structure associated with the documents
|
||||
handled</li>
|
||||
<li>the XPath implementation</li>
|
||||
<li>serializing back the result document to XML and HTML. (Text is handled
|
||||
directly.)</li>
|
||||
</ul>
|
||||
|
||||
<h2><a name="Keep">Keep it simple stupid</a></h2>
|
||||
|
||||
<h3><a name="Keep1">Keep it simple stupid</a></h3>
|
||||
<p>Libxslt is not very specialized. It is built under the assumption that all
|
||||
nodes from the source and output document can fit in the virtual memory of the
|
||||
system. There is a big trade-off there. It is fine for reasonably sized
|
||||
nodes from the source and output document can fit in the virtual memory of
|
||||
the system. There is a big trade-off there. It is fine for reasonably sized
|
||||
documents but may not be suitable for large sets of data. The gain is that it
|
||||
can be used in a relatively versatile way. The input or output may never be
|
||||
serialized, but the size of documents it can handle is limited by the size of
|
||||
the memory available.</p>
|
||||
|
||||
<p>More specialized memory handling approaches are possible, like building the
|
||||
input tree from a serialization progressively as it is consumed, factoring
|
||||
repetitive patterns, or even on-the-fly generation of the output as the input
|
||||
is parsed but it is possible only for a limited subset of the stylesheets. In
|
||||
general the implementation of libxslt follows the following pattern:</p>
|
||||
serialized, but the size of documents it can handle is limited by the size
|
||||
of the memory available.</p>
|
||||
<p>More specialized memory handling approaches are possible, like building
|
||||
the input tree from a serialization progressively as it is consumed,
|
||||
factoring repetitive patterns, or even on-the-fly generation of the output as
|
||||
the input is parsed but it is possible only for a limited subset of the
|
||||
stylesheets. In general the implementation of libxslt follows the following
|
||||
pattern:</p>
|
||||
<ul>
|
||||
<li>KISS (keep it simple stupid)</li>
|
||||
<li>when there is a clear bottleneck optimize on top of this simple
|
||||
<li>KISS (keep it simple stupid)</li>
|
||||
<li>when there is a clear bottleneck optimize on top of this simple
|
||||
framework and refine only as much as is needed to reach the expected
|
||||
result</li>
|
||||
</ul>
|
||||
|
||||
<p>The result is not that bad, clearly one can do a better job but more
|
||||
specialized too. Most optimizations like building the tree on-demand would need
|
||||
serious changes to the libxml XPath framework. An easy step would be to
|
||||
specialized too. Most optimizations like building the tree on-demand would
|
||||
need serious changes to the libxml XPath framework. An easy step would be to
|
||||
serialize the output directly (or call a set of SAX-like output handlers to
|
||||
keep this a flexible interface) and hence avoid the memory consumption of the
|
||||
result.</p>
|
||||
|
||||
<h2><a name="libxml">The libxml nodes</a></h2>
|
||||
|
||||
<p>DOM-like trees, as used and generated by libxml and libxslt, are relatively
|
||||
complex. Most node types follow the given structure except a few variations
|
||||
depending on the node type:</p>
|
||||
|
||||
<h3><a name="libxml">The libxml nodes</a></h3>
|
||||
<p>DOM-like trees, as used and generated by libxml and libxslt, are
|
||||
relatively complex. Most node types follow the given structure except a few
|
||||
variations depending on the node type:</p>
|
||||
<p align="center"><img src="node.gif" alt="description of a libxml node"></p>
|
||||
|
||||
<p>Nodes carry a <strong>name</strong> and the node <strong>type</strong>
|
||||
indicates the kind of node it represents, the most common ones are:</p>
|
||||
<ul>
|
||||
<li>document nodes</li>
|
||||
<li>element nodes</li>
|
||||
<li>text nodes</li>
|
||||
<li>document nodes</li>
|
||||
<li>element nodes</li>
|
||||
<li>text nodes</li>
|
||||
</ul>
|
||||
|
||||
<p>For the XSLT processing, entity nodes should not be generated (i.e. they
|
||||
should be replaced by their content). Most nodes also contain the following
|
||||
"navigation" information:</p>
|
||||
"navigation" information:</p>
|
||||
<ul>
|
||||
<li>the containing <strong>doc</strong>ument</li>
|
||||
<li>the <strong>parent</strong> node</li>
|
||||
<li>the first <strong>children</strong> node</li>
|
||||
<li>the <strong>last</strong> children node</li>
|
||||
<li>the <strong>prev</strong>ious sibling</li>
|
||||
<li>the following sibling (<strong>next</strong>)</li>
|
||||
<li>the containing <strong>doc</strong>ument</li>
|
||||
<li>the <strong>parent</strong> node</li>
|
||||
<li>the first <strong>children</strong> node</li>
|
||||
<li>the <strong>last</strong> children node</li>
|
||||
<li>the <strong>prev</strong>ious sibling</li>
|
||||
<li>the following sibling (<strong>next</strong>)</li>
|
||||
</ul>
|
||||
|
||||
<p>Element nodes carry the list of attributes in the properties, an
|
||||
attribute itself holds the navigation pointers and the children list (the
|
||||
attribute value is not represented as a simple string to allow usage of
|
||||
entities references).</p>
|
||||
|
||||
<p>The <strong>ns</strong> points to the namespace declaration for the
|
||||
namespace associated to the node, <strong>nsDef</strong> is the linked list of
|
||||
namespace declarations present on element nodes.</p>
|
||||
|
||||
namespace associated to the node, <strong>nsDef</strong> is the linked list
|
||||
of namespace declarations present on element nodes.</p>
|
||||
<p>Most nodes also carry an <strong>_private</strong> pointer which can be
|
||||
used by the application to hold specific data on this node.</p>
|
||||
|
||||
<h2><a name="XSLT">The XSLT processing steps</a></h2>
|
||||
|
||||
<h3><a name="XSLT">The XSLT processing steps</a></h3>
|
||||
<p>There are a few steps which are clearly decoupled at the interface
|
||||
level:</p>
|
||||
<ol>
|
||||
<li>parse the stylesheet and generate a DOM tree</li>
|
||||
<li>take the stylesheet tree and build a compiled version of it (the
|
||||
<li>parse the stylesheet and generate a DOM tree</li>
|
||||
<li>take the stylesheet tree and build a compiled version of it (the
|
||||
compilation phase)</li>
|
||||
<li>take the input and generate a DOM tree</li>
|
||||
<li>process the stylesheet against the input tree and generate an output
|
||||
<li>take the input and generate a DOM tree</li>
|
||||
<li>process the stylesheet against the input tree and generate an output
|
||||
tree</li>
|
||||
<li>serialize the output tree</li>
|
||||
<li>serialize the output tree</li>
|
||||
</ol>
|
||||
|
||||
<p>A few things should be noted here:</p>
|
||||
<ul>
|
||||
<li>the steps 1/ 3/ and 5/ are optional</li>
|
||||
<li>the stylesheet obtained at 2/ can be reused by multiple processing 4/
|
||||
<li>the steps 1/ 3/ and 5/ are optional</li>
|
||||
<li>the stylesheet obtained at 2/ can be reused by multiple processing 4/
|
||||
(and this should also work in threaded programs)</li>
|
||||
<li>the tree provided in 2/ should never be freed using xmlFreeDoc, but by
|
||||
<li>the tree provided in 2/ should never be freed using xmlFreeDoc, but by
|
||||
freeing the stylesheet.</li>
|
||||
<li>the input tree 4/ is not modified except the _private field which may be
|
||||
used for labelling keys if used by the stylesheet</li>
|
||||
<li>the input tree 4/ is not modified except the _private field which may
|
||||
be used for labelling keys if used by the stylesheet</li>
|
||||
</ul>
|
||||
|
||||
<h2><a name="XSLT1">The XSLT stylesheet compilation</a></h2>
|
||||
|
||||
<h3><a name="XSLT1">The XSLT stylesheet compilation</a></h3>
|
||||
<p>This is the second step described. It takes a stylesheet tree, and
|
||||
"compiles" it. This associates to each node a structure stored in the
|
||||
"compiles" it. This associates to each node a structure stored in the
|
||||
_private field and containing information computed in the stylesheet:</p>
|
||||
|
||||
<p align="center"><img src="stylesheet.gif"
|
||||
alt="a compiled XSLT stylesheet"></p>
|
||||
|
||||
<p align="center"><img src="stylesheet.gif" alt="a compiled XSLT stylesheet"></p>
|
||||
<p>One xsltStylesheet structure is generated per document parsed for the
|
||||
stylesheet. XSLT documents allow includes and imports of other documents,
|
||||
imports are stored in the <strong>imports</strong> list (hence keeping the
|
||||
@@ -188,121 +190,103 @@ tree hierarchy of includes which is very important for a proper XSLT
|
||||
processing model) and includes are stored in the <strong>doclist</strong>
|
||||
list. An imported stylesheet has a parent link to allow browsing of the
|
||||
tree.</p>
|
||||
|
||||
<p>The DOM tree associated to the document is stored in <strong>doc</strong>.
|
||||
It is preprocessed to remove ignorable empty nodes and all the nodes in the
|
||||
XSLT namespace are subject to precomputing. This usually consists of
|
||||
extracting all the context information from the context tree (attributes,
|
||||
namespaces, XPath expressions), and storing them in an xsltStylePreComp
|
||||
structure associated to the <strong>_private</strong> field of the node.</p>
|
||||
|
||||
<p>A couple of notable exceptions to this are XSLT template nodes (more on
|
||||
this later) and attribute value templates. If they are actually templates, the
|
||||
value cannot be computed at compilation time. (Some preprocessing could be done
|
||||
like isolation and preparsing of the XPath subexpressions but it's not done,
|
||||
yet.)</p>
|
||||
|
||||
<p>The xsltStylePreComp structure also allows storing of the precompiled form of
|
||||
an XPath expression that can be associated to an XSLT element (more on this
|
||||
later).</p>
|
||||
|
||||
<h2><a name="XSLT2">The XSLT template compilation</a></h2>
|
||||
|
||||
this later) and attribute value templates. If they are actually templates,
|
||||
the value cannot be computed at compilation time. (Some preprocessing could
|
||||
be done like isolation and preparsing of the XPath subexpressions but it's
|
||||
not done, yet.)</p>
|
||||
<p>The xsltStylePreComp structure also allows storing of the precompiled form
|
||||
of an XPath expression that can be associated to an XSLT element (more on
|
||||
this later).</p>
|
||||
<h3><a name="XSLT2">The XSLT template compilation</a></h3>
|
||||
<p>A proper handling of templates lookup is one of the keys of fast XSLT
|
||||
processing. (Given a node in the source document this is the process of finding
|
||||
which templates should be applied to this node.) Libxslt follows the hint
|
||||
suggested in the <a href="http://www.w3.org/TR/xslt#patterns">5.2 Patterns</a>
|
||||
section of the XSLT Recommendation, i.e. it doesn't evaluate it as an XPath
|
||||
expression but tokenizes it and compiles it as a set of rules to be evaluated on
|
||||
a candidate node. There usually is an indication of the node name in the last
|
||||
step of this evaluation and this is used as a key check for the match. As a
|
||||
result libxslt builds a relatively more complex set of structures for the
|
||||
templates:</p>
|
||||
|
||||
<p align="center"><img src="templates.gif"
|
||||
alt="The templates related structure"></p>
|
||||
|
||||
processing. (Given a node in the source document this is the process of
|
||||
finding which templates should be applied to this node.) Libxslt follows the
|
||||
hint suggested in the <a href="http://www.w3.org/TR/xslt#patterns">5.2
|
||||
Patterns</a> section of the XSLT Recommendation, i.e. it doesn't evaluate it
|
||||
as an XPath expression but tokenizes it and compiles it as a set of rules to
|
||||
be evaluated on a candidate node. There usually is an indication of the node
|
||||
name in the last step of this evaluation and this is used as a key check for
|
||||
the match. As a result libxslt builds a relatively more complex set of
|
||||
structures for the templates:</p>
|
||||
<p align="center"><img src="templates.gif" alt="The templates related structure"></p>
|
||||
<p>Let's describe a bit more closely what is built. First the xsltStylesheet
|
||||
structure holds a pointer to the template hash table. All the XSLT patterns
|
||||
compiled in this stylesheet are indexed by the value of the target element
|
||||
(or attribute, pi ...) name, so when an element or an attribute "foo" needs to
|
||||
be processed the lookup is done using the name as a key.</p>
|
||||
|
||||
compiled in this stylesheet are indexed by the value of the target
|
||||
element (or attribute, pi ...) name, so when an element or an attribute "foo"
|
||||
needs to be processed the lookup is done using the name as a key.</p>
|
||||
<p>Each of the patterns is compiled into an xsltCompMatch structure. It holds
|
||||
the set of rules based on the tokenization of the pattern stored in
|
||||
reverse order (matching is easier this way). It also holds some information
|
||||
about the previous matches used to speed up the process when one iterates over
|
||||
a set of siblings. (This optimization may be defeated by thrashing when running
|
||||
the set of rules based on the tokenization of the pattern stored in reverse
|
||||
order (matching is easier this way). It also holds some information about the
|
||||
previous matches used to speed up the process when one iterates over a set of
|
||||
siblings. (This optimization may be defeated by thrashing when running
|
||||
threaded computation, it's unclear that this is a big deal in practice.)
|
||||
Predicate expressions are not compiled at this stage, they may be at run-time
|
||||
if needed, but in this case they are compiled as full XPath expressions (the
|
||||
use of some fixed predicate can probably be optimized, they are not yet).</p>
|
||||
|
||||
<p>The xsltCompMatch are then stored in the hash table, the clash list is
|
||||
itself sorted by priority of the template to implement "naturally" the XSLT
|
||||
itself sorted by priority of the template to implement "naturally" the XSLT
|
||||
priority rules.</p>
|
||||
|
||||
<p>Associated to the compiled pattern is the xsltTemplate itself containing
|
||||
the information required for the processing of the pattern including,
|
||||
of course, a pointer to the list of elements used for building the pattern
|
||||
the information required for the processing of the pattern including, of
|
||||
course, a pointer to the list of elements used for building the pattern
|
||||
result.</p>
|
||||
|
||||
<p>Last but not least a number of patterns do not fit in the hash table
|
||||
because they are not associated to a name, this is the case for patterns
|
||||
applying to the root, any element, any attributes, text nodes, pi nodes, keys
|
||||
etc. Those are stored independently in the stylesheet structure as separate
|
||||
linked lists of xsltCompMatch.</p>
|
||||
|
||||
<h2><a name="processing">The processing itself</a></h2>
|
||||
|
||||
<p>The processing is defined by the XSLT specification (the
|
||||
basis of the algorithm is explained in <a
|
||||
href="http://www.w3.org/TR/xslt#section-Introduction">the Introduction</a>
|
||||
<h3><a name="processing">The processing itself</a></h3>
|
||||
<p>The processing is defined by the XSLT specification (the basis of the
|
||||
algorithm is explained in <a href="http://www.w3.org/TR/xslt#section-Introduction">the Introduction</a>
|
||||
section). Basically it works by taking the root of the input document and
|
||||
applying the following algorithm:</p>
|
||||
<ol>
|
||||
<li>Finding the template applying to it. This is a lookup in the
|
||||
template hash table, walking the hash list until the node satisfies all
|
||||
the steps of the pattern, then checking the appropriate global
|
||||
templates to see if there isn't a higher priority rule to apply</li>
|
||||
<li>If there is no template, apply the default rule (recurse on the
|
||||
<li>Finding the template applying to it. This is a lookup in the template
|
||||
hash table, walking the hash list until the node satisfies all the steps
|
||||
of the pattern, then checking the appropriate global templates to see
|
||||
if there isn't a higher priority rule to apply</li>
|
||||
<li>If there is no template, apply the default rule (recurse on the
|
||||
children)</li>
|
||||
<li>else walk the content list of the selected templates, for each of them:
|
||||
<li>else walk the content list of the selected templates, for each of them:
|
||||
<ul>
|
||||
<li>if the node is in the XSLT namespace then the node has a _private
|
||||
<li>if the node is in the XSLT namespace then the node has a _private
|
||||
field pointing to the preprocessed values, jump to the specific
|
||||
code</li>
|
||||
<li>if the node is in an extension namespace, look up the associated
|
||||
<li>if the node is in an extension namespace, look up the associated
|
||||
behavior</li>
|
||||
<li>otherwise copy the node.</li>
|
||||
</ul>
|
||||
<p>The closure is usually done through the XSLT
|
||||
<li>otherwise copy the node.</li>
|
||||
</ul>
|
||||
<p>The closure is usually done through the XSLT
|
||||
<strong>apply-templates</strong> construct recursing by applying the
|
||||
adequate template on the input node children or on the result of an
|
||||
associated XPath selection lookup.</p>
|
||||
</li>
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
<p>Note that large parts of the input tree may not be processed by a given
|
||||
stylesheet and that, conversely, some may be processed multiple times. (This
|
||||
often is the case when a Table of Contents is built).</p>
|
||||
|
||||
stylesheet and that, conversely, some may be processed multiple times.
|
||||
(This often is the case when a Table of Contents is built).</p>
|
||||
<p>The module <code>transform.c</code> is the one implementing most of this
|
||||
logic. <strong>xsltApplyStylesheet()</strong> is the entry point, it allocates
|
||||
an xsltTransformContext containing the following:</p>
|
||||
logic. <strong>xsltApplyStylesheet()</strong> is the entry point, it
|
||||
allocates an xsltTransformContext containing the following:</p>
|
||||
<ul>
|
||||
<li>a pointer to the stylesheet being processed</li>
|
||||
<li>a stack of templates</li>
|
||||
<li>a stack of variables and parameters</li>
|
||||
<li>an XPath context</li>
|
||||
<li>the template mode</li>
|
||||
<li>current document</li>
|
||||
<li>current input node</li>
|
||||
<li>current selected node list</li>
|
||||
<li>the current insertion points in the output document</li>
|
||||
<li>a couple of hash tables for extension elements and functions</li>
|
||||
<li>a pointer to the stylesheet being processed</li>
|
||||
<li>a stack of templates</li>
|
||||
<li>a stack of variables and parameters</li>
|
||||
<li>an XPath context</li>
|
||||
<li>the template mode</li>
|
||||
<li>current document</li>
|
||||
<li>current input node</li>
|
||||
<li>current selected node list</li>
|
||||
<li>the current insertion points in the output document</li>
|
||||
<li>a couple of hash tables for extension elements and functions</li>
|
||||
</ul>
|
||||
|
||||
<p>Then a new document gets allocated (HTML or XML depending on the type of
|
||||
output), the user parameters and global variables and parameters are
|
||||
evaluated. Then <strong>xsltProcessOneNode()</strong> which implements the
|
||||
@@ -310,19 +294,15 @@ evaluated. Then <strong>xsltProcessOneNode()</strong> which implements the
|
||||
implemented by calling <strong>xsltGetTemplate()</strong>, step 2/ is
|
||||
implemented by <strong>xsltDefaultProcessOneNode()</strong> and step 3/ is
|
||||
implemented by <strong>xsltApplyOneTemplate()</strong>.</p>
|
||||
|
||||
<h2><a name="XPath">XPath expression compilation</a></h2>
|
||||
|
||||
<p>The XPath support is actually implemented in the libxml module (where it is
|
||||
reused by the XPointer implementation). XPath is a relatively classic
|
||||
<h3><a name="XPath">XPath expression compilation</a></h3>
|
||||
<p>The XPath support is actually implemented in the libxml module (where it
|
||||
is reused by the XPointer implementation). XPath is a relatively classic
|
||||
expression language. The only uncommon feature is that it is working on XML
|
||||
trees and hence has specific syntax and types to handle them.</p>
|
||||
|
||||
<p>XPath expressions are compiled using <strong>xmlXPathCompile()</strong>. It
|
||||
will take an expression string in input and generate a structure containing
|
||||
the parsed expression tree, for example the expression:</p>
|
||||
<p>XPath expressions are compiled using <strong>xmlXPathCompile()</strong>.
|
||||
It will take an expression string in input and generate a structure
|
||||
containing the parsed expression tree, for example the expression:</p>
|
||||
<pre>/doc/chapter[title='Introduction']</pre>
|
||||
|
||||
<p>will be compiled as</p>
|
||||
<pre>Compiled Expression : 10 elements
|
||||
SORT
|
||||
@@ -337,183 +317,147 @@ the parsed expression tree, for example the expression:</p>
|
||||
ELEM Object is a string : Introduction
|
||||
COLLECT 'child' 'name' 'node' title
|
||||
NODE</pre>
|
||||
|
||||
<p>This can be tested using the <code>testXPath</code> command (in the
|
||||
libxml codebase) using the <code>--tree</code> option.</p>
|
||||
|
||||
<p>Again, the KISS approach is used. No optimization is done. This could be an
|
||||
interesting thing to add. <a
|
||||
href="http://www-106.ibm.com/developerworks/library/x-xslt2/?dwzone=x?open&l=132%2ct=gr%2c+p=saxon">Michael
|
||||
<p>Again, the KISS approach is used. No optimization is done. This could be
|
||||
an interesting thing to add. <a href="http://www-106.ibm.com/developerworks/library/x-xslt2/?dwzone=x?open&l=132%2ct=gr%2c+p=saxon">Michael
|
||||
Kay describes</a> a lot of possible and interesting optimizations done in
|
||||
Saxon which would be possible at this level. I'm unsure they would provide
|
||||
much gain since the expressions tend to be relatively simple in general and
|
||||
stylesheets are still hand generated. Optimizations at the interpretation
|
||||
level sound likely to be more efficient.</p>
|
||||
|
||||
<h2><a name="XPath1">XPath interpretation</a></h2>
|
||||
|
||||
<h3><a name="XPath1">XPath interpretation</a></h3>
|
||||
<p>The interpreter is implemented by <strong>xmlXPathCompiledEval()</strong>
|
||||
which is the front-end to <strong>xmlXPathCompOpEval()</strong> the function
|
||||
implementing the evaluation of the expression tree. This evaluation follows
|
||||
the KISS approach again. It's recursive and calls
|
||||
<strong>xmlXPathNodeCollectAndTest()</strong> to collect node sets when
|
||||
evaluating a <code>COLLECT</code> node.</p>
|
||||
|
||||
<p>An evaluation is done within the framework of an XPath context stored in an
|
||||
<strong>xmlXPathContext</strong> structure, in the framework of a
|
||||
<p>An evaluation is done within the framework of an XPath context stored in
|
||||
an <strong>xmlXPathContext</strong> structure, in the framework of a
|
||||
transformation the context is maintained within the XSLT context. Its content
|
||||
follows the requirements from the XPath specification:</p>
|
||||
<ul>
|
||||
<li>the current document</li>
|
||||
<li>the current node</li>
|
||||
<li>a hash table of defined variables (but not used by XSLT)</li>
|
||||
<li>a hash table of defined functions</li>
|
||||
<li>the proximity position (the place of the node in the current node
|
||||
<li>the current document</li>
|
||||
<li>the current node</li>
|
||||
<li>a hash table of defined variables (but not used by XSLT)</li>
|
||||
<li>a hash table of defined functions</li>
|
||||
<li>the proximity position (the place of the node in the current node
|
||||
list)</li>
|
||||
<li>the context size (the size of the current node list)</li>
|
||||
<li>the array of namespace declarations in scope (there also is a namespace
|
||||
<li>the context size (the size of the current node list)</li>
|
||||
<li>the array of namespace declarations in scope (there also is a namespace
|
||||
hash table but it is not used in the XSLT transformation).</li>
|
||||
</ul>
|
||||
|
||||
<p>For the purpose of XSLT an <strong>extra</strong> pointer has been added
|
||||
allowing to retrieve the XSLT transformation context. When an XPath evaluation
|
||||
is about to be performed, an XPath parser context is allocated containing an
|
||||
XPath object stack (this is actually an XPath evaluation context, this is a
|
||||
remnant of the time when there was no separate parsing and evaluation phase in
|
||||
the XPath implementation). Here is an overview of the set of contexts
|
||||
associated to an XPath evaluation within an XSLT transformation:</p>
|
||||
|
||||
<p align="center"><img src="contexts.gif"
|
||||
alt="The set of contexts associated "></p>
|
||||
|
||||
<p>Clearly this is a bit too complex and confusing and should be refactored at
|
||||
the next set of binary incompatible releases of libxml. For example the
|
||||
allowing to retrieve the XSLT transformation context. When an XPath
|
||||
evaluation is about to be performed, an XPath parser context is allocated
|
||||
containing an XPath object stack (this is actually an XPath evaluation
|
||||
context, this is a remnant of the time when there was no separate parsing and
|
||||
evaluation phase in the XPath implementation). Here is an overview of the set
|
||||
of contexts associated to an XPath evaluation within an XSLT
|
||||
transformation:</p>
|
||||
<p align="center"><img src="contexts.gif" alt="The set of contexts associated "></p>
|
||||
<p>Clearly this is a bit too complex and confusing and should be refactored
|
||||
at the next set of binary incompatible releases of libxml. For example the
|
||||
xmlXPathCtxt has a lot of unused parts and should probably be merged with
|
||||
xmlXPathParserCtxt.</p>
|
||||
|
||||
<h2><a name="Descriptio">Description of XPath Objects</a></h2>
|
||||
|
||||
<h3><a name="Descriptio">Description of XPath Objects</a></h3>
|
||||
<p>An XPath expression manipulates XPath objects. XPath defines the default
|
||||
types boolean, numbers, strings and node sets. XSLT adds the result tree
|
||||
fragment type which is basically an unmodifiable node set.</p>
|
||||
|
||||
<p>Implementation-wise, libxml follows again a KISS approach, the
|
||||
xmlXPathObject is a structure containing a type description and the various
|
||||
possibilities. (Using an enum could have gained some bytes.) In the case of
|
||||
node sets (or result tree fragments), it points to a separate xmlNodeSet
|
||||
object which contains the list of pointers to the document nodes:</p>
|
||||
|
||||
<p align="center"><img src="object.gif"
|
||||
alt="An Node set object pointing to "></p>
|
||||
|
||||
<p align="center"><img src="object.gif" alt="An Node set object pointing to "></p>
|
||||
<p>The <a href="http://xmlsoft.org/html/libxml-xpath.html">XPath API</a> (and
|
||||
its <a href="http://xmlsoft.org/html/libxml-xpathinternals.html">'internal'
|
||||
part</a>) includes a number of functions to create, copy, compare, convert or
|
||||
free XPath objects.</p>
|
||||
|
||||
<h2><a name="XPath2">XPath functions</a></h2>
|
||||
|
||||
<h3><a name="XPath3">XPath functions</a></h3>
|
||||
<p>All the XPath functions available to the interpreter are registered in the
|
||||
function hash table linked from the XPath context. They all share the same
|
||||
signature:</p>
|
||||
<pre>void xmlXPathFunc (xmlXPathParserContextPtr ctxt, int nargs);</pre>
|
||||
|
||||
<p>The first argument is the XPath interpretation context, holding the
|
||||
interpretation stack. The second argument defines the number of objects passed
|
||||
on the stack for the function to consume (last argument is on top of the
|
||||
stack).</p>
|
||||
|
||||
interpretation stack. The second argument defines the number of objects
|
||||
passed on the stack for the function to consume (last argument is on top of
|
||||
the stack).</p>
|
||||
<p>Basically an XPath function does the following:</p>
|
||||
<ul>
|
||||
<li>check <code>nargs</code> for proper handling of errors or functions with
|
||||
variable numbers of parameters</li>
|
||||
<li>pop the parameters from the stack using <code>obj =
|
||||
valuePop(ctxt);</code></li>
|
||||
<li>do the function specific computation</li>
|
||||
<li>push the result parameter on the stack using <code>valuePush(ctxt,
|
||||
res);</code></li>
|
||||
<li>free up the input parameters with
|
||||
<code>xmlXPathFreeObject(obj);</code></li>
|
||||
<li>return</li>
|
||||
<li>check <code>nargs</code> for proper handling of errors or functions
|
||||
with variable numbers of parameters</li>
|
||||
<li>pop the parameters from the stack using <code>obj =
|
||||
valuePop(ctxt);</code>
|
||||
</li>
|
||||
<li>do the function specific computation</li>
|
||||
<li>push the result parameter on the stack using <code>valuePush(ctxt,
|
||||
res);</code>
|
||||
</li>
|
||||
<li>free up the input parameters with
|
||||
<code>xmlXPathFreeObject(obj);</code>
|
||||
</li>
|
||||
<li>return</li>
|
||||
</ul>
|
||||
|
||||
<p>Sometimes the work can be done directly by modifying in-situ the top object
|
||||
on the stack <code>ctxt->value</code>.</p>
|
||||
|
||||
<h2><a name="stack">The XSLT variables stack frame</a></h2>
|
||||
|
||||
<h3><a name="stack">The XSLT variables stack frame</a></h3>
|
||||
<p>Not to be confused with XPath object stack, this stack holds the XSLT
|
||||
variables and parameters as they are defined through the recursive calls of
|
||||
call-template, apply-templates and default templates. This is used to define
|
||||
the scope of variables being called.</p>
|
||||
|
||||
<p>This part seems to need the most urgent attention right now, first it is done
|
||||
in a very inefficient way since the location of the variables and
|
||||
<p>This part seems to need the most urgent attention right now, first it is
|
||||
done in a very inefficient way since the location of the variables and
|
||||
parameters within the stylesheet tree is still done at run time (it really
|
||||
should be done statically at compile time), and I am still unsure that my
|
||||
understanding of the template variables and parameter scope is actually
|
||||
right.</p>
|
||||
|
||||
<p>This part of the documentation is still to be written once this part of the
|
||||
code is stable. <span style="background-color: #FF0000">TODO</span></p>
|
||||
|
||||
<h2><a name="Extension">Extension support</a></h2>
|
||||
|
||||
<p>This part of the documentation is still to be written once this part of
|
||||
the code is stable. <span style="background-color: #FF0000">TODO</span>
|
||||
</p>
|
||||
<h3><a name="Extension">Extension support</a></h3>
|
||||
<p>There is a separate document explaining <a href="extensions.html">how the
|
||||
extension support works</a>. </p>
|
||||
|
||||
<h2><a name="Futher">Further reading</a></h2>
|
||||
|
||||
<p>Michael Kay wrote <a
|
||||
href="http://www-106.ibm.com/developerworks/library/x-xslt2/?dwzone=x?open&l=132%2ct=gr%2c+p=saxon">a
|
||||
extension support works</a>.</p>
|
||||
<h3><a name="Futher">Further reading</a></h3>
|
||||
<p>Michael Kay wrote <a href="http://www-106.ibm.com/developerworks/library/x-xslt2/?dwzone=x?open&l=132%2ct=gr%2c+p=saxon">a
|
||||
really interesting article on Saxon internals</a> and the work he did on
|
||||
performance issues. I wish I had read it before starting libxslt design (I
|
||||
would probably have avoided a few mistakes and progressed faster). A lot of
|
||||
the ideas in his papers should be implemented or at least tried in
|
||||
libxslt.</p>
|
||||
|
||||
<p>The <a href="http://xmlsoft.org/">libxml documentation</a>, especially <a
|
||||
href="http://xmlsoft.org/xmlio.html">the I/O interfaces</a> and the <a
|
||||
href="http://xmlsoft.org/xmlmem.html">memory management</a>.</p>
|
||||
|
||||
<h2><a name="TODOs">TODOs</a></h2>
|
||||
|
||||
<p>The <a href="http://xmlsoft.org/">libxml documentation</a>, especially <a href="http://xmlsoft.org/xmlio.html">the I/O interfaces</a> and the <a href="http://xmlsoft.org/xmlmem.html">memory management</a>.</p>
|
||||
<h3><a name="TODOs">TODOs</a></h3>
|
||||
<p>redesign the XSLT stack frame handling. Far too much work is done at
|
||||
execution time. Similarly for the attribute value templates handling, at least
|
||||
the embedded subexpressions ought to be precompiled.</p>
|
||||
|
||||
execution time. Similarly for the attribute value templates handling, at
|
||||
least the embedded subexpressions ought to be precompiled.</p>
|
||||
<p>Allow output to be saved to a SAX like output (this notion of SAX like API
|
||||
for output should be added directly to libxml).</p>
|
||||
|
||||
<p>Implement and test some of the optimizations explained by Michael Kay
|
||||
especially:</p>
|
||||
<ul>
|
||||
<li>static slot allocation on the stack frame</li>
|
||||
<li>specific boolean interpretation of an XPath expression</li>
|
||||
<li>some of the sorting optimizations</li>
|
||||
<li>Lazy evaluation of location path. (this may require more changes but
|
||||
<li>static slot allocation on the stack frame</li>
|
||||
<li>specific boolean interpretation of an XPath expression</li>
|
||||
<li>some of the sorting optimizations</li>
|
||||
<li>Lazy evaluation of location path. (this may require more changes but
|
||||
sounds really interesting. XT does this too.)</li>
|
||||
<li>Optimization of an expression tree (This could be done as a completely
|
||||
<li>Optimization of an expression tree (This could be done as a completely
|
||||
independent module.)</li>
|
||||
</ul>
|
||||
|
||||
<p></p>
|
||||
Error reporting: there are a lot of cases where the XSLT specification specifies
|
||||
that a given construct is an error which are not checked adequately by libxslt.
|
||||
Basically one should do a complete pass on the XSLT spec again and add all
|
||||
tests to the stylesheet compilation. Using the DTD provided in the appendix and
|
||||
making direct checks using the libxml validation API sounds like a good idea too
|
||||
(though one should take care of not raising errors for elements/attributes in
|
||||
different namespaces).
|
||||
|
||||
<p>
|
||||
<p>Error reporting: there are a lot of cases where the XSLT specification
|
||||
specifies that a given construct is an error which are not checked adequately by
|
||||
libxslt. Basically one should do a complete pass on the XSLT spec again and
|
||||
add all tests to the stylesheet compilation. Using the DTD provided in the
|
||||
appendix and making direct checks using the libxml validation API sounds like a
|
||||
good idea too (though one should take care of not raising errors for
|
||||
elements/attributes in different namespaces).</p>
|
||||
<p>Double check all the places where the stylesheet compiled form might be
|
||||
modified at run time (extra removal of blank nodes, hint on the
|
||||
xsltCompMatch).</p>
|
||||
|
||||
<p></p>
|
||||
|
||||
<p>
|
||||
<p><a href="mailto:daniel@veillard.com">Daniel Veillard</a></p>
|
||||
|
||||
<p>$Id$</p>
|
||||
</td></tr></table></td></tr></table></td></tr></table></td>
|
||||
</tr></table></td></tr></table>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -36,6 +36,8 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
|
||||
@@ -36,6 +36,8 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
|
||||
28
doc/site.xsl
28
doc/site.xsl
@@ -7,37 +7,43 @@
|
||||
<xsl:template name="filename">
|
||||
<xsl:param name="name" select="string(@href)"/>
|
||||
<xsl:choose>
|
||||
<xsl:when test="$name = "#Introducti"">
|
||||
<xsl:when test="$name = '#Introducti'">
|
||||
<xsl:text>intro.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = "#Documentat"">
|
||||
<xsl:when test="$name = '#Documentat'">
|
||||
<xsl:text>docs.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = "#Reporting"">
|
||||
<xsl:when test="$name = '#Reporting'">
|
||||
<xsl:text>bugs.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = "#help"">
|
||||
<xsl:when test="$name = '#help'">
|
||||
<xsl:text>help.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = "#Help"">
|
||||
<xsl:when test="$name = '#Help'">
|
||||
<xsl:text>help.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = "#Downloads"">
|
||||
<xsl:when test="$name = '#Downloads'">
|
||||
<xsl:text>downloads.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = "#News"">
|
||||
<xsl:when test="$name = '#News'">
|
||||
<xsl:text>news.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = "#Contributi"">
|
||||
<xsl:when test="$name = '#Contributi'">
|
||||
<xsl:text>contribs.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = "#xsltproc"">
|
||||
<xsl:when test="$name = '#xsltproc'">
|
||||
<xsl:text>xsltproc2.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = "#API"">
|
||||
<xsl:when test="$name = '#API'">
|
||||
<xsl:text>API.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = """>
|
||||
<xsl:when test="$name = '#Extensions'">
|
||||
<xsl:text>extensions.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = '#Internals'">
|
||||
<xsl:text>internals.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:when test="$name = ''">
|
||||
<xsl:text>unknown.html</xsl:text>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
|
||||
922
doc/xslt.html
922
doc/xslt.html
@@ -12,20 +12,21 @@
|
||||
|
||||
<h1 style="text-align: center">libxslt</h1>
|
||||
|
||||
<p>Libxslt is the XSLT C library developed for the Gnome project. XSLT
|
||||
itself is an XML language to define transformations for XML. Libxslt is
|
||||
based on libxml2, the XML C library developed for the Gnome project.</p>
|
||||
<p>Libxslt is the <a href="http://www.w3.org/TR/xslt">XSLT</a> C library
|
||||
developed for the Gnome project. XSLT itself is an XML language to define
|
||||
transformations for XML. Libxslt is based on <a
|
||||
href="http://xmlsoft.org/">libxml2</a>, the XML C library developed for the
|
||||
Gnome project. It also implements most of the EXSLT set of extension
|
||||
functions and some of Saxon's evaluate and expressions extensions.</p>
|
||||
|
||||
<p>People can either embed the library in their application or use xsltproc
|
||||
the command line processing tool.</p>
|
||||
the command line processing tool. This library is free software and can be
|
||||
reused in commercial applications (see the <a href="intro.html">intro</a>).</p>
|
||||
|
||||
<p>External documents:</p>
|
||||
<ul>
|
||||
<li>John Fleck wrote <a href="tutorial/libxslttutorial.html">a tutorial for
|
||||
libxslt</a></li>
|
||||
<li><a href="internals.html">white paper on libxslt internals</a></li>
|
||||
<li><a href="extensions.html">documentation on writing extensions for
|
||||
libxslt</a></li>
|
||||
<li><a href="xsltproc.html">xsltproc user manual</a></li>
|
||||
<li><a href="http://xmlsoft.org/">the libxml documentation</a></li>
|
||||
</ul>
|
||||
@@ -482,6 +483,913 @@ processing needs and environment for example if reading/saving from/to
|
||||
memory, or if you want to apply XInclude processing to the stylesheet or
|
||||
input documents.</p>
|
||||
|
||||
<h2><a name="Internals">Library internals</a></h2>
|
||||
|
||||
<h3>Table of contents</h3>
|
||||
<ul>
|
||||
<li><a href="internals.html#Introducti">Introduction</a></li>
|
||||
<li><a href="internals.html#Basics">Basics</a></li>
|
||||
<li><a href="internals.html#Keep">Keep it simple stupid</a></li>
|
||||
<li><a href="internals.html#libxml">The libxml nodes</a></li>
|
||||
<li><a href="internals.html#XSLT">The XSLT processing steps</a></li>
|
||||
<li><a href="internals.html#XSLT1">The XSLT stylesheet compilation</a></li>
|
||||
<li><a href="internals.html#XSLT2">The XSLT template compilation</a></li>
|
||||
<li><a href="internals.html#processing">The processing itself</a></li>
|
||||
<li><a href="internals.html#XPath">XPath expression compilation</a></li>
|
||||
<li><a href="internals.html#XPath1">XPath interpretation</a></li>
|
||||
<li><a href="internals.html#Descriptio">Description of XPath
|
||||
Objects</a></li>
|
||||
<li><a href="internals.html#XPath3">XPath functions</a></li>
|
||||
<li><a href="internals.html#stack">The variables stack frame</a></li>
|
||||
<li><a href="internals.html#Extension">Extension support</a></li>
|
||||
<li><a href="internals.html#Futher">Further reading</a></li>
|
||||
<li><a href="internals.html#TODOs">TODOs</a></li>
|
||||
</ul>
|
||||
|
||||
<h3><a name="Introducti2">Introduction</a></h3>
|
||||
|
||||
<p>This document describes the processing of <a
|
||||
href="http://xmlsoft.org/XSLT/">libxslt</a>, the <a
|
||||
href="http://www.w3.org/TR/xslt">XSLT</a> C library developed for the <a
|
||||
href="http://www.gnome.org/">Gnome</a> project.</p>
|
||||
|
||||
<p>Note: this documentation is by definition incomplete and I am not good at
|
||||
spelling, grammar, so patches and suggestions are <a
|
||||
href="mailto:veillard@redhat.com">really welcome</a>.</p>
|
||||
|
||||
<h3><a name="Basics1">Basics</a></h3>
|
||||
|
||||
<p>XSLT is a transformation language. It takes an input document and a
|
||||
stylesheet document and generates an output document:</p>
|
||||
|
||||
<p align="center"><img src="processing.gif"
|
||||
alt="the XSLT processing model"></p>
|
||||
|
||||
<p>Libxslt is written in C. It relies on <a
|
||||
href="http://www.xmlsoft.org/">libxml</a>, the XML C library for Gnome, for
|
||||
the following operations:</p>
|
||||
<ul>
|
||||
<li>parsing files</li>
|
||||
<li>building the in-memory DOM structure associated with the documents
|
||||
handled</li>
|
||||
<li>the XPath implementation</li>
|
||||
<li>serializing back the result document to XML and HTML. (Text is handled
|
||||
directly.)</li>
|
||||
</ul>
|
||||
|
||||
<h3><a name="Keep1">Keep it simple stupid</a></h3>
|
||||
|
||||
<p>Libxslt is not very specialized. It is built under the assumption that all
|
||||
nodes from the source and output document can fit in the virtual memory of
|
||||
the system. There is a big trade-off there. It is fine for reasonably sized
|
||||
documents but may not be suitable for large sets of data. The gain is that it
|
||||
can be used in a relatively versatile way. The input or output may never be
|
||||
serialized, but the size of documents it can handle is limited by the size
|
||||
of the memory available.</p>
|
||||
|
||||
<p>More specialized memory handling approaches are possible, like building
|
||||
the input tree from a serialization progressively as it is consumed,
|
||||
factoring repetitive patterns, or even on-the-fly generation of the output as
|
||||
the input is parsed but it is possible only for a limited subset of the
|
||||
stylesheets. In general the implementation of libxslt follows the following
|
||||
pattern:</p>
|
||||
<ul>
|
||||
<li>KISS (keep it simple stupid)</li>
|
||||
<li>when there is a clear bottleneck optimize on top of this simple
|
||||
framework and refine only as much as is needed to reach the expected
|
||||
result</li>
|
||||
</ul>
|
||||
|
||||
<p>The result is not that bad, clearly one can do a better job but more
|
||||
specialized too. Most optimizations like building the tree on-demand would
|
||||
need serious changes to the libxml XPath framework. An easy step would be to
|
||||
serialize the output directly (or call a set of SAX-like output handlers to
|
||||
keep this a flexible interface) and hence avoid the memory consumption of the
|
||||
result.</p>
|
||||
|
||||
<h3><a name="libxml">The libxml nodes</a></h3>
|
||||
|
||||
<p>DOM-like trees, as used and generated by libxml and libxslt, are
|
||||
relatively complex. Most node types follow the given structure except a few
|
||||
variations depending on the node type:</p>
|
||||
|
||||
<p align="center"><img src="node.gif" alt="description of a libxml node"></p>
|
||||
|
||||
<p>Nodes carry a <strong>name</strong> and the node <strong>type</strong>
|
||||
indicates the kind of node it represents, the most common ones are:</p>
|
||||
<ul>
|
||||
<li>document nodes</li>
|
||||
<li>element nodes</li>
|
||||
<li>text nodes</li>
|
||||
</ul>
|
||||
|
||||
<p>For the XSLT processing, entity nodes should not be generated (i.e. they
|
||||
should be replaced by their content). Most nodes also contain the following
|
||||
"navigation" information:</p>
|
||||
<ul>
|
||||
<li>the containing <strong>doc</strong>ument</li>
|
||||
<li>the <strong>parent</strong> node</li>
|
||||
<li>the first <strong>children</strong> node</li>
|
||||
<li>the <strong>last</strong> children node</li>
|
||||
<li>the <strong>prev</strong>ious sibling</li>
|
||||
<li>the following sibling (<strong>next</strong>)</li>
|
||||
</ul>
|
||||
|
||||
<p>Element nodes carry the list of attributes in the properties, an
|
||||
attribute itself holds the navigation pointers and the children list (the
|
||||
attribute value is not represented as a simple string to allow usage of
|
||||
entity references).</p>
|
||||
|
||||
<p>The <strong>ns</strong> points to the namespace declaration for the
|
||||
namespace associated to the node, <strong>nsDef</strong> is the linked list
|
||||
of namespace declarations present on element nodes.</p>
|
||||
|
||||
<p>Most nodes also carry an <strong>_private</strong> pointer which can be
|
||||
used by the application to hold specific data on this node.</p>
|
||||
|
||||
<h3><a name="XSLT">The XSLT processing steps</a></h3>
|
||||
|
||||
<p>There are a few steps which are clearly decoupled at the interface
|
||||
level:</p>
|
||||
<ol>
|
||||
<li>parse the stylesheet and generate a DOM tree</li>
|
||||
<li>take the stylesheet tree and build a compiled version of it (the
|
||||
compilation phase)</li>
|
||||
<li>take the input and generate a DOM tree</li>
|
||||
<li>process the stylesheet against the input tree and generate an output
|
||||
tree</li>
|
||||
<li>serialize the output tree</li>
|
||||
</ol>
|
||||
|
||||
<p>A few things should be noted here:</p>
|
||||
<ul>
|
||||
<li>the steps 1/ 3/ and 5/ are optional</li>
|
||||
<li>the stylesheet obtained at 2/ can be reused by multiple processing 4/
|
||||
(and this should also work in threaded programs)</li>
|
||||
<li>the tree provided in 2/ should never be freed using xmlFreeDoc, but by
|
||||
freeing the stylesheet.</li>
|
||||
<li>the input tree 4/ is not modified except the _private field which may
|
||||
be used for labelling keys if used by the stylesheet</li>
|
||||
</ul>
|
||||
|
||||
<h3><a name="XSLT1">The XSLT stylesheet compilation</a></h3>
|
||||
|
||||
<p>This is the second step described. It takes a stylesheet tree, and
|
||||
"compiles" it. This associates to each node a structure stored in the
|
||||
_private field and containing information computed in the stylesheet:</p>
|
||||
|
||||
<p align="center"><img src="stylesheet.gif"
|
||||
alt="a compiled XSLT stylesheet"></p>
|
||||
|
||||
<p>One xsltStylesheet structure is generated per document parsed for the
|
||||
stylesheet. XSLT documents allow includes and imports of other documents,
|
||||
imports are stored in the <strong>imports</strong> list (hence keeping the
|
||||
tree hierarchy of includes which is very important for a proper XSLT
|
||||
processing model) and includes are stored in the <strong>doclist</strong>
|
||||
list. An imported stylesheet has a parent link to allow browsing of the
|
||||
tree.</p>
|
||||
|
||||
<p>The DOM tree associated to the document is stored in <strong>doc</strong>.
|
||||
It is preprocessed to remove ignorable empty nodes and all the nodes in the
|
||||
XSLT namespace are subject to precomputing. This usually consists of
|
||||
extracting all the context information from the context tree (attributes,
|
||||
namespaces, XPath expressions), and storing them in an xsltStylePreComp
|
||||
structure associated to the <strong>_private</strong> field of the node.</p>
|
||||
|
||||
<p>A couple of notable exceptions to this are XSLT template nodes (more on
|
||||
this later) and attribute value templates. If they are actually templates,
|
||||
the value cannot be computed at compilation time. (Some preprocessing could
|
||||
be done like isolation and preparsing of the XPath subexpressions but it's
|
||||
not done, yet.)</p>
|
||||
|
||||
<p>The xsltStylePreComp structure also allows storing of the precompiled form
|
||||
of an XPath expression that can be associated to an XSLT element (more on
|
||||
this later).</p>
|
||||
|
||||
<h3><a name="XSLT2">The XSLT template compilation</a></h3>
|
||||
|
||||
<p>A proper handling of templates lookup is one of the keys of fast XSLT
|
||||
processing. (Given a node in the source document this is the process of
|
||||
finding which templates should be applied to this node.) Libxslt follows the
|
||||
hint suggested in the <a href="http://www.w3.org/TR/xslt#patterns">5.2
|
||||
Patterns</a> section of the XSLT Recommendation, i.e. it doesn't evaluate it
|
||||
as an XPath expression but tokenizes it and compiles it as a set of rules to
|
||||
be evaluated on a candidate node. There usually is an indication of the node
|
||||
name in the last step of this evaluation and this is used as a key check for
|
||||
the match. As a result libxslt builds a relatively more complex set of
|
||||
structures for the templates:</p>
|
||||
|
||||
<p align="center"><img src="templates.gif"
|
||||
alt="The templates related structure"></p>
|
||||
|
||||
<p>Let's describe a bit more closely what is built. First the xsltStylesheet
|
||||
structure holds a pointer to the template hash table. All the XSLT patterns
|
||||
compiled in this stylesheet are indexed by the value of the target
|
||||
element (or attribute, pi ...) name, so when an element or an attribute "foo"
|
||||
needs to be processed the lookup is done using the name as a key.</p>
|
||||
|
||||
<p>Each of the patterns is compiled into an xsltCompMatch structure. It holds
|
||||
the set of rules based on the tokenization of the pattern stored in reverse
|
||||
order (matching is easier this way). It also holds some information about the
|
||||
previous matches used to speed up the process when one iterates over a set of
|
||||
siblings. (This optimization may be defeated by thrashing when running
|
||||
threaded computation, it's unclear that this is a big deal in practice.)
|
||||
Predicate expressions are not compiled at this stage, they may be at run-time
|
||||
if needed, but in this case they are compiled as full XPath expressions (the
|
||||
use of some fixed predicate can probably be optimized, they are not yet).</p>
|
||||
|
||||
<p>The xsltCompMatch are then stored in the hash table, the clash list is
|
||||
itself sorted by priority of the template to implement "naturally" the XSLT
|
||||
priority rules.</p>
|
||||
|
||||
<p>Associated to the compiled pattern is the xsltTemplate itself containing
|
||||
the information required for the processing of the pattern including, of
|
||||
course, a pointer to the list of elements used for building the pattern
|
||||
result.</p>
|
||||
|
||||
<p>Last but not least a number of patterns do not fit in the hash table
|
||||
because they are not associated to a name, this is the case for patterns
|
||||
applying to the root, any element, any attributes, text nodes, pi nodes, keys
|
||||
etc. Those are stored independently in the stylesheet structure as separate
|
||||
linked lists of xsltCompMatch.</p>
|
||||
|
||||
<h3><a name="processing">The processing itself</a></h3>
|
||||
|
||||
<p>The processing is defined by the XSLT specification (the basis of the
|
||||
algorithm is explained in <a
|
||||
href="http://www.w3.org/TR/xslt#section-Introduction">the Introduction</a>
|
||||
section). Basically it works by taking the root of the input document and
|
||||
applying the following algorithm:</p>
|
||||
<ol>
|
||||
<li>Finding the template applying to it. This is a lookup in the template
|
||||
hash table, walking the hash list until the node satisfies all the steps
|
||||
of the pattern, then checking the appropriate global template(s) to see
|
||||
if there isn't a higher priority rule to apply</li>
|
||||
<li>If there is no template, apply the default rule (recurse on the
|
||||
children)</li>
|
||||
<li>else walk the content list of the selected templates, for each of them:
|
||||
<ul>
|
||||
<li>if the node is in the XSLT namespace then the node has a _private
|
||||
field pointing to the preprocessed values, jump to the specific
|
||||
code</li>
|
||||
<li>if the node is in an extension namespace, look up the associated
|
||||
behavior</li>
|
||||
<li>otherwise copy the node.</li>
|
||||
</ul>
|
||||
<p>The closure is usually done through the XSLT
|
||||
<strong>apply-templates</strong> construct recursing by applying the
|
||||
adequate template on the input node children or on the result of an
|
||||
associated XPath selection lookup.</p>
|
||||
</li>
|
||||
</ol>
|
||||
|
||||
<p>Note that large parts of the input tree may not be processed by a given
|
||||
stylesheet and that on the opposite some may be processed multiple times.
|
||||
(This often is the case when a Table of Contents is built).</p>
|
||||
|
||||
<p>The module <code>transform.c</code> is the one implementing most of this
|
||||
logic. <strong>xsltApplyStylesheet()</strong> is the entry point, it
|
||||
allocates an xsltTransformContext containing the following:</p>
|
||||
<ul>
|
||||
<li>a pointer to the stylesheet being processed</li>
|
||||
<li>a stack of templates</li>
|
||||
<li>a stack of variables and parameters</li>
|
||||
<li>an XPath context</li>
|
||||
<li>the template mode</li>
|
||||
<li>current document</li>
|
||||
<li>current input node</li>
|
||||
<li>current selected node list</li>
|
||||
<li>the current insertion points in the output document</li>
|
||||
<li>a couple of hash tables for extension elements and functions</li>
|
||||
</ul>
|
||||
|
||||
<p>Then a new document gets allocated (HTML or XML depending on the type of
|
||||
output), the user parameters and global variables and parameters are
|
||||
evaluated. Then <strong>xsltProcessOneNode()</strong> which implements the
|
||||
1-2-3 algorithm is called on the root element of the input. Step 1/ is
|
||||
implemented by calling <strong>xsltGetTemplate()</strong>, step 2/ is
|
||||
implemented by <strong>xsltDefaultProcessOneNode()</strong> and step 3/ is
|
||||
implemented by <strong>xsltApplyOneTemplate()</strong>.</p>
|
||||
|
||||
<h3><a name="XPath">XPath expression compilation</a></h3>
|
||||
|
||||
<p>The XPath support is actually implemented in the libxml module (where it
|
||||
is reused by the XPointer implementation). XPath is a relatively classic
|
||||
expression language. The only uncommon feature is that it is working on XML
|
||||
trees and hence has specific syntax and types to handle them.</p>
|
||||
|
||||
<p>XPath expressions are compiled using <strong>xmlXPathCompile()</strong>.
|
||||
It will take an expression string in input and generate a structure
|
||||
containing the parsed expression tree, for example the expression:</p>
|
||||
<pre>/doc/chapter[title='Introduction']</pre>
|
||||
|
||||
<p>will be compiled as</p>
|
||||
<pre>Compiled Expression : 10 elements
|
||||
SORT
|
||||
COLLECT 'child' 'name' 'node' chapter
|
||||
COLLECT 'child' 'name' 'node' doc
|
||||
ROOT
|
||||
PREDICATE
|
||||
SORT
|
||||
EQUAL =
|
||||
COLLECT 'child' 'name' 'node' title
|
||||
NODE
|
||||
ELEM Object is a string : Introduction
|
||||
COLLECT 'child' 'name' 'node' title
|
||||
NODE</pre>
|
||||
|
||||
<p>This can be tested using the <code>testXPath</code> command (in the
|
||||
libxml codebase) using the <code>--tree</code> option.</p>
|
||||
|
||||
<p>Again, the KISS approach is used. No optimization is done. This could be
|
||||
an interesting thing to add. <a
|
||||
href="http://www-106.ibm.com/developerworks/library/x-xslt2/?dwzone=x?open&l=132%2ct=gr%2c+p=saxon">Michael
|
||||
Kay describes</a> a lot of possible and interesting optimizations done in
|
||||
Saxon which would be possible at this level. I'm unsure they would provide
|
||||
much gain since the expressions tend to be relatively simple in general and
|
||||
stylesheets are still hand generated. Optimizations at the interpretation
|
||||
sound likely to be more efficient.</p>
|
||||
|
||||
<h3><a name="XPath1">XPath interpretation</a></h3>
|
||||
|
||||
<p>The interpreter is implemented by <strong>xmlXPathCompiledEval()</strong>
|
||||
which is the front-end to <strong>xmlXPathCompOpEval()</strong> the function
|
||||
implementing the evaluation of the expression tree. This evaluation follows
|
||||
the KISS approach again. It's recursive and calls
|
||||
<strong>xmlXPathNodeCollectAndTest()</strong> to collect node sets when
|
||||
evaluating a <code>COLLECT</code> node.</p>
|
||||
|
||||
<p>An evaluation is done within the framework of an XPath context stored in
|
||||
an <strong>xmlXPathContext</strong> structure, in the framework of a
|
||||
transformation the context is maintained within the XSLT context. Its content
|
||||
follows the requirements from the XPath specification:</p>
|
||||
<ul>
|
||||
<li>the current document</li>
|
||||
<li>the current node</li>
|
||||
<li>a hash table of defined variables (but not used by XSLT)</li>
|
||||
<li>a hash table of defined functions</li>
|
||||
<li>the proximity position (the place of the node in the current node
|
||||
list)</li>
|
||||
<li>the context size (the size of the current node list)</li>
|
||||
<li>the array of namespace declarations in scope (there also is a namespace
|
||||
hash table but it is not used in the XSLT transformation).</li>
|
||||
</ul>
|
||||
|
||||
<p>For the purpose of XSLT an <strong>extra</strong> pointer has been added
|
||||
allowing to retrieve the XSLT transformation context. When an XPath
|
||||
evaluation is about to be performed, an XPath parser context is allocated
|
||||
containing an XPath object stack (this is actually an XPath evaluation
|
||||
context, this is a remnant of the time when there was no separate parsing and
|
||||
evaluation phase in the XPath implementation). Here is an overview of the set
|
||||
of contexts associated to an XPath evaluation within an XSLT
|
||||
transformation:</p>
|
||||
|
||||
<p align="center"><img src="contexts.gif"
|
||||
alt="The set of contexts associated "></p>
|
||||
|
||||
<p>Clearly this is a bit too complex and confusing and should be refactored
|
||||
at the next set of binary incompatible releases of libxml. For example the
|
||||
xmlXPathCtxt has a lot of unused parts and should probably be merged with
|
||||
xmlXPathParserCtxt.</p>
|
||||
|
||||
<h3><a name="Descriptio">Description of XPath Objects</a></h3>
|
||||
|
||||
<p>An XPath expression manipulates XPath objects. XPath defines the default
|
||||
types boolean, numbers, strings and node sets. XSLT adds the result tree
|
||||
fragment type which is basically an unmodifiable node set.</p>
|
||||
|
||||
<p>Implementation-wise, libxml follows again a KISS approach, the
|
||||
xmlXPathObject is a structure containing a type description and the various
|
||||
possibilities. (Using an enum could have gained some bytes.) In the case of
|
||||
node sets (or result tree fragments), it points to a separate xmlNodeSet
|
||||
object which contains the list of pointers to the document nodes:</p>
|
||||
|
||||
<p align="center"><img src="object.gif"
|
||||
alt="A Node set object pointing to "></p>
|
||||
|
||||
<p>The <a href="http://xmlsoft.org/html/libxml-xpath.html">XPath API</a> (and
|
||||
its <a href="http://xmlsoft.org/html/libxml-xpathinternals.html">'internal'
|
||||
part</a>) includes a number of functions to create, copy, compare, convert or
|
||||
free XPath objects.</p>
|
||||
|
||||
<h3><a name="XPath3">XPath functions</a></h3>
|
||||
|
||||
<p>All the XPath functions available to the interpreter are registered in the
|
||||
function hash table linked from the XPath context. They all share the same
|
||||
signature:</p>
|
||||
<pre>void xmlXPathFunc (xmlXPathParserContextPtr ctxt, int nargs);</pre>
|
||||
|
||||
<p>The first argument is the XPath interpretation context, holding the
|
||||
interpretation stack. The second argument defines the number of objects
|
||||
passed on the stack for the function to consume (last argument is on top of
|
||||
the stack).</p>
|
||||
|
||||
<p>Basically an XPath function does the following:</p>
|
||||
<ul>
|
||||
<li>check <code>nargs</code> for proper handling of errors or functions
|
||||
with variable numbers of parameters</li>
|
||||
<li>pop the parameters from the stack using <code>obj =
|
||||
valuePop(ctxt);</code></li>
|
||||
<li>do the function specific computation</li>
|
||||
<li>push the result parameter on the stack using <code>valuePush(ctxt,
|
||||
res);</code></li>
|
||||
<li>free up the input parameters with
|
||||
<code>xmlXPathFreeObject(obj);</code></li>
|
||||
<li>return</li>
|
||||
</ul>
|
||||
|
||||
<p>Sometimes the work can be done directly by modifying in-situ the top object
|
||||
on the stack <code>ctxt->value</code>.</p>
|
||||
|
||||
<h3><a name="stack">The XSLT variables stack frame</a></h3>
|
||||
|
||||
<p>Not to be confused with XPath object stack, this stack holds the XSLT
|
||||
variables and parameters as they are defined through the recursive calls of
|
||||
call-template, apply-templates and default templates. This is used to define
|
||||
the scope of variables being called.</p>
|
||||
|
||||
<p>This part seems to need the most urgent attention right now, first it is
|
||||
done in a very inefficient way since the location of the variables and
|
||||
parameters within the stylesheet tree is still done at run time (it really
|
||||
should be done statically at compile time), and I am still unsure that my
|
||||
understanding of the template variables and parameter scope is actually
|
||||
right.</p>
|
||||
|
||||
<p>This part of the documentation is still to be written once this part of
|
||||
the code will be stable. <span
|
||||
style="background-color: #FF0000">TODO</span></p>
|
||||
|
||||
<h3><a name="Extension">Extension support</a></h3>
|
||||
|
||||
<p>There is a separate document explaining <a href="extensions.html">how the
|
||||
extension support works</a>.</p>
|
||||
|
||||
<h3><a name="Futher">Further reading</a></h3>
|
||||
|
||||
<p>Michael Kay wrote <a
|
||||
href="http://www-106.ibm.com/developerworks/library/x-xslt2/?dwzone=x?open&l=132%2ct=gr%2c+p=saxon">a
|
||||
really interesting article on Saxon internals</a> and the work he did on
|
||||
performance issues. I wish I had read it before starting libxslt design (I
|
||||
would probably have avoided a few mistakes and progressed faster). A lot of
|
||||
the ideas in his papers should be implemented or at least tried in
|
||||
libxslt.</p>
|
||||
|
||||
<p>The <a href="http://xmlsoft.org/">libxml documentation</a>, especially <a
|
||||
href="http://xmlsoft.org/xmlio.html">the I/O interfaces</a> and the <a
|
||||
href="http://xmlsoft.org/xmlmem.html">memory management</a>.</p>
|
||||
|
||||
<h3><a name="TODOs">TODOs</a></h3>
|
||||
|
||||
<p>redesign the XSLT stack frame handling. Far too much work is done at
|
||||
execution time. Similarly for the attribute value templates handling, at
|
||||
least the embedded subexpressions ought to be precompiled.</p>
|
||||
|
||||
<p>Allow output to be saved to a SAX like output (this notion of SAX like API
|
||||
for output should be added directly to libxml).</p>
|
||||
|
||||
<p>Implement and test some of the optimizations explained by Michael Kay
|
||||
especially:</p>
|
||||
<ul>
|
||||
<li>static slot allocation on the stack frame</li>
|
||||
<li>specific boolean interpretation of an XPath expression</li>
|
||||
<li>some of the sorting optimizations</li>
|
||||
<li>Lazy evaluation of location path. (this may require more changes but
|
||||
sounds really interesting. XT does this too.)</li>
|
||||
<li>Optimization of an expression tree (This could be done as a completely
|
||||
independent module.)</li>
|
||||
</ul>
|
||||
|
||||
<p></p>
|
||||
|
||||
<p>Error reporting, there are a lot of cases where the XSLT specification
|
||||
specifies that a given construct is an error which are not checked adequately by
|
||||
libxslt. Basically one should do a complete pass on the XSLT spec again and
|
||||
add all tests to the stylesheet compilation. Using the DTD provided in the
|
||||
appendix and making direct checks using the libxml validation API sounds like a
|
||||
good idea too (though one should take care of not raising errors for
|
||||
elements/attributes in different namespaces).</p>
|
||||
|
||||
<p>Double check all the places where the stylesheet compiled form might be
|
||||
modified at run time (extra removal of blank nodes, hint on the
|
||||
xsltCompMatch).</p>
|
||||
|
||||
<p></p>
|
||||
|
||||
<h2><a name="Extensions">Writing extensions</a></h2>
|
||||
|
||||
<h3>Table of contents</h3>
|
||||
<ul>
|
||||
<li><a href="extensions.html#Introducti">Introduction</a></li>
|
||||
<li><a href="extensions.html#Basics">Basics</a></li>
|
||||
<li><a href="extensions.html#Keep">Extension modules</a></li>
|
||||
<li><a href="extensions.html#Registerin">Registering a module</a></li>
|
||||
<li><a href="extensions.html#module">Loading a module</a></li>
|
||||
<li><a href="extensions.html#Registerin1">Registering an extension
|
||||
function</a></li>
|
||||
<li><a href="extensions.html#Implementi">Implementing an extension
|
||||
function</a></li>
|
||||
<li><a href="extensions.html#Examples">Examples for extension
|
||||
functions</a></li>
|
||||
<li><a href="extensions.html#Registerin2">Registering an extension
|
||||
element</a></li>
|
||||
<li><a href="extensions.html#Implementi1">Implementing an extension
|
||||
element</a></li>
|
||||
<li><a href="extensions.html#Example">Example for extension
|
||||
elements</a></li>
|
||||
<li><a href="extensions.html#shutdown">The shutdown of a module</a></li>
|
||||
<li><a href="extensions.html#Future">Future work</a></li>
|
||||
</ul>
|
||||
|
||||
<h3><a name="Introducti1">Introduction</a></h3>
|
||||
|
||||
<p>This document describes the work needed to write extensions to the
|
||||
standard XSLT library for use with <a
|
||||
href="http://xmlsoft.org/XSLT/">libxslt</a>, the <a
|
||||
href="http://www.w3.org/TR/xslt">XSLT</a> C library developed for the <a
|
||||
href="http://www.gnome.org/">Gnome</a> project.</p>
|
||||
|
||||
<p>Before starting reading this document it is highly recommended to get
|
||||
familiar with <a href="internals.html">the libxslt internals</a>.</p>
|
||||
|
||||
<p>Note: this documentation is by definition incomplete and I am not good at
|
||||
spelling, grammar, so patches and suggestions are <a
|
||||
href="mailto:veillard@redhat.com">really welcome</a>.</p>
|
||||
|
||||
<h3><a name="Basics">Basics</a></h3>
|
||||
|
||||
<p>The <a href="http://www.w3.org/TR/xslt">XSLT specification</a> provides
|
||||
two <a href="http://www.w3.org/TR/xslt">ways to extend an XSLT engine</a>:</p>
|
||||
<ul>
|
||||
<li>providing <a href="http://www.w3.org/TR/xslt">new extension
|
||||
functions</a> which can be called from XPath expressions</li>
|
||||
<li>providing <a href="http://www.w3.org/TR/xslt">new extension
|
||||
elements</a> which can be inserted in stylesheets</li>
|
||||
</ul>
|
||||
|
||||
<p>In both cases the extensions need to be associated to a new namespace,
|
||||
i.e. an URI used as the name for the extension's namespace (there is no need
|
||||
to have a resource there for this to work).</p>
|
||||
|
||||
<p>libxslt provides a few extensions itself, either in libxslt namespace
|
||||
"http://xmlsoft.org/XSLT/" or in other namespaces for well known extensions
|
||||
provided by other XSLT processors like Saxon, Xalan or XT.</p>
|
||||
|
||||
<h3><a name="Keep">Extension modules</a></h3>
|
||||
|
||||
<p>Since extensions are bound to a namespace name, usually sets of extensions
|
||||
coming from a given source are using the same namespace name defining in
|
||||
practice a group of extensions providing elements, functions or both. From
|
||||
libxslt point of view those are considered as an "extension module", and most
|
||||
of the APIs work at a module point of view.</p>
|
||||
|
||||
<p>Registration of new functions or elements is bound to the activation of
|
||||
the module, this is currently done by declaring the namespace as an extension
|
||||
by using the attribute <code>extension-element-prefixes</code> on the
|
||||
<code><a href="http://www.w3.org/TR/xslt">xsl:stylesheet</a></code>
|
||||
element.</p>
|
||||
|
||||
<p>An extension module is defined by 3 objects:</p>
|
||||
<ul>
|
||||
<li>the namespace name associated</li>
|
||||
<li>an initialization function</li>
|
||||
<li>a shutdown function</li>
|
||||
</ul>
|
||||
|
||||
<h3><a name="Registerin">Registering a module</a></h3>
|
||||
|
||||
<p>Currently a libxslt module has to be compiled within the application using
|
||||
libxslt, there is no code to load dynamically shared libraries associated to
|
||||
a namespace (this may be added but is likely to become a portability
|
||||
nightmare).</p>
|
||||
|
||||
<p>So the current way to register a module is to link the code implementing
|
||||
it with the application and to call a registration function:</p>
|
||||
<pre>int xsltRegisterExtModule(const xmlChar *URI,
|
||||
xsltExtInitFunction initFunc,
|
||||
xsltExtShutdownFunction shutdownFunc);</pre>
|
||||
|
||||
<p>The associated header is read by:</p>
|
||||
<pre>#include<libxslt/extensions.h></pre>
|
||||
|
||||
<p>which also defines the type for the initialization and shutdown
|
||||
functions.</p>
|
||||
|
||||
<h3><a name="module">Loading a module</a></h3>
|
||||
|
||||
<p>Once the module URI has been registered and if the XSLT processor detects
|
||||
that a given stylesheet needs the functionalities of an extended module, this
|
||||
one is initialized.</p>
|
||||
|
||||
<p>The xsltExtInitFunction type defines the interface for an initialization
|
||||
function:</p>
|
||||
<pre>/**
|
||||
* xsltExtInitFunction:
|
||||
* @ctxt: an XSLT transformation context
|
||||
* @URI: the namespace URI for the extension
|
||||
*
|
||||
* A function called at initialization time of an XSLT
|
||||
* extension module
|
||||
*
|
||||
* Returns a pointer to the module specific data for this
|
||||
* transformation
|
||||
*/
|
||||
typedef void *(*xsltExtInitFunction)(xsltTransformContextPtr ctxt,
|
||||
const xmlChar *URI);</pre>
|
||||
|
||||
<p>There are 3 things to notice:</p>
|
||||
<ul>
|
||||
<li>the function gets passed the namespace name URI as an argument, this
|
||||
allows a single function to provide the initialization for multiple
|
||||
logical modules</li>
|
||||
<li>it also gets passed a transformation context, the initialization is
|
||||
done at run time before any processing occurs on the stylesheet but it
|
||||
will be invoked separately each time for each transformation</li>
|
||||
<li>it returns a pointer, this can be used to store module specific
|
||||
information which can be retrieved later when a function or an element
|
||||
from the extension is used, an obvious example is a connection to a
|
||||
database which should be kept and reused along the transformation. NULL
|
||||
is a perfectly valid return, there is no way to indicate a failure at
|
||||
this level</li>
|
||||
</ul>
|
||||
|
||||
<p>What this function is expected to do is:</p>
|
||||
<ul>
|
||||
<li>prepare the context for this module (like opening the database
|
||||
connection)</li>
|
||||
<li>register the extensions specific to this module</li>
|
||||
</ul>
|
||||
|
||||
<h3><a name="Registerin1">Registering an extension function</a></h3>
|
||||
|
||||
<p>There is a single call to do this registration:</p>
|
||||
<pre>int xsltRegisterExtFunction(xsltTransformContextPtr ctxt,
|
||||
const xmlChar *name,
|
||||
const xmlChar *URI,
|
||||
xmlXPathEvalFunc function);</pre>
|
||||
|
||||
<p>The registration is bound to a single transformation instance referred to by
|
||||
ctxt, name is the UTF8 encoded name for the NCName of the function, and URI
|
||||
is the namespace name for the extension (no checking is done, a module could
|
||||
register functions or elements from a different namespace, but it is not
|
||||
recommended).</p>
|
||||
|
||||
<h3><a name="Implementi">Implementing an extension function</a></h3>
|
||||
|
||||
<p>The implementation of the function must have the signature of a libxml
|
||||
XPath function:</p>
|
||||
<pre>/**
|
||||
* xmlXPathEvalFunc:
|
||||
* @ctxt: an XPath parser context
|
||||
* @nargs: the number of arguments passed to the function
|
||||
*
|
||||
* an XPath evaluation function, the parameters are on the
|
||||
* XPath context stack
|
||||
*/
|
||||
|
||||
typedef void (*xmlXPathEvalFunc)(xmlXPathParserContextPtr ctxt,
|
||||
int nargs);</pre>
|
||||
|
||||
<p>The context passed to an XPath function is not an XSLT context but an <a
|
||||
href="internals.html#XPath1">XPath context</a>. However it is possible to
|
||||
find one from the other:</p>
|
||||
<ul>
|
||||
<li>The function xsltXPathGetTransformContext provides this lookup facility:
|
||||
<pre>xsltTransformContextPtr
|
||||
xsltXPathGetTransformContext
|
||||
(xmlXPathParserContextPtr ctxt);</pre>
|
||||
</li>
|
||||
<li>The <code>xmlXPathContextPtr</code> associated to an
|
||||
<code>xsltTransformContext</code> is stored in the <code>xpathCtxt</code>
|
||||
field.</li>
|
||||
</ul>
|
||||
|
||||
<p>The first thing an extension function may want to do is to check the
|
||||
arguments passed on the stack, the <code>nargs</code> will specify how many
|
||||
of them were provided on the XPath expression. The macro valuePop will
|
||||
extract them from the XPath stack:</p>
|
||||
<pre>#include <libxml/xpath.h>
|
||||
#include <libxml/xpathInternals.h>
|
||||
|
||||
xmlXPathObjectPtr obj = valuePop(ctxt); </pre>
|
||||
|
||||
<p>Note that <code>ctxt</code> is the XPath context not the XSLT one. It is
|
||||
then possible to examine the content of the value. Check <a
|
||||
href="internals.html#Descriptio">the description of XPath objects</a> if
|
||||
necessary. The following is a common sequence checking whether the argument
|
||||
passed is a string and converting it using the built-in XPath
|
||||
<code>string()</code> function if this is not the case:</p>
|
||||
<pre>if (obj->type != XPATH_STRING) {
|
||||
valuePush(ctxt, obj);
|
||||
xmlXPathStringFunction(ctxt, 1);
|
||||
obj = valuePop(ctxt);
|
||||
}</pre>
|
||||
|
||||
<p>Most common XPath functions are available directly at the C level and are
|
||||
exported either in <code><libxml/xpath.h></code> or in
|
||||
<code><libxml/xpathInternals.h></code>.</p>
|
||||
|
||||
<p>The extension function may also need to retrieve the data associated to
|
||||
this module instance (the database connection in the previous example) this
|
||||
can be done using the xsltGetExtData:</p>
|
||||
<pre>void * xsltGetExtData(xsltTransformContextPtr ctxt,
|
||||
const xmlChar *URI);</pre>
|
||||
|
||||
<p>Again the URI to be provided is the one which was used when
|
||||
registering the module.</p>
|
||||
|
||||
<p>Once the function finishes, don't forget to:</p>
|
||||
<ul>
|
||||
<li>push the return value on the stack using <code>valuePush(ctxt,
|
||||
obj)</code></li>
|
||||
<li>deallocate the parameters passed to the function using
|
||||
<code>xmlXPathFreeObject(obj)</code></li>
|
||||
</ul>
|
||||
|
||||
<h3><a name="Examples">Examples for extension functions</a></h3>
|
||||
|
||||
<p>The module libxslt/functions.c contains the sources of the XSLT built-in
|
||||
functions, including document(), key(), generate-id(), etc. as well as a full
|
||||
example module at the end. Here is the test function implementation for the
|
||||
libxslt:test function:</p>
|
||||
<pre>/**
|
||||
* xsltExtFunctionTest:
|
||||
* @ctxt: the XPath Parser context
|
||||
* @nargs: the number of arguments
|
||||
*
|
||||
* function libxslt:test() for testing the extensions support.
|
||||
*/
|
||||
static void
|
||||
xsltExtFunctionTest(xmlXPathParserContextPtr ctxt, int nargs)
|
||||
{
|
||||
xsltTransformContextPtr tctxt;
|
||||
void *data;
|
||||
|
||||
tctxt = xsltXPathGetTransformContext(ctxt);
|
||||
if (tctxt == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtFunctionTest: failed to get the transformation context\n");
|
||||
return;
|
||||
}
|
||||
data = xsltGetExtData(tctxt, (const xmlChar *) XSLT_DEFAULT_URL);
|
||||
if (data == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtFunctionTest: failed to get module data\n");
|
||||
return;
|
||||
}
|
||||
#ifdef WITH_XSLT_DEBUG_FUNCTION
|
||||
xsltGenericDebug(xsltGenericDebugContext,
|
||||
"libxslt:test() called with %d args\n", nargs);
|
||||
#endif
|
||||
}</pre>
|
||||
|
||||
<h3><a name="Registerin2">Registering an extension element</a></h3>
|
||||
|
||||
<p>There is a single call to do this registration:</p>
|
||||
<pre>int xsltRegisterExtElement(xsltTransformContextPtr ctxt,
|
||||
const xmlChar *name,
|
||||
const xmlChar *URI,
|
||||
xsltTransformFunction function);</pre>
|
||||
|
||||
<p>It is similar to the mechanism used to register an extension function,
|
||||
except that the signature of an extension element implementation is
|
||||
different.</p>
|
||||
|
||||
<p>The registration is bound to a single transformation instance referred to by
|
||||
ctxt, name is the UTF8 encoded name for the NCName of the element, and URI is
|
||||
the namespace name for the extension (no checking is done, a module could
|
||||
register elements for a different namespace, but it is not recommended).</p>
|
||||
|
||||
<h3><a name="Implementi1">Implementing an extension element</a></h3>
|
||||
|
||||
<p>The implementation of the element must have the signature of an XSLT
|
||||
transformation function:</p>
|
||||
<pre>/**
|
||||
* xsltTransformFunction:
|
||||
* @ctxt: the XSLT transformation context
|
||||
* @node: the input node
|
||||
* @inst: the stylesheet node
|
||||
* @comp: the compiled information from the stylesheet
|
||||
*
|
||||
* signature of the function associated to elements part of the
|
||||
* stylesheet language like xsl:if or xsl:apply-templates.
|
||||
*/
|
||||
typedef void (*xsltTransformFunction)
|
||||
(xsltTransformContextPtr ctxt,
|
||||
xmlNodePtr node,
|
||||
xmlNodePtr inst,
|
||||
xsltStylePreCompPtr comp);</pre>
|
||||
|
||||
<p>The first argument is the XSLT transformation context. The second and
|
||||
third arguments are xmlNodePtr i.e. internal memory <a
|
||||
href="internals.html#libxml">representation of XML nodes</a>. They are
|
||||
respectively <code>node</code> from the input document being transformed
|
||||
by the stylesheet and <code>inst</code> the extension element in the
|
||||
stylesheet. The last argument is <code>comp</code> a pointer to a precompiled
|
||||
representation of <code>inst</code> but usually for extension elements this
|
||||
value is <code>NULL</code> by default (it could be added and associated to
|
||||
the instruction in <code>inst->_private</code>).</p>
|
||||
|
||||
<p>The same functions are available from a function implementing an extension
|
||||
element as in an extension function, including
|
||||
<code>xsltGetExtData()</code>.</p>
|
||||
|
||||
<p>The goal of extension elements being usually to enrich the generated
|
||||
output, it is expected that they will grow the currently generated output
|
||||
tree, this can be done by grabbing ctxt->insert which is the current
|
||||
libxml node being generated (Note this can also be the intermediate value
|
||||
tree being built for example to initialize a variable, the processing should
|
||||
be similar). The functions for libxml tree manipulation from <a
|
||||
href="http://xmlsoft.org/html/libxml-tree.html"><libxml/tree.h></a> can
|
||||
be employed to extend or modify the tree, but it is required to preserve the
|
||||
insertion node and its ancestors since there are existing pointers to those
|
||||
elements still in use in the XSLT template execution stack.</p>
|
||||
|
||||
<h3><a name="Example">Example for extension elements</a></h3>
|
||||
|
||||
<p>The module libxslt/transform.c contains the sources of the XSLT built-in
|
||||
elements, including xsl:element, xsl:attribute, xsl:if, etc. There is a small
|
||||
but full example in functions.c providing the implementation for the
|
||||
libxslt:test element, it will output a comment in the result tree:</p>
|
||||
<pre>/**
|
||||
* xsltExtElementTest:
|
||||
* @ctxt: an XSLT processing context
|
||||
* @node: The current node
|
||||
* @inst: the instruction in the stylesheet
|
||||
* @comp: precomputed informations
|
||||
*
|
||||
* Process a libxslt:test node
|
||||
*/
|
||||
static void
|
||||
xsltExtElementTest(xsltTransformContextPtr ctxt, xmlNodePtr node,
|
||||
xmlNodePtr inst,
|
||||
xsltStylePreCompPtr comp)
|
||||
{
|
||||
xmlNodePtr comment;
|
||||
|
||||
if (ctxt == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtElementTest: no transformation context\n");
|
||||
return;
|
||||
}
|
||||
if (node == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtElementTest: no current node\n");
|
||||
return;
|
||||
}
|
||||
if (inst == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtElementTest: no instruction\n");
|
||||
return;
|
||||
}
|
||||
if (ctxt->insert == NULL) {
|
||||
xsltGenericError(xsltGenericErrorContext,
|
||||
"xsltExtElementTest: no insertion point\n");
|
||||
return;
|
||||
}
|
||||
comment =
|
||||
xmlNewComment((const xmlChar *)
|
||||
"libxslt:test element test worked");
|
||||
xmlAddChild(ctxt->insert, comment);
|
||||
}</pre>
|
||||
|
||||
<h3><a name="shutdown">The shutdown of a module</a></h3>
|
||||
|
||||
<p>When the XSLT processor ends a transformation, the shutdown function (if
|
||||
it exists) of all the modules initialized is called. The
|
||||
xsltExtShutdownFunction type defines the interface for a shutdown
|
||||
function:</p>
|
||||
<pre>/**
|
||||
* xsltExtShutdownFunction:
|
||||
* @ctxt: an XSLT transformation context
|
||||
* @URI: the namespace URI for the extension
|
||||
* @data: the data associated to this module
|
||||
*
|
||||
* A function called at shutdown time of an XSLT extension module
|
||||
*/
|
||||
typedef void (*xsltExtShutdownFunction) (xsltTransformContextPtr ctxt,
|
||||
const xmlChar *URI,
|
||||
void *data);</pre>
|
||||
|
||||
<p>this is really similar to a module initialization function except a third
|
||||
argument is passed, it's the value that was returned by the initialization
|
||||
function. This allows deallocating resources from the module, for example
|
||||
close the connection to the database to keep the same example.</p>
|
||||
|
||||
<h3><a name="Future">Future work</a></h3>
|
||||
|
||||
<p>Well some of the pieces missing:</p>
|
||||
<ul>
|
||||
<li>a way to load shared libraries to instantiate new modules</li>
|
||||
<li>a better detection of extension function usage and their registration
|
||||
without having to use the extension prefix which ought to be reserved to
|
||||
element extensions.</li>
|
||||
<li>more examples</li>
|
||||
<li>implementations of the <a href="http://www.exslt.org/">EXSLT</a> common
|
||||
extension libraries, Thomas Broyer nearly finished implementing them.</li>
|
||||
</ul>
|
||||
|
||||
<p></p>
|
||||
|
||||
<h2><a name="Contributi">Contributions</a></h2>
|
||||
<ul>
|
||||
<li>Bjorn Reese is the author of the number support and worked on the
|
||||
|
||||
@@ -36,6 +36,8 @@ A:link, A:visited, A:active { text-decoration: underline }
|
||||
<li><a href="news.html">News</a></li>
|
||||
<li><a href="xsltproc2.html">The xsltproc tool</a></li>
|
||||
<li><a href="API.html">The programming API</a></li>
|
||||
<li><a href="internals.html">Library internals</a></li>
|
||||
<li><a href="extensions.html">Writing extensions</a></li>
|
||||
<li><a href="contribs.html">Contributions</a></li>
|
||||
<li>
|
||||
<a href="xslt.html">flat page</a>, <a href="site.xsl">stylesheet</a>
|
||||
|
||||
Reference in New Issue
Block a user