mirror of
				https://github.com/apache/httpd.git
				synced 2025-11-03 17:53:20 +03:00 
			
		
		
		
	git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1202314 13f79535-47bb-0310-9956-ffa450edef68
		
			
				
	
	
		
			1222 lines
		
	
	
		
			62 KiB
		
	
	
	
		
			XML
		
	
	
	
	
	
			
		
		
	
	
			1222 lines
		
	
	
		
			62 KiB
		
	
	
	
		
			XML
		
	
	
	
	
	
<?xml version="1.0" encoding="ISO-8859-1"?>
 | 
						|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 | 
						|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head><!--
 | 
						|
        XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 | 
						|
              This file is generated from xml source: DO NOT EDIT
 | 
						|
        XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 | 
						|
      -->
 | 
						|
<title>Apache 1.3 API notes - Apache HTTP Server</title>
 | 
						|
<link href="../style/css/manual.css" rel="stylesheet" media="all" type="text/css" title="Main stylesheet" />
 | 
						|
<link href="../style/css/manual-loose-100pc.css" rel="alternate stylesheet" media="all" type="text/css" title="No Sidebar - Default font size" />
 | 
						|
<link href="../style/css/manual-print.css" rel="stylesheet" media="print" type="text/css" />
 | 
						|
<link href="../images/favicon.ico" rel="shortcut icon" /></head>
 | 
						|
<body id="manual-page"><div id="page-header">
 | 
						|
<p class="menu"><a href="../mod/">Modules</a> | <a href="../mod/directives.html">Directives</a> | <a href="../faq/">FAQ</a> | <a href="../glossary.html">Glossary</a> | <a href="../sitemap.html">Sitemap</a></p>
 | 
						|
<p class="apache">Apache HTTP Server Version 2.5</p>
 | 
						|
<img alt="" src="../images/feather.gif" /></div>
 | 
						|
<div class="up"><a href="./"><img title="&lt;-" alt="&lt;-" src="../images/left.gif" /></a></div>
 | 
						|
<div id="path">
 | 
						|
<a href="http://www.apache.org/">Apache</a> > <a href="http://httpd.apache.org/">HTTP Server</a> > <a href="http://httpd.apache.org/docs/">Documentation</a> > <a href="../">Version 2.5</a> > <a href="./">Developer Documentation</a></div><div id="page-content"><div id="preamble"><h1>Apache 1.3 API notes</h1>
 | 
						|
<div class="toplang">
 | 
						|
<p><span>Available Languages: </span><a href="../en/developer/API.html" title="English"> en </a></p>
 | 
						|
</div>
 | 
						|
 | 
						|
    <div class="warning"><h3>Warning</h3>
 | 
						|
      <p>This document has not been updated to take into account changes made
 | 
						|
      in the 2.0 version of the Apache HTTP Server. Some of the information may
 | 
						|
      still be relevant, but please use it with care.</p>
 | 
						|
    </div>
 | 
						|
 | 
						|
    <p>These are some notes on the Apache API and the data structures you have
 | 
						|
    to deal with, <em>etc.</em> They are not yet nearly complete, but hopefully,
 | 
						|
    they will help you get your bearings. Keep in mind that the API is still
 | 
						|
    subject to change as we gain experience with it. (See the TODO file for
 | 
						|
    what <em>might</em> be coming). However, it will be easy to adapt modules
 | 
						|
    to any changes that are made. (We have more modules to adapt than you
 | 
						|
    do).</p>
 | 
						|
 | 
						|
    <p>A few notes on general pedagogical style here. In the interest of
 | 
						|
    conciseness, all structure declarations here are incomplete -- the real
 | 
						|
    ones have more slots that I'm not telling you about. For the most part,
 | 
						|
    these are reserved to one component of the server core or another, and
 | 
						|
    should be altered by modules with caution. However, in some cases, they
 | 
						|
    really are things I just haven't gotten around to yet. Welcome to the
 | 
						|
    bleeding edge.</p>
 | 
						|
 | 
						|
    <p>Finally, here's an outline, to give you some bare idea of what's coming
 | 
						|
    up, and in what order:</p>
 | 
						|
 | 
						|
    <ul>
 | 
						|
      <li>
 | 
						|
        <a href="#basics">Basic concepts.</a>
 | 
						|
 | 
						|
        <ul>
 | 
						|
          <li><a href="#HMR">Handlers, Modules, and
 | 
						|
          Requests</a></li>
 | 
						|
 | 
						|
          <li><a href="#moduletour">A brief tour of a
 | 
						|
          module</a></li>
 | 
						|
        </ul>
 | 
						|
      </li>
 | 
						|
 | 
						|
      <li>
 | 
						|
        <a href="#handlers">How handlers work</a>
 | 
						|
 | 
						|
        <ul>
 | 
						|
          <li><a href="#req_tour">A brief tour of the
 | 
						|
          <code>request_rec</code></a></li>
 | 
						|
 | 
						|
          <li><a href="#req_orig">Where request_rec structures come
 | 
						|
          from</a></li>
 | 
						|
 | 
						|
          <li><a href="#req_return">Handling requests, declining,
 | 
						|
          and returning error codes</a></li>
 | 
						|
 | 
						|
          <li><a href="#resp_handlers">Special considerations for
 | 
						|
          response handlers</a></li>
 | 
						|
 | 
						|
          <li><a href="#auth_handlers">Special considerations for
 | 
						|
          authentication handlers</a></li>
 | 
						|
 | 
						|
          <li><a href="#log_handlers">Special considerations for
 | 
						|
          logging handlers</a></li>
 | 
						|
        </ul>
 | 
						|
      </li>
 | 
						|
 | 
						|
      <li><a href="#pools">Resource allocation and resource
 | 
						|
      pools</a></li>
 | 
						|
 | 
						|
      <li>
 | 
						|
        <a href="#config">Configuration, commands and the like</a>
 | 
						|
 | 
						|
        <ul>
 | 
						|
          <li><a href="#per-dir">Per-directory configuration
 | 
						|
          structures</a></li>
 | 
						|
 | 
						|
          <li><a href="#commands">Command handling</a></li>
 | 
						|
 | 
						|
          <li><a href="#servconf">Side notes --- per-server
 | 
						|
          configuration, virtual servers, <em>etc</em>.</a></li>
 | 
						|
        </ul>
 | 
						|
      </li>
 | 
						|
    </ul>
 | 
						|
</div>
 | 
						|
<div id="quickview"><ul id="toc"><li><img alt="" src="../images/down.gif" /> <a href="#basics">Basic concepts</a></li>
 | 
						|
<li><img alt="" src="../images/down.gif" /> <a href="#handlers">How handlers work</a></li>
 | 
						|
<li><img alt="" src="../images/down.gif" /> <a href="#pools">Resource allocation and resource pools</a></li>
 | 
						|
<li><img alt="" src="../images/down.gif" /> <a href="#config">Configuration, commands and the like</a></li>
 | 
						|
</ul></div>
 | 
						|
<div class="top"><a href="#page-header"><img alt="top" src="../images/up.gif" /></a></div>
 | 
						|
<div class="section">
 | 
						|
<h2><a name="basics" id="basics">Basic concepts</a></h2>
 | 
						|
    <p>We begin with an overview of the basic concepts behind the API, and how
 | 
						|
    they are manifested in the code.</p>
 | 
						|
 | 
						|
    <h3><a name="HMR" id="HMR">Handlers, Modules, and Requests</a></h3>
 | 
						|
      <p>Apache breaks down request handling into a series of steps, more or
 | 
						|
      less the same way the Netscape server API does (although this API has a
 | 
						|
      few more stages than NetSite does, as hooks for stuff I thought might be
 | 
						|
      useful in the future). These are:</p>
 | 
						|
 | 
						|
      <ul>
 | 
						|
      <li>URI -> Filename translation</li>
 | 
						|
      <li>Auth ID checking [is the user who they say they are?]</li>
 | 
						|
      <li>Auth access checking [is the user authorized <em>here</em>?]</li>
 | 
						|
      <li>Access checking other than auth</li>
 | 
						|
      <li>Determining MIME type of the object requested</li>
 | 
						|
      <li>`Fixups' -- there aren't any of these yet, but the phase is intended
 | 
						|
      as a hook for possible extensions like <code class="directive"><a href="../mod/mod_env.html#setenv">SetEnv</a></code>, which don't really fit well elsewhere.</li>
 | 
						|
      <li>Actually sending a response back to the client.</li>
 | 
						|
      <li>Logging the request</li>
 | 
						|
      </ul>
 | 
						|
 | 
						|
      <p>These phases are handled by looking at each of a succession of
 | 
						|
      <em>modules</em>, looking to see if each of them has a handler for the
 | 
						|
      phase, and attempting to invoke it if so. The handler can typically do one
 | 
						|
      of three things:</p>
 | 
						|
 | 
						|
      <ul>
 | 
						|
      <li><em>Handle</em> the request, and indicate that it has done so by
 | 
						|
      returning the magic constant <code>OK</code>.</li>
 | 
						|
 | 
						|
      <li><em>Decline</em> to handle the request, by returning the magic integer
 | 
						|
      constant <code>DECLINED</code>. In this case, the server behaves in all
 | 
						|
      respects as if the handler simply hadn't been there.</li>
 | 
						|
 | 
						|
      <li>Signal an error, by returning one of the HTTP error codes. This
 | 
						|
      terminates normal handling of the request, although an ErrorDocument may
 | 
						|
      be invoked to try to mop up, and it will be logged in any case.</li>
 | 
						|
      </ul>
 | 
						|
 | 
						|
      <p>Most phases are terminated by the first module that handles them;
 | 
						|
      however, for logging, `fixups', and non-access authentication checking,
 | 
						|
      all handlers always run (barring an error). Also, the response phase is
 | 
						|
      unique in that modules may declare multiple handlers for it, via a
 | 
						|
      dispatch table keyed on the MIME type of the requested object. Modules may
 | 
						|
      declare a response-phase handler which can handle <em>any</em> request,
 | 
						|
      by giving it the key <code>*/*</code> (<em>i.e.</em>, a wildcard MIME type
 | 
						|
      specification). However, wildcard handlers are only invoked if the server
 | 
						|
      has already tried and failed to find a more specific response handler for
 | 
						|
      the MIME type of the requested object (either none existed, or they all
 | 
						|
      declined).</p>
 | 
						|
 | 
						|
      <p>The handlers themselves are functions of one argument (a
 | 
						|
      <code>request_rec</code> structure; <em>vide infra</em>), which return an integer,
 | 
						|
      as above.</p>
 | 
						|
    
 | 
						|
 | 
						|
    <h3><a name="moduletour" id="moduletour">A brief tour of a module</a></h3>
 | 
						|
      <p>At this point, we need to explain the structure of a module. Our
 | 
						|
      candidate will be one of the messier ones, the CGI module -- this handles
 | 
						|
      both CGI scripts and the <code class="directive"><a href="../mod/mod_alias.html#scriptalias">ScriptAlias</a></code> config file command. It's actually a great deal
 | 
						|
      more complicated than most modules, but if we're going to have only one
 | 
						|
      example, it might as well be the one with its fingers in every place.</p>
 | 
						|
 | 
						|
      <p>Let's begin with handlers. In order to handle the CGI scripts, the
 | 
						|
      module declares a response handler for them. Because of <code class="directive"><a href="../mod/mod_alias.html#scriptalias">ScriptAlias</a></code>, it also has handlers for the
 | 
						|
      name translation phase (to recognize <code class="directive"><a href="../mod/mod_alias.html#scriptalias">ScriptAlias</a></code>ed URIs), and the type-checking phase (any
 | 
						|
      <code class="directive"><a href="../mod/mod_alias.html#scriptalias">ScriptAlias</a></code>ed request is typed
 | 
						|
      as a CGI script).</p>
 | 
						|
 | 
						|
      <p>The module needs to maintain some per (virtual) server information,
 | 
						|
      namely, the <code class="directive"><a href="../mod/mod_alias.html#scriptalias">ScriptAlias</a></code>es in
 | 
						|
      effect; the module structure therefore contains pointers to a function
 | 
						|
      which builds these structures, and to another which combines two of them
 | 
						|
      (in case the main server and a virtual server both have <code class="directive"><a href="../mod/mod_alias.html#scriptalias">ScriptAlias</a></code>es declared).</p>
 | 
						|
 | 
						|
      <p>Finally, this module contains code to handle the <code class="directive"><a href="../mod/mod_alias.html#scriptalias">ScriptAlias</a></code> command itself. This particular
 | 
						|
      module only declares one command, but there could be more, so modules have
 | 
						|
      <em>command tables</em> which declare their commands, and describe where
 | 
						|
      they are permitted, and how they are to be invoked.</p>
 | 
						|
 | 
						|
      <p>A final note on the declared types of the arguments of some of these
 | 
						|
      commands: a <code>pool</code> is a pointer to a <em>resource pool</em>
 | 
						|
      structure; these are used by the server to keep track of the memory which
 | 
						|
      has been allocated, files opened, <em>etc.</em>, either to service a
 | 
						|
      particular request, or to handle the process of configuring itself. That
 | 
						|
      way, when the request is over (or, for the configuration pool, when the
 | 
						|
      server is restarting), the memory can be freed, and the files closed,
 | 
						|
      <em>en masse</em>, without anyone having to write explicit code to track
 | 
						|
      them all down and dispose of them. Also, a <code>cmd_parms</code>
 | 
						|
      structure contains various information about the config file being read,
 | 
						|
      and other status information, which is sometimes of use to the function
 | 
						|
      which processes a config-file command (such as <code class="directive"><a href="../mod/mod_alias.html#scriptalias">ScriptAlias</a></code>). With no further ado, the
 | 
						|
      module itself:</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        /* Declarations of handlers. */<br />
 | 
						|
        <br />
 | 
						|
        int translate_scriptalias (request_rec *);<br />
 | 
						|
        int type_scriptalias (request_rec *);<br />
 | 
						|
        int cgi_handler (request_rec *);<br />
 | 
						|
        <br />
 | 
						|
        /* Subsidiary dispatch table for response-phase <br />
 | 
						|
         * handlers, by MIME type */<br />
 | 
						|
        <br />
 | 
						|
        handler_rec cgi_handlers[] = {<br />
 | 
						|
        <span class="indent">
 | 
						|
          { "application/x-httpd-cgi", cgi_handler },<br />
 | 
						|
          { NULL }<br />
 | 
						|
        </span>
 | 
						|
        };<br />
 | 
						|
        <br />
 | 
						|
        /* Declarations of routines to manipulate the <br />
 | 
						|
         * module's configuration info.  Note that these are<br />
 | 
						|
         * returned, and passed in, as void *'s; the server<br />
 | 
						|
         * core keeps track of them, but it doesn't, and can't,<br />
 | 
						|
         * know their internal structure.<br />
 | 
						|
         */<br />
 | 
						|
        <br />
 | 
						|
        void *make_cgi_server_config (pool *);<br />
 | 
						|
        void *merge_cgi_server_config (pool *, void *, void *);<br />
 | 
						|
        <br />
 | 
						|
        /* Declarations of routines to handle config-file commands */<br />
 | 
						|
        <br />
 | 
						|
        extern char *script_alias(cmd_parms *, void *per_dir_config, char *fake,
 | 
						|
                                  char *real);<br />
 | 
						|
        <br />
 | 
						|
        command_rec cgi_cmds[] = {<br />
 | 
						|
        <span class="indent">
 | 
						|
          { "ScriptAlias", script_alias, NULL, RSRC_CONF, TAKE2,<br />
 | 
						|
          <span class="indent">"a fakename and a realname"},<br /></span>
 | 
						|
          { NULL }<br />
 | 
						|
        </span>
 | 
						|
        };<br />
 | 
						|
        <br />
 | 
						|
        module cgi_module = {
 | 
						|
</code></p><pre>  STANDARD_MODULE_STUFF,
 | 
						|
  NULL,                     /* initializer */
 | 
						|
  NULL,                     /* dir config creator */
 | 
						|
  NULL,                     /* dir merger */
 | 
						|
  make_cgi_server_config,   /* server config */
 | 
						|
  merge_cgi_server_config,  /* merge server config */
 | 
						|
  cgi_cmds,                 /* command table */
 | 
						|
  cgi_handlers,             /* handlers */
 | 
						|
  translate_scriptalias,    /* filename translation */
 | 
						|
  NULL,                     /* check_user_id */
 | 
						|
  NULL,                     /* check auth */
 | 
						|
  NULL,                     /* check access */
 | 
						|
  type_scriptalias,         /* type_checker */
 | 
						|
  NULL,                     /* fixups */
 | 
						|
  NULL,                     /* logger */
 | 
						|
  NULL                      /* header parser */
 | 
						|
};</pre></div>
 | 
						|
    
 | 
						|
</div><div class="top"><a href="#page-header"><img alt="top" src="../images/up.gif" /></a></div>
 | 
						|
<div class="section">
 | 
						|
<h2><a name="handlers" id="handlers">How handlers work</a></h2>
 | 
						|
    <p>The sole argument to handlers is a <code>request_rec</code> structure.
 | 
						|
    This structure describes a particular request which has been made to the
 | 
						|
    server, on behalf of a client. In most cases, each connection to the
 | 
						|
    client generates only one <code>request_rec</code> structure.</p>
 | 
						|
 | 
						|
    <h3><a name="req_tour" id="req_tour">A brief tour of the request_rec</a></h3>
 | 
						|
      <p>The <code>request_rec</code> contains pointers to a resource pool
 | 
						|
      which will be cleared when the server is finished handling the request;
 | 
						|
      to structures containing per-server and per-connection information, and
 | 
						|
      most importantly, information on the request itself.</p>
 | 
						|
 | 
						|
      <p>The most important such information is a small set of character strings
 | 
						|
      describing attributes of the object being requested, including its URI,
 | 
						|
      filename, content-type and content-encoding (these being filled in by the
 | 
						|
      translation and type-check handlers which handle the request,
 | 
						|
      respectively).</p>
 | 
						|
 | 
						|
      <p>Other commonly used data items are tables giving the MIME headers on
 | 
						|
      the client's original request, MIME headers to be sent back with the
 | 
						|
      response (which modules can add to at will), and environment variables for
 | 
						|
      any subprocesses which are spawned off in the course of servicing the
 | 
						|
      request. These tables are manipulated using the <code>ap_table_get</code>
 | 
						|
      and <code>ap_table_set</code> routines.</p>
 | 
						|
 | 
						|
      <div class="note">
 | 
						|
        <p>Note that the <code>Content-type</code> header value <em>cannot</em>
 | 
						|
        be set by module content-handlers using the <code>ap_table_*()</code>
 | 
						|
        routines. Rather, it is set by pointing the <code>content_type</code>
 | 
						|
        field in the <code>request_rec</code> structure to an appropriate
 | 
						|
        string. <em>e.g.</em>,</p>
 | 
						|
        <div class="example"><p><code>
 | 
						|
          r->content_type = "text/html";
 | 
						|
        </code></p></div>
 | 
						|
      </div>
 | 
						|
 | 
						|
      <p>Finally, there are pointers to two data structures which, in turn,
 | 
						|
      point to per-module configuration structures. Specifically, these hold
 | 
						|
      pointers to the data structures which the module has built to describe
 | 
						|
      the way it has been configured to operate in a given directory (via
 | 
						|
      <code>.htaccess</code> files or <code class="directive"><a href="../mod/core.html#directory">&lt;Directory&gt;</a></code> sections), and for private data it has built in the
 | 
						|
      course of servicing the request (so modules' handlers for one phase can
 | 
						|
      pass `notes' to their handlers for other phases). There is another such
 | 
						|
      configuration vector in the <code>server_rec</code> data structure pointed
 | 
						|
      to by the <code>request_rec</code>, which contains per (virtual) server
 | 
						|
      configuration data.</p>
 | 
						|
 | 
						|
      <p>Here is an abridged declaration, giving the fields most commonly
 | 
						|
      used:</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        struct request_rec {<br />
 | 
						|
        <br />
 | 
						|
        pool *pool;<br />
 | 
						|
        conn_rec *connection;<br />
 | 
						|
        server_rec *server;<br />
 | 
						|
        <br />
 | 
						|
        /* What object is being requested */<br />
 | 
						|
        <br />
 | 
						|
        char *uri;<br />
 | 
						|
        char *filename;<br />
 | 
						|
        char *path_info;
 | 
						|
</code></p><pre>char *args;           /* QUERY_ARGS, if any */
 | 
						|
struct stat finfo;    /* Set by server core;
 | 
						|
                       * st_mode set to zero if no such file */</pre><p><code>
 | 
						|
        char *content_type;<br />
 | 
						|
        char *content_encoding;<br />
 | 
						|
        <br />
 | 
						|
        /* MIME header environments, in and out. Also, <br />
 | 
						|
         * an array containing environment variables to<br />
 | 
						|
         * be passed to subprocesses, so people can write<br />
 | 
						|
         * modules to add to that environment.<br />
 | 
						|
         *<br />
 | 
						|
         * The difference between headers_out and <br />
 | 
						|
         * err_headers_out is that the latter are printed <br />
 | 
						|
         * even on error, and persist across internal<br />
 | 
						|
         * redirects (so the headers printed for <br />
 | 
						|
         * <code class="directive"><a href="../mod/core.html#errordocument">ErrorDocument</a></code> handlers will have
 | 
						|
         them).<br />
 | 
						|
         */<br />
 | 
						|
         <br />
 | 
						|
        table *headers_in;<br />
 | 
						|
        table *headers_out;<br />
 | 
						|
        table *err_headers_out;<br />
 | 
						|
        table *subprocess_env;<br />
 | 
						|
        <br />
 | 
						|
        /* Info about the request itself... */<br />
 | 
						|
        <br />
 | 
						|
</code></p><pre>int header_only;     /* HEAD request, as opposed to GET */
 | 
						|
char *protocol;      /* Protocol, as given to us, or HTTP/0.9 */
 | 
						|
char *method;        /* GET, HEAD, POST, <em>etc.</em> */
 | 
						|
int method_number;   /* M_GET, M_POST, <em>etc.</em> */
 | 
						|
 | 
						|
</pre><p><code>
 | 
						|
        /* Info for logging */<br />
 | 
						|
        <br />
 | 
						|
        char *the_request;<br />
 | 
						|
        int bytes_sent;<br />
 | 
						|
        <br />
 | 
						|
        /* A flag which modules can set, to indicate that<br />
 | 
						|
         * the data being returned is volatile, and clients<br />
 | 
						|
         * should be told not to cache it.<br />
 | 
						|
         */<br />
 | 
						|
        <br />
 | 
						|
        int no_cache;<br />
 | 
						|
        <br />
 | 
						|
        /* Various other config info which may change<br />
 | 
						|
         * with .htaccess files<br />
 | 
						|
         * These are config vectors, with one void*<br />
 | 
						|
         * pointer for each module (the thing pointed<br />
 | 
						|
         * to being the module's business).<br />
 | 
						|
         */<br />
 | 
						|
        <br />
 | 
						|
</code></p><pre>void *per_dir_config;   /* Options set in config files, <em>etc.</em> */
 | 
						|
void *request_config;   /* Notes on *this* request */</pre><p><code>
 | 
						|
        <br />
 | 
						|
        };
 | 
						|
      </code></p></div>
 | 
						|
    
 | 
						|
 | 
						|
    <h3><a name="req_orig" id="req_orig">Where request_rec structures come from</a></h3>
 | 
						|
      <p>Most <code>request_rec</code> structures are built by reading an HTTP
 | 
						|
      request from a client, and filling in the fields. However, there are a
 | 
						|
      few exceptions:</p>
 | 
						|
 | 
						|
      <ul>
 | 
						|
      <li>If the request is to an imagemap, a type map (<em>i.e.</em>, a
 | 
						|
      <code>*.var</code> file), or a CGI script which returned a local
 | 
						|
      `Location:', then the resource which the user requested is going to be
 | 
						|
      ultimately located by some URI other than what the client originally
 | 
						|
      supplied. In this case, the server does an <em>internal redirect</em>,
 | 
						|
      constructing a new <code>request_rec</code> for the new URI, and
 | 
						|
      processing it almost exactly as if the client had requested the new URI
 | 
						|
      directly.</li>
 | 
						|
 | 
						|
      <li>If some handler signaled an error, and an <code>ErrorDocument</code>
 | 
						|
      is in scope, the same internal redirect machinery comes into play.</li>
 | 
						|
 | 
						|
      <li><p>Finally, a handler occasionally needs to investigate `what would
 | 
						|
      happen if' some other request were run. For instance, the directory
 | 
						|
      indexing module needs to know what MIME type would be assigned to a
 | 
						|
      request for each directory entry, in order to figure out what icon to
 | 
						|
      use.</p>
 | 
						|
 | 
						|
      <p>Such handlers can construct a <em>sub-request</em>, using the
 | 
						|
      functions <code>ap_sub_req_lookup_file</code>,
 | 
						|
      <code>ap_sub_req_lookup_uri</code>, and <code>ap_sub_req_method_uri</code>;
 | 
						|
      these construct a new <code>request_rec</code> structure and process it
 | 
						|
      as you would expect, up to but not including the point of actually sending
 | 
						|
      a response. (These functions skip over the access checks if the
 | 
						|
      sub-request is for a file in the same directory as the original
 | 
						|
      request).</p>
 | 
						|
 | 
						|
      <p>(Server-side includes work by building sub-requests and then actually
 | 
						|
      invoking the response handler for them, via the function
 | 
						|
      <code>ap_run_sub_req</code>).</p>
 | 
						|
      </li>
 | 
						|
      </ul>
 | 
						|
    
 | 
						|
 | 
						|
    <h3><a name="req_return" id="req_return">Handling requests, declining, and returning
 | 
						|
    error codes</a></h3>
 | 
						|
      <p>As discussed above, each handler, when invoked to handle a particular
 | 
						|
      <code>request_rec</code>, has to return an <code>int</code> to indicate
 | 
						|
      what happened. That can either be</p>
 | 
						|
 | 
						|
      <ul>
 | 
						|
      <li><code>OK</code> -- the request was handled successfully. This may or
 | 
						|
      may not terminate the phase.</li>
 | 
						|
 | 
						|
      <li><code>DECLINED</code> -- no erroneous condition exists, but the module
 | 
						|
      declines to handle the phase; the server tries to find another.</li>
 | 
						|
 | 
						|
      <li>an HTTP error code, which aborts handling of the request.</li>
 | 
						|
      </ul>
 | 
						|
 | 
						|
      <p>Note that if the error code returned is <code>REDIRECT</code>, then
 | 
						|
      the module should put a <code>Location</code> in the request's
 | 
						|
      <code>headers_out</code>, to indicate where the client should be
 | 
						|
      redirected <em>to</em>.</p>
 | 
						|
    
 | 
						|
 | 
						|
    <h3><a name="resp_handlers" id="resp_handlers">Special considerations for response
 | 
						|
    handlers</a></h3>
 | 
						|
      <p>Handlers for most phases do their work by simply setting a few fields
 | 
						|
      in the <code>request_rec</code> structure (or, in the case of access
 | 
						|
      checkers, simply by returning the correct error code). However, response
 | 
						|
      handlers have to actually send a response back to the client.</p>
 | 
						|
 | 
						|
      <p>They should begin by sending an HTTP response header, using the
 | 
						|
      function <code>ap_send_http_header</code>. (You don't have to do anything
 | 
						|
      special to skip sending the header for HTTP/0.9 requests; the function
 | 
						|
      figures out on its own that it shouldn't do anything). If the request is
 | 
						|
      marked <code>header_only</code>, that's all they should do; they should
 | 
						|
      return after that, without attempting any further output.</p>
 | 
						|
 | 
						|
      <p>Otherwise, they should produce a response body which responds to the
 | 
						|
      client as appropriate. The primitives for this are <code>ap_rputc</code>
 | 
						|
      and <code>ap_rprintf</code>, for internally generated output, and
 | 
						|
      <code>ap_send_fd</code>, to copy the contents of some <code>FILE *</code>
 | 
						|
      straight to the client.</p>
 | 
						|
 | 
						|
      <p>At this point, you should more or less understand the following piece
 | 
						|
      of code, which is the handler which handles <code>GET</code> requests
 | 
						|
      which have no more specific handler; it also shows how conditional
 | 
						|
      <code>GET</code>s can be handled, if it's desirable to do so in a
 | 
						|
      particular response handler -- <code>ap_set_last_modified</code> checks
 | 
						|
      against the <code>If-modified-since</code> value supplied by the client,
 | 
						|
      if any, and returns an appropriate code (which will, if nonzero, be
 | 
						|
      <code>USE_LOCAL_COPY</code>). No similar considerations apply for
 | 
						|
      <code>ap_set_content_length</code>, but it returns an error code for
 | 
						|
      symmetry.</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        int default_handler (request_rec *r)<br />
 | 
						|
        {<br />
 | 
						|
        <span class="indent">
 | 
						|
          int errstatus;<br />
 | 
						|
          FILE *f;<br />
 | 
						|
          <br />
 | 
						|
          if (r->method_number != M_GET) return DECLINED;<br />
 | 
						|
          if (r->finfo.st_mode == 0) return NOT_FOUND;<br />
 | 
						|
          <br />
 | 
						|
          if ((errstatus = ap_set_content_length (r, r->finfo.st_size))<br />
 | 
						|
              ||
 | 
						|
             (errstatus = ap_set_last_modified (r, r->finfo.st_mtime)))<br />
 | 
						|
          return errstatus;<br />
 | 
						|
          <br />
 | 
						|
          f = ap_pfopen (r->pool, r->filename, "r");<br />
 | 
						|
          <br />
 | 
						|
          if (f == NULL) {<br />
 | 
						|
          <span class="indent">
 | 
						|
            log_reason("file permissions deny server access", r->filename, r);<br />
 | 
						|
            return FORBIDDEN;<br />
 | 
						|
          </span>
 | 
						|
          }<br />
 | 
						|
          <br />
 | 
						|
          register_timeout ("send", r);<br />
 | 
						|
          ap_send_http_header (r);<br />
 | 
						|
          <br />
 | 
						|
          if (!r->header_only) ap_send_fd (f, r);<br />
 | 
						|
          ap_pfclose (r->pool, f);<br />
 | 
						|
          return OK;<br />
 | 
						|
        </span>
 | 
						|
        }
 | 
						|
      </code></p></div>
 | 
						|
 | 
						|
      <p>Finally, if all of this is too much of a challenge, there are a few
 | 
						|
      ways out of it. First off, as shown above, a response handler which has
 | 
						|
      not yet produced any output can simply return an error code, in which
 | 
						|
      case the server will automatically produce an error response. Secondly,
 | 
						|
      it can punt to some other handler by invoking
 | 
						|
      <code>ap_internal_redirect</code>, which is how the internal redirection
 | 
						|
      machinery discussed above is invoked. A response handler which has
 | 
						|
      internally redirected should always return <code>OK</code>.</p>
 | 
						|
 | 
						|
      <p>(Invoking <code>ap_internal_redirect</code> from handlers which are
 | 
						|
      <em>not</em> response handlers will lead to serious confusion).</p>
 | 
						|
    
 | 
						|
 | 
						|
    <h3><a name="auth_handlers" id="auth_handlers">Special considerations for authentication
 | 
						|
    handlers</a></h3>
 | 
						|
      <p>Stuff that should be discussed here in detail:</p>
 | 
						|
 | 
						|
      <ul>
 | 
						|
      <li>Authentication-phase handlers not invoked unless auth is
 | 
						|
      configured for the directory.</li>
 | 
						|
 | 
						|
      <li>Common auth configuration stored in the core per-dir
 | 
						|
      configuration; it has accessors <code>ap_auth_type</code>,
 | 
						|
      <code>ap_auth_name</code>, and <code>ap_requires</code>.</li>
 | 
						|
 | 
						|
      <li>Common routines, to handle the protocol end of things, at
 | 
						|
      least for HTTP basic authentication
 | 
						|
      (<code>ap_get_basic_auth_pw</code>, which sets the
 | 
						|
      <code>connection->user</code> structure field
 | 
						|
      automatically, and <code>ap_note_basic_auth_failure</code>,
 | 
						|
      which arranges for the proper <code>WWW-Authenticate:</code>
 | 
						|
      header to be sent back).</li>
 | 
						|
      </ul>
 | 
						|
    
 | 
						|
 | 
						|
    <h3><a name="log_handlers" id="log_handlers">Special considerations for logging
 | 
						|
    handlers</a></h3>
 | 
						|
      <p>When a request has internally redirected, there is the question of
 | 
						|
      what to log. Apache handles this by bundling the entire chain of redirects
 | 
						|
      into a list of <code>request_rec</code> structures which are threaded
 | 
						|
      through the <code>r->prev</code> and <code>r->next</code> pointers.
 | 
						|
      The <code>request_rec</code> which is passed to the logging handlers in
 | 
						|
      such cases is the one which was originally built for the initial request
 | 
						|
      from the client; note that the <code>bytes_sent</code> field will only be
 | 
						|
      correct in the last request in the chain (the one for which a response was
 | 
						|
      actually sent).</p>
 | 
						|
    
 | 
						|
</div><div class="top"><a href="#page-header"><img alt="top" src="../images/up.gif" /></a></div>
 | 
						|
<div class="section">
 | 
						|
<h2><a name="pools" id="pools">Resource allocation and resource pools</a></h2>
 | 
						|
    <p>One of the problems of writing and designing a server-pool server is
 | 
						|
    that of preventing leakage, that is, allocating resources (memory, open
 | 
						|
    files, <em>etc.</em>), without subsequently releasing them. The resource
 | 
						|
    pool machinery is designed to make it easy to prevent this from happening,
 | 
						|
    by allowing resources to be allocated in such a way that they are
 | 
						|
    <em>automatically</em> released when the server is done with them.</p>
 | 
						|
 | 
						|
    <p>The way this works is as follows: the memory which is allocated, files
 | 
						|
    opened, <em>etc.</em>, to deal with a particular request are tied to a
 | 
						|
    <em>resource pool</em> which is allocated for the request. The pool is a
 | 
						|
    data structure which itself tracks the resources in question.</p>
 | 
						|
 | 
						|
    <p>When the request has been processed, the pool is <em>cleared</em>. At
 | 
						|
    that point, all the memory associated with it is released for reuse, all
 | 
						|
    files associated with it are closed, and any other clean-up functions which
 | 
						|
    are associated with the pool are run. When this is over, we can be confident
 | 
						|
    that all the resources tied to the pool have been released, and that none of
 | 
						|
    them have leaked.</p>
 | 
						|
 | 
						|
    <p>Server restarts, and allocation of memory and resources for per-server
 | 
						|
    configuration, are handled in a similar way. There is a <em>configuration
 | 
						|
    pool</em>, which keeps track of resources which were allocated while reading
 | 
						|
    the server configuration files, and handling the commands therein (for
 | 
						|
    instance, the memory that was allocated for per-server module configuration,
 | 
						|
    log files and other files that were opened, and so forth). When the server
 | 
						|
    restarts, and has to reread the configuration files, the configuration pool
 | 
						|
    is cleared, and so the memory and file descriptors which were taken up by
 | 
						|
    reading them the last time are made available for reuse.</p>
 | 
						|
 | 
						|
    <p>It should be noted that use of the pool machinery isn't generally
 | 
						|
    obligatory, except for situations like logging handlers, where you really
 | 
						|
    need to register cleanups to make sure that the log file gets closed when
 | 
						|
    the server restarts (this is most easily done by using the function <code><a href="#pool-files">ap_pfopen</a></code>, which also arranges for the
 | 
						|
    underlying file descriptor to be closed before any child processes, such as
 | 
						|
    for CGI scripts, are <code>exec</code>ed), or in case you are using the
 | 
						|
    timeout machinery (which isn't yet even documented here). However, there are
 | 
						|
    two benefits to using it: resources allocated to a pool never leak (even if
 | 
						|
    you allocate a scratch string, and just forget about it); also, for memory
 | 
						|
    allocation, <code>ap_palloc</code> is generally faster than
 | 
						|
    <code>malloc</code>.</p>
 | 
						|
 | 
						|
    <p>We begin here by describing how memory is allocated to pools, and then
 | 
						|
    discuss how other resources are tracked by the resource pool machinery.</p>
 | 
						|
 | 
						|
    <h3>Allocation of memory in pools</h3>
 | 
						|
      <p>Memory is allocated to pools by calling the function
 | 
						|
      <code>ap_palloc</code>, which takes two arguments, one being a pointer to
 | 
						|
      a resource pool structure, and the other being the amount of memory to
 | 
						|
      allocate (in <code>char</code>s). Within handlers for handling requests,
 | 
						|
      the most common way of getting a resource pool structure is by looking at
 | 
						|
      the <code>pool</code> slot of the relevant <code>request_rec</code>; hence
 | 
						|
      the repeated appearance of the following idiom in module code:</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        int my_handler(request_rec *r)<br />
 | 
						|
        {<br />
 | 
						|
        <span class="indent">
 | 
						|
          struct my_structure *foo;<br />
 | 
						|
          ...<br />
 | 
						|
          <br />
 | 
						|
          foo = (struct my_structure *)ap_palloc (r->pool, sizeof(struct my_structure));<br />
 | 
						|
        </span>
 | 
						|
        }
 | 
						|
      </code></p></div>
 | 
						|
 | 
						|
      <p>Note that <em>there is no <code>ap_pfree</code></em> --
 | 
						|
      <code>ap_palloc</code>ed memory is freed only when the associated resource
 | 
						|
      pool is cleared. This means that <code>ap_palloc</code> does not have to
 | 
						|
      do as much accounting as <code>malloc()</code>; all it does in the typical
 | 
						|
      case is to round up the size, bump a pointer, and do a range check.</p>
 | 
						|
 | 
						|
      <p>(It also raises the possibility that heavy use of
 | 
						|
      <code>ap_palloc</code> could cause a server process to grow excessively
 | 
						|
      large. There are two ways to deal with this, which are dealt with below;
 | 
						|
      briefly, you can use <code>malloc</code>, and try to be sure that all of
 | 
						|
      the memory gets explicitly <code>free</code>d, or you can allocate a
 | 
						|
      sub-pool of the main pool, allocate your memory in the sub-pool, and clear
 | 
						|
      it out periodically. The latter technique is discussed in the section
 | 
						|
      on sub-pools below, and is used in the directory-indexing code, in order
 | 
						|
      to avoid excessive storage allocation when listing directories with
 | 
						|
      thousands of files).</p>
 | 
						|
    
 | 
						|
 | 
						|
    <h3>Allocating initialized memory</h3>
 | 
						|
      <p>There are functions which allocate initialized memory, and are
 | 
						|
      frequently useful. The function <code>ap_pcalloc</code> has the same
 | 
						|
      interface as <code>ap_palloc</code>, but clears out the memory it
 | 
						|
      allocates before it returns it. The function <code>ap_pstrdup</code>
 | 
						|
      takes a resource pool and a <code>char *</code> as arguments, and
 | 
						|
      allocates memory for a copy of the string the pointer points to, returning
 | 
						|
      a pointer to the copy. Finally <code>ap_pstrcat</code> is a varargs-style
 | 
						|
      function, which takes a pointer to a resource pool, and at least two
 | 
						|
      <code>char *</code> arguments, the last of which must be
 | 
						|
      <code>NULL</code>. It allocates enough memory to fit copies of each of
 | 
						|
      the strings, as a unit; for instance:</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        ap_pstrcat (r->pool, "foo", "/", "bar", NULL);
 | 
						|
      </code></p></div>
 | 
						|
 | 
						|
      <p>returns a pointer to 8 bytes worth of memory, initialized to
 | 
						|
      <code>"foo/bar"</code>.</p>
 | 
						|
    
 | 
						|
 | 
						|
    <h3><a name="pools-used" id="pools-used">Commonly-used pools in the Apache Web
 | 
						|
    server</a></h3>
 | 
						|
      <p>A pool is really defined by its lifetime more than anything else.
 | 
						|
      There are some static pools in http_main which are passed to various
 | 
						|
      non-http_main functions as arguments at opportune times. Here they
 | 
						|
      are:</p>
 | 
						|
 | 
						|
      <dl>
 | 
						|
      <dt><code>permanent_pool</code></dt>
 | 
						|
      <dd>never passed to anything else, this is the ancestor of all pools</dd>
 | 
						|
 | 
						|
      <dt><code>pconf</code></dt>
 | 
						|
      <dd>
 | 
						|
        <ul>
 | 
						|
          <li>subpool of permanent_pool</li>
 | 
						|
 | 
						|
          <li>created at the beginning of a config "cycle"; exists
 | 
						|
          until the server is terminated or restarts; passed to all
 | 
						|
          config-time routines, either via cmd->pool, or as the
 | 
						|
          "pool *p" argument on those which don't take pools</li>
 | 
						|
 | 
						|
          <li>passed to the module init() functions</li>
 | 
						|
        </ul>
 | 
						|
      </dd>
 | 
						|
 | 
						|
      <dt><code>ptemp</code></dt>
 | 
						|
      <dd>
 | 
						|
        <ul>
 | 
						|
          <li>sorry I lie, this pool isn't called this currently in
 | 
						|
          1.3, I renamed it this in my pthreads development. I'm
 | 
						|
          referring to the use of ptrans in the parent... contrast
 | 
						|
          this with the later definition of ptrans in the
 | 
						|
          child.</li>
 | 
						|
 | 
						|
          <li>subpool of permanent_pool</li>
 | 
						|
 | 
						|
          <li>created at the beginning of a config "cycle"; exists
 | 
						|
          until the end of config parsing; passed to config-time
 | 
						|
          routines <em>via</em> cmd->temp_pool. Somewhat of a
 | 
						|
          "bastard child" because it isn't available everywhere.
 | 
						|
          Used for temporary scratch space which may be needed by
 | 
						|
          some config routines but which is deleted at the end of
 | 
						|
          config.</li>
 | 
						|
        </ul>
 | 
						|
      </dd>
 | 
						|
 | 
						|
      <dt><code>pchild</code></dt>
 | 
						|
      <dd>
 | 
						|
        <ul>
 | 
						|
          <li>subpool of permanent_pool</li>
 | 
						|
 | 
						|
          <li>created when a child is spawned (or a thread is
 | 
						|
          created); lives until that child (thread) is
 | 
						|
          destroyed</li>
 | 
						|
 | 
						|
          <li>passed to the module child_init functions</li>
 | 
						|
 | 
						|
          <li>destruction happens right after the child_exit
 | 
						|
          functions are called... (which may explain why I think
 | 
						|
          child_exit is redundant and unneeded)</li>
 | 
						|
        </ul>
 | 
						|
      </dd>
 | 
						|
 | 
						|
      <dt><code>ptrans</code></dt>
 | 
						|
      <dd>
 | 
						|
        <ul>
 | 
						|
          <li>should be a subpool of pchild, but currently is a
 | 
						|
          subpool of permanent_pool, see above</li>
 | 
						|
 | 
						|
          <li>cleared by the child before going into the accept()
 | 
						|
          loop to receive a connection</li>
 | 
						|
 | 
						|
          <li>used as connection->pool</li>
 | 
						|
        </ul>
 | 
						|
      </dd>
 | 
						|
 | 
						|
      <dt><code>r->pool</code></dt>
 | 
						|
      <dd>
 | 
						|
        <ul>
 | 
						|
          <li>for the main request this is a subpool of
 | 
						|
          connection->pool; for subrequests it is a subpool of
 | 
						|
          the parent request's pool.</li>
 | 
						|
 | 
						|
          <li>exists until the end of the request (<em>i.e.</em>,
 | 
						|
          ap_destroy_sub_req, or in child_main after
 | 
						|
          process_request has finished)</li>
 | 
						|
 | 
						|
          <li>note that r itself is allocated from r->pool;
 | 
						|
          <em>i.e.</em>, r->pool is first created and then r is
 | 
						|
          the first thing palloc()d from it</li>
 | 
						|
        </ul>
 | 
						|
      </dd>
 | 
						|
      </dl>
 | 
						|
 | 
						|
      <p>For almost everything folks do, <code>r->pool</code> is the pool to
 | 
						|
      use. But you can see how other lifetimes, such as pchild, are useful to
 | 
						|
      some modules... such as modules that need to open a database connection
 | 
						|
      once per child, and wish to clean it up when the child dies.</p>
 | 
						|
 | 
						|
      <p>You can also see how some bugs have manifested themselves, such as
 | 
						|
      setting <code>connection->user</code> to a value from
 | 
						|
      <code>r->pool</code> -- in this case connection exists for the
 | 
						|
      lifetime of <code>ptrans</code>, which is longer than
 | 
						|
      <code>r->pool</code> (especially if <code>r->pool</code> is a
 | 
						|
      subrequest!). So the correct thing to do is to allocate from
 | 
						|
      <code>connection->pool</code>.</p>
 | 
						|
 | 
						|
      <p>And there was another interesting bug in <code class="module"><a href="../mod/mod_include.html">mod_include</a></code>
 | 
						|
      / <code class="module"><a href="../mod/mod_cgi.html">mod_cgi</a></code>. You'll see in those that they do this test
 | 
						|
      to decide if they should use <code>r->pool</code> or
 | 
						|
      <code>r->main->pool</code>. In this case the resource that they are
 | 
						|
      registering for cleanup is a child process. If it were registered in
 | 
						|
      <code>r->pool</code>, then the code would <code>wait()</code> for the
 | 
						|
      child when the subrequest finishes. With <code class="module"><a href="../mod/mod_include.html">mod_include</a></code> this
 | 
						|
      could be any old <code>#include</code>, and the delay can be up to 3
 | 
						|
      seconds... and happened quite frequently. Instead the subprocess is
 | 
						|
      registered in <code>r->main->pool</code> which causes it to be
 | 
						|
      cleaned up when the entire request is done -- <em>i.e.</em>, after the
 | 
						|
      output has been sent to the client and logging has happened.</p>
 | 
						|
    
 | 
						|
 | 
						|
    <h3><a name="pool-files" id="pool-files">Tracking open files, etc.</a></h3>
 | 
						|
      <p>As indicated above, resource pools are also used to track other sorts
 | 
						|
      of resources besides memory. The most common are open files. The routine
 | 
						|
      which is typically used for this is <code>ap_pfopen</code>, which takes a
 | 
						|
      resource pool and two strings as arguments; the strings are the same as
 | 
						|
      the typical arguments to <code>fopen</code>, <em>e.g.</em>,</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        ...<br />
 | 
						|
        FILE *f = ap_pfopen (r->pool, r->filename, "r");<br />
 | 
						|
        <br />
 | 
						|
        if (f == NULL) { ... } else { ... }<br />
 | 
						|
      </code></p></div>
 | 
						|
 | 
						|
      <p>There is also a <code>ap_popenf</code> routine, which parallels the
 | 
						|
      lower-level <code>open</code> system call. Both of these routines arrange
 | 
						|
      for the file to be closed when the resource pool in question is
 | 
						|
      cleared.</p>
 | 
						|
 | 
						|
      <p>Unlike the case for memory, there <em>are</em> functions to close files
 | 
						|
      allocated with <code>ap_pfopen</code> and <code>ap_popenf</code>, namely
 | 
						|
      <code>ap_pfclose</code> and <code>ap_pclosef</code>. (This is because, on
 | 
						|
      many systems, the number of files which a single process can have open is
 | 
						|
      quite limited). It is important to use these functions to close files
 | 
						|
      allocated with <code>ap_pfopen</code> and <code>ap_popenf</code>, since to
 | 
						|
      do otherwise could cause fatal errors on systems such as Linux, which
 | 
						|
      react badly if the same <code>FILE*</code> is closed more than once.</p>
 | 
						|
 | 
						|
      <p>(Using the <code>close</code> functions is not mandatory, since the
 | 
						|
      file will eventually be closed regardless, but you should consider it in
 | 
						|
      cases where your module is opening, or could open, a lot of files).</p>
 | 
						|
    
 | 
						|
 | 
						|
    <h3>Other sorts of resources -- cleanup functions</h3>
 | 
						|
      <p>More text goes here. Describe the cleanup primitives in terms of
 | 
						|
      which the file stuff is implemented; also, <code>spawn_process</code>.</p>
 | 
						|
 | 
						|
      <p>Pool cleanups live until <code>clear_pool()</code> is called:
 | 
						|
      <code>clear_pool(a)</code> recursively calls <code>destroy_pool()</code>
 | 
						|
      on all subpools of <code>a</code>; then calls all the cleanups for
 | 
						|
      <code>a</code>; then releases all the memory for <code>a</code>.
 | 
						|
      <code>destroy_pool(a)</code> calls <code>clear_pool(a)</code> and then
 | 
						|
      releases the pool structure itself. <em>i.e.</em>,
 | 
						|
      <code>clear_pool(a)</code> doesn't delete <code>a</code>, it just frees
 | 
						|
      up all the resources and you can start using it again immediately.</p>
 | 
						|
    
 | 
						|
 | 
						|
    <h3>Fine control -- creating and dealing with sub-pools, with
 | 
						|
    a note on sub-requests</h3>
 | 
						|
      <p>On rare occasions, too-free use of <code>ap_palloc()</code> and the
 | 
						|
      associated primitives may result in undesirably profligate resource
 | 
						|
      allocation. You can deal with such a case by creating a <em>sub-pool</em>,
 | 
						|
      allocating within the sub-pool rather than the main pool, and clearing or
 | 
						|
      destroying the sub-pool, which releases the resources which were
 | 
						|
      associated with it. (This really <em>is</em> a rare situation; the only
 | 
						|
      case in which it comes up in the standard module set is in case of listing
 | 
						|
      directories, and then only with <em>very</em> large directories.
 | 
						|
      Unnecessary use of the primitives discussed here can hair up your code
 | 
						|
      quite a bit, with very little gain).</p>
 | 
						|
 | 
						|
      <p>The primitive for creating a sub-pool is <code>ap_make_sub_pool</code>,
 | 
						|
      which takes another pool (the parent pool) as an argument. When the main
 | 
						|
      pool is cleared, the sub-pool will be destroyed. The sub-pool may also be
 | 
						|
      cleared or destroyed at any time, by calling the functions
 | 
						|
      <code>ap_clear_pool</code> and <code>ap_destroy_pool</code>, respectively.
 | 
						|
      (The difference is that <code>ap_clear_pool</code> frees resources
 | 
						|
      associated with the pool, while <code>ap_destroy_pool</code> also
 | 
						|
      deallocates the pool itself. In the former case, you can allocate new
 | 
						|
      resources within the pool, and clear it again, and so forth; in the
 | 
						|
      latter case, it is simply gone).</p>
 | 
						|
 | 
						|
      <p>One final note -- sub-requests have their own resource pools, which are
 | 
						|
      sub-pools of the resource pool for the main request. The polite way to
 | 
						|
      reclaim the resources associated with a sub-request which you have
 | 
						|
      allocated (using the <code>ap_sub_req_...</code> functions) is
 | 
						|
      <code>ap_destroy_sub_req</code>, which frees the resource pool. Before
 | 
						|
      calling this function, be sure to copy anything that you care about which
 | 
						|
      might be allocated in the sub-request's resource pool into someplace a
 | 
						|
      little less volatile (for instance, the filename in its
 | 
						|
      <code>request_rec</code> structure).</p>
 | 
						|
 | 
						|
      <p>(Again, under most circumstances, you shouldn't feel obliged to call
 | 
						|
      this function; only 2K of memory or so are allocated for a typical sub
 | 
						|
      request, and it will be freed anyway when the main request pool is
 | 
						|
      cleared. It is only when you are allocating many, many sub-requests for a
 | 
						|
      single main request that you should seriously consider the
 | 
						|
      <code>ap_destroy_...</code> functions).</p>
 | 
						|
    
 | 
						|
</div><div class="top"><a href="#page-header"><img alt="top" src="../images/up.gif" /></a></div>
 | 
						|
<div class="section">
 | 
						|
<h2><a name="config" id="config">Configuration, commands and the like</a></h2>
 | 
						|
    <p>One of the design goals for this server was to maintain external
 | 
						|
    compatibility with the NCSA 1.3 server --- that is, to read the same
 | 
						|
    configuration files, to process all the directives therein correctly, and
 | 
						|
    in general to be a drop-in replacement for NCSA. On the other hand, another
 | 
						|
    design goal was to move as much of the server's functionality into modules
 | 
						|
    which have as little as possible to do with the monolithic server core. The
 | 
						|
    only way to reconcile these goals is to move the handling of most commands
 | 
						|
    from the central server into the modules.</p>
 | 
						|
 | 
						|
    <p>However, just giving the modules command tables is not enough to divorce
 | 
						|
    them completely from the server core. The server has to remember the
 | 
						|
    commands in order to act on them later. That involves maintaining data which
 | 
						|
    is private to the modules, and which can be either per-server, or
 | 
						|
    per-directory. Most things are per-directory, including in particular access
 | 
						|
    control and authorization information, but also information on how to
 | 
						|
    determine file types from suffixes, which can be modified by
 | 
						|
    <code class="directive"><a href="../mod/mod_mime.html#addtype">AddType</a></code> and <code class="directive"><a href="../mod/core.html#forcetype">ForceType</a></code> directives, and so forth. In general,
 | 
						|
    the governing philosophy is that anything which <em>can</em> be made
 | 
						|
    configurable by directory should be; per-server information is generally
 | 
						|
    used in the standard set of modules for information like
 | 
						|
    <code class="directive"><a href="../mod/mod_alias.html#alias">Alias</a></code>es and <code class="directive"><a href="../mod/mod_alias.html#redirect">Redirect</a></code>s which come into play before the
 | 
						|
    request is tied to a particular place in the underlying file system.</p>
 | 
						|
 | 
						|
    <p>Another requirement for emulating the NCSA server is being able to handle
 | 
						|
    the per-directory configuration files, generally called
 | 
						|
    <code>.htaccess</code> files, though even in the NCSA server they can
 | 
						|
    contain directives which have nothing at all to do with access control.
 | 
						|
    Accordingly, after URI -> filename translation, but before performing any
 | 
						|
    other phase, the server walks down the directory hierarchy of the underlying
 | 
						|
    filesystem, following the translated pathname, to read any
 | 
						|
    <code>.htaccess</code> files which might be present. The information which
 | 
						|
    is read in then has to be <em>merged</em> with the applicable information
 | 
						|
    from the server's own config files (either from the <code class="directive"><a href="../mod/core.html#directory">&lt;Directory&gt;</a></code> sections in
 | 
						|
    <code>access.conf</code>, or from defaults in <code>srm.conf</code>, which
 | 
						|
    actually behaves for most purposes almost exactly like <code>&lt;Directory
 | 
						|
    /></code>).</p>
 | 
						|
 | 
						|
    <p>Finally, after having served a request which involved reading
 | 
						|
    <code>.htaccess</code> files, we need to discard the storage allocated for
 | 
						|
    handling them. That is solved the same way it is solved wherever else
 | 
						|
    similar problems come up, by tying those structures to the per-transaction
 | 
						|
    resource pool.</p>
 | 
						|
 | 
						|
    <h3><a name="per-dir" id="per-dir">Per-directory configuration structures</a></h3>
 | 
						|
      <p>Let's look at how all of this plays out in <code>mod_mime.c</code>,
 | 
						|
      which defines the file typing handler which emulates the NCSA server's
 | 
						|
      behavior of determining file types from suffixes. What we'll be looking
 | 
						|
      at, here, is the code which implements the <code class="directive"><a href="../mod/mod_mime.html#addtype">AddType</a></code> and <code class="directive"><a href="../mod/mod_mime.html#addencoding">AddEncoding</a></code> commands. These commands can appear in
 | 
						|
      <code>.htaccess</code> files, so they must be handled in the module's
 | 
						|
      private per-directory data, which, in fact, consists of two separate
 | 
						|
      tables for MIME types and encoding information, and is declared as
 | 
						|
      follows:</p>
 | 
						|
 | 
						|
      <div class="example"><pre>typedef struct {
 | 
						|
    table *forced_types;      /* Additional AddTyped stuff */
 | 
						|
    table *encoding_types;    /* Added with AddEncoding... */
 | 
						|
} mime_dir_config;</pre></div>
 | 
						|
 | 
						|
      <p>When the server is reading a configuration file, or <code class="directive"><a href="../mod/core.html#directory">&lt;Directory&gt;</a></code> section, which includes
 | 
						|
      one of the MIME module's commands, it needs to create a
 | 
						|
      <code>mime_dir_config</code> structure, so those commands have something
 | 
						|
      to act on. It does this by invoking the function it finds in the module's
 | 
						|
      `create per-dir config slot', with two arguments: the name of the
 | 
						|
      directory to which this configuration information applies (or
 | 
						|
      <code>NULL</code> for <code>srm.conf</code>), and a pointer to a
 | 
						|
      resource pool in which the allocation should happen.</p>
 | 
						|
 | 
						|
      <p>(If we are reading a <code>.htaccess</code> file, that resource pool
 | 
						|
      is the per-request resource pool for the request; otherwise it is a
 | 
						|
      resource pool which is used for configuration data, and cleared on
 | 
						|
      restarts. Either way, it is important for the structure being created to
 | 
						|
      vanish when the pool is cleared, by registering a cleanup on the pool if
 | 
						|
      necessary).</p>
 | 
						|
 | 
						|
      <p>For the MIME module, the per-dir config creation function just
 | 
						|
      <code>ap_palloc</code>s the structure above, and creates a couple of
 | 
						|
      tables to fill it. That looks like this:</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        void *create_mime_dir_config (pool *p, char *dummy)<br />
 | 
						|
        {<br />
 | 
						|
        <span class="indent">
 | 
						|
          mime_dir_config *new =<br />
 | 
						|
          <span class="indent">
 | 
						|
           (mime_dir_config *) ap_palloc (p, sizeof(mime_dir_config));<br />
 | 
						|
          </span>
 | 
						|
          <br />
 | 
						|
          new->forced_types = ap_make_table (p, 4);<br />
 | 
						|
          new->encoding_types = ap_make_table (p, 4);<br />
 | 
						|
          <br />
 | 
						|
          return new;<br />
 | 
						|
        </span>
 | 
						|
        }
 | 
						|
      </code></p></div>
 | 
						|
 | 
						|
      <p>Now, suppose we've just read in a <code>.htaccess</code> file. We
 | 
						|
      already have the per-directory configuration structure for the next
 | 
						|
      directory up in the hierarchy. If the <code>.htaccess</code> file we just
 | 
						|
      read in didn't have any <code class="directive"><a href="../mod/mod_mime.html#addtype">AddType</a></code>
 | 
						|
      or <code class="directive"><a href="../mod/mod_mime.html#addencoding">AddEncoding</a></code> commands, its
 | 
						|
      per-directory config structure for the MIME module is still valid, and we
 | 
						|
      can just use it. Otherwise, we need to merge the two structures
 | 
						|
      somehow.</p>
 | 
						|
 | 
						|
      <p>To do that, the server invokes the module's per-directory config merge
 | 
						|
      function, if one is present. That function takes three arguments: the two
 | 
						|
      structures being merged, and a resource pool in which to allocate the
 | 
						|
      result. For the MIME module, all that needs to be done is overlay the
 | 
						|
      tables from the new per-directory config structure with those from the
 | 
						|
      parent:</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        void *merge_mime_dir_configs (pool *p, void *parent_dirv, void *subdirv)<br />
 | 
						|
        {<br />
 | 
						|
        <span class="indent">
 | 
						|
          mime_dir_config *parent_dir = (mime_dir_config *)parent_dirv;<br />
 | 
						|
          mime_dir_config *subdir = (mime_dir_config *)subdirv;<br />
 | 
						|
          mime_dir_config *new =<br />
 | 
						|
          <span class="indent">
 | 
						|
            (mime_dir_config *)ap_palloc (p, sizeof(mime_dir_config));<br />
 | 
						|
          </span>
 | 
						|
          <br />
 | 
						|
          new->forced_types = ap_overlay_tables (p, subdir->forced_types,<br />
 | 
						|
          <span class="indent">
 | 
						|
            parent_dir->forced_types);<br />
 | 
						|
          </span>
 | 
						|
          new->encoding_types = ap_overlay_tables (p, subdir->encoding_types,<br />
 | 
						|
          <span class="indent">
 | 
						|
            parent_dir->encoding_types);<br />
 | 
						|
          </span>
 | 
						|
          <br />
 | 
						|
          return new;<br />
 | 
						|
        </span>
 | 
						|
        }
 | 
						|
      </code></p></div>
 | 
						|
 | 
						|
      <p>As a note -- if there is no per-directory merge function present, the
 | 
						|
      server will just use the subdirectory's configuration info, and ignore
 | 
						|
      the parent's. For some modules, that works just fine (<em>e.g.</em>, for
 | 
						|
      the includes module, whose per-directory configuration information
 | 
						|
      consists solely of the state of the <code>XBITHACK</code>), and for those
 | 
						|
      modules, you can just not declare one, and leave the corresponding
 | 
						|
      structure slot in the module itself <code>NULL</code>.</p>
 | 
						|
    
 | 
						|
 | 
						|
    <h3><a name="commands" id="commands">Command handling</a></h3>
 | 
						|
      <p>Now that we have these structures, we need to be able to figure out how
 | 
						|
      to fill them. That involves processing the actual <code class="directive"><a href="../mod/mod_mime.html#addtype">AddType</a></code> and <code class="directive"><a href="../mod/mod_mime.html#addencoding">AddEncoding</a></code> commands. To find commands, the server looks in
 | 
						|
      the module's command table. That table contains information on how many
 | 
						|
      arguments the commands take, and in what formats, where they are permitted,
 | 
						|
      and so forth. That information is sufficient to allow the server to invoke
 | 
						|
      most command-handling functions with pre-parsed arguments. Without further
 | 
						|
      ado, let's look at the <code class="directive"><a href="../mod/mod_mime.html#addtype">AddType</a></code>
 | 
						|
      command handler, which looks like this (the <code class="directive"><a href="../mod/mod_mime.html#addencoding">AddEncoding</a></code> command looks basically the same, and won't be
 | 
						|
      shown here):</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        char *add_type(cmd_parms *cmd, mime_dir_config *m, char *ct, char *ext)<br />
 | 
						|
        {<br />
 | 
						|
        <span class="indent">
 | 
						|
          if (*ext == '.') ++ext;<br />
 | 
						|
          ap_table_set (m->forced_types, ext, ct);<br />
 | 
						|
          return NULL;<br />
 | 
						|
        </span>
 | 
						|
        }
 | 
						|
      </code></p></div>
 | 
						|
 | 
						|
      <p>This command handler is unusually simple. As you can see, it takes
 | 
						|
      four arguments: a pointer to a <code>cmd_parms</code> structure, the
 | 
						|
      per-directory configuration structure for the module in question, and
 | 
						|
      two pre-parsed arguments from the directive. The <code>cmd_parms</code>
 | 
						|
      structure contains a bunch of arguments which are frequently of use to
 | 
						|
      some, but not all, commands, including a resource pool (from which memory
 | 
						|
      can be allocated, and to which cleanups should be tied), and the (virtual)
 | 
						|
      server being configured, from which the module's per-server configuration
 | 
						|
      data can be obtained if required.</p>
 | 
						|
 | 
						|
      <p>Another way in which this particular command handler is unusually
 | 
						|
      simple is that there are no error conditions which it can encounter. If
 | 
						|
      there were, it could return an error message instead of <code>NULL</code>;
 | 
						|
      this causes an error to be printed out on the server's
 | 
						|
      <code>stderr</code>, followed by a quick exit, if it is in the main config
 | 
						|
      files; for a <code>.htaccess</code> file, the syntax error is logged in
 | 
						|
      the server error log (along with an indication of where it came from), and
 | 
						|
      the request is bounced with a server error response (HTTP error status,
 | 
						|
      code 500).</p>
 | 
						|
 | 
						|
      <p>The MIME module's command table has entries for these commands, which
 | 
						|
      look like this:</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        command_rec mime_cmds[] = {<br />
 | 
						|
        <span class="indent">
 | 
						|
          { "AddType", add_type, NULL, OR_FILEINFO, TAKE2,<br />
 | 
						|
          <span class="indent">"a mime type followed by a file extension" },<br /></span>
 | 
						|
          { "AddEncoding", add_encoding, NULL, OR_FILEINFO, TAKE2,<br />
 | 
						|
          <span class="indent">
 | 
						|
          "an encoding (<em>e.g.</em>, gzip), followed by a file extension" },<br />
 | 
						|
          </span>
 | 
						|
          { NULL }<br />
 | 
						|
        </span>
 | 
						|
        };
 | 
						|
      </code></p></div>
 | 
						|
 | 
						|
      <p>The entries in these tables are:</p>
 | 
						|
      <ul>
 | 
						|
      <li>The name of the command</li>
 | 
						|
      <li>The function which handles it</li>
 | 
						|
      <li>a <code>(void *)</code> pointer, which is passed in the
 | 
						|
      <code>cmd_parms</code> structure to the command handler ---
 | 
						|
      this is useful in case many similar commands are handled by
 | 
						|
      the same function.</li>
 | 
						|
 | 
						|
      <li>A bit mask indicating where the command may appear. There
 | 
						|
      are mask bits corresponding to each
 | 
						|
      <code>AllowOverride</code> option, and an additional mask
 | 
						|
      bit, <code>RSRC_CONF</code>, indicating that the command may
 | 
						|
      appear in the server's own config files, but <em>not</em> in
 | 
						|
      any <code>.htaccess</code> file.</li>
 | 
						|
 | 
						|
      <li>A flag indicating how many arguments the command handler
 | 
						|
      wants pre-parsed, and how they should be passed in.
 | 
						|
      <code>TAKE2</code> indicates two pre-parsed arguments. Other
 | 
						|
      options are <code>TAKE1</code>, which indicates one
 | 
						|
      pre-parsed argument, <code>FLAG</code>, which indicates that
 | 
						|
      the argument should be <code>On</code> or <code>Off</code>,
 | 
						|
      and is passed in as a boolean flag, and <code>RAW_ARGS</code>,
 | 
						|
      which causes the server to give the command the raw, unparsed
 | 
						|
      arguments (everything but the command name itself). There is
 | 
						|
      also <code>ITERATE</code>, which means that the handler looks
 | 
						|
      the same as <code>TAKE1</code>, but that if multiple
 | 
						|
      arguments are present, it should be called multiple times,
 | 
						|
      and finally <code>ITERATE2</code>, which indicates that the
 | 
						|
      command handler looks like a <code>TAKE2</code>, but if more
 | 
						|
      arguments are present, then it should be called multiple
 | 
						|
      times, holding the first argument constant.</li>
 | 
						|
 | 
						|
      <li>Finally, we have a string which describes the arguments
 | 
						|
      that should be present. If the arguments in the actual config
 | 
						|
      file are not as required, this string will be used to help
 | 
						|
      give a more specific error message. (You can safely leave
 | 
						|
      this <code>NULL</code>.)</li>
 | 
						|
      </ul>
 | 
						|
 | 
						|
      <p>Finally, having set this all up, we have to use it. This is ultimately
 | 
						|
      done in the module's handlers, specifically in its file-typing handler,
 | 
						|
      which looks more or less like this; note that the per-directory
 | 
						|
      configuration structure is extracted from the <code>request_rec</code>'s
 | 
						|
      per-directory configuration vector by using the
 | 
						|
      <code>ap_get_module_config</code> function.</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        int find_ct(request_rec *r)<br />
 | 
						|
        {<br />
 | 
						|
        <span class="indent">
 | 
						|
          int i;<br />
 | 
						|
          char *fn = ap_pstrdup (r->pool, r->filename);<br />
 | 
						|
          mime_dir_config *conf = (mime_dir_config *)<br />
 | 
						|
          <span class="indent">
 | 
						|
            ap_get_module_config(r->per_dir_config, &amp;mime_module);<br />
 | 
						|
          </span>
 | 
						|
          char *type;<br />
 | 
						|
          <br />
 | 
						|
          if (S_ISDIR(r->finfo.st_mode)) {<br />
 | 
						|
          <span class="indent">
 | 
						|
            r->content_type = DIR_MAGIC_TYPE;<br />
 | 
						|
            return OK;<br />
 | 
						|
          </span>
 | 
						|
          }<br />
 | 
						|
          <br />
 | 
						|
          if((i=ap_rind(fn,'.')) &lt; 0) return DECLINED;<br />
 | 
						|
          ++i;<br />
 | 
						|
          <br />
 | 
						|
          if ((type = ap_table_get (conf->encoding_types, &amp;fn[i])))<br />
 | 
						|
          {<br />
 | 
						|
          <span class="indent">
 | 
						|
            r->content_encoding = type;<br />
 | 
						|
            <br />
 | 
						|
            /* go back to previous extension to try to use it as a type */<br />
 | 
						|
            fn[i-1] = '\0';<br />
 | 
						|
            if((i=ap_rind(fn,'.')) &lt; 0) return OK;<br />
 | 
						|
            ++i;<br />
 | 
						|
          </span>
 | 
						|
          }<br />
 | 
						|
          <br />
 | 
						|
          if ((type = ap_table_get (conf->forced_types, &amp;fn[i])))<br />
 | 
						|
          {<br />
 | 
						|
          <span class="indent">
 | 
						|
            r->content_type = type;<br />
 | 
						|
          </span>
 | 
						|
          }<br />
 | 
						|
          <br />
 | 
						|
          return OK;
 | 
						|
        </span>
 | 
						|
        }
 | 
						|
      </code></p></div>
 | 
						|
    
 | 
						|
 | 
						|
    <h3><a name="servconf" id="servconf">Side notes -- per-server configuration,
 | 
						|
    virtual servers, <em>etc</em>.</a></h3>
 | 
						|
      <p>The basic ideas behind per-server module configuration are essentially
 | 
						|
      the same as those for per-directory configuration; there is a creation
 | 
						|
      function and a merge function, the latter being invoked where a virtual
 | 
						|
      server has partially overridden the base server configuration, and a
 | 
						|
      combined structure must be computed. (As with per-directory configuration,
 | 
						|
      the default if no merge function is specified, and a module is configured
 | 
						|
      in some virtual server, is that the base configuration is simply
 | 
						|
      ignored.)</p>
 | 
						|
 | 
						|
      <p>The only substantial difference is that when a command needs to
 | 
						|
      configure the per-server private module data, it needs to go to the
 | 
						|
      <code>cmd_parms</code> data to get at it. Here's an example, from the
 | 
						|
      alias module, which also indicates how a syntax error can be returned
 | 
						|
      (note that the per-directory configuration argument to the command
 | 
						|
      handler is declared as a dummy, since the module doesn't actually have
 | 
						|
      per-directory config data):</p>
 | 
						|
 | 
						|
      <div class="example"><p><code>
 | 
						|
        char *add_redirect(cmd_parms *cmd, void *dummy, char *f, char *url)<br />
 | 
						|
        {<br />
 | 
						|
        <span class="indent">
 | 
						|
          server_rec *s = cmd->server;<br />
 | 
						|
          alias_server_conf *conf = (alias_server_conf *)<br />
 | 
						|
          <span class="indent">
 | 
						|
            ap_get_module_config(s->module_config,&alias_module);<br />
 | 
						|
          </span>
 | 
						|
          alias_entry *new = ap_push_array (conf->redirects);<br />
 | 
						|
          <br />
 | 
						|
          if (!ap_is_url (url)) return "Redirect to non-URL";<br />
 | 
						|
          <br />
 | 
						|
          new->fake = f; new->real = url;<br />
 | 
						|
          return NULL;<br />
 | 
						|
        </span>
 | 
						|
        }
 | 
						|
      </code></p></div>
 | 
						|
    
 | 
						|
</div></div>
 | 
						|
<div class="bottomlang">
 | 
						|
<p><span>Available Languages: </span><a href="../en/developer/API.html" title="English"> en </a></p>
 | 
						|
</div><div id="footer">
 | 
						|
<p class="apache">Copyright 2011 The Apache Software Foundation.<br />Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.</p>
 | 
						|
<p class="menu"><a href="../mod/">Modules</a> | <a href="../mod/directives.html">Directives</a> | <a href="../faq/">FAQ</a> | <a href="../glossary.html">Glossary</a> | <a href="../sitemap.html">Sitemap</a></p></div>
 | 
						|
</body></html> |