mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-30 04:26:45 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			201 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			201 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/perl -w

# Fix the output of `makeinfo --docbook` version 4.0c
# Convert the broken docbook output to well-formed XML that conforms to the O'Reilly idiom
# See code for detailed comments
# Authors: Arjen Lentz and Zak Greant (original code by Jeremy Cole)
#
# Input is read through the magic <> filehandle (STDIN, or the file(s) named
# on the command line). The fixed-up document is printed to STDOUT; progress
# messages go to STDERR via msg().
#
# The whole document is held in $data and rewritten by a fixed sequence of
# substitutions. The order matters: later rules rely on the output of
# earlier ones (for example, entities are inserted before bare ampersands
# are escaped).

use strict;
use warnings;

my $data  = '';
my @nodes = ();    # ids found inside the appendices that get removed below

msg ("-- Post-processing `makeinfo --docbook` output --");
msg ("** Written to work with makeinfo version 4.0c **\n");

msg ("Discarding DTD - not required by subsequent scripts");
# <> is a magic filehandle - either reading lines from stdin or from file(s) specified on the command line
# Scalar assignment reads exactly one line (the first one), which is thrown away.
my $discarded_first_line = <>;

msg ("Create an XML PI with ISO-8859-1 character encoding");
$data = "<?xml version='1.0' encoding='ISO-8859-1'?>";

msg ("Get the rest of the data");
# List context: pulls in every remaining line of every input file.
$data .= join "", <>;

msg ("Add missing <bookinfo> and <abstract> opening tags");
# Note the absence of the g (global) pattern modifier. This situation can only happen once.
# ...as soon as we find the first instance, we can stop looking.
$data =~ s/<book lang="en">/<book lang="en"><bookinfo><abstract>/;


# arjen 2002-05-01
msg ("Processing docbook-prefix special strings");
$data =~ s/FIXUPmdashFIXUP/\&mdash\;/g;

$data =~ s/FIXUPdoubledashFIXUP/--/g;

$data =~ s/FIXUPstrongFIXUP/<emphasis\ role\=\"bold\">/g;
$data =~ s/FIXUPendstrongFIXUP/<\/emphasis>/g;

$data =~ s/FIXUPemphFIXUP/<emphasis>/g;
$data =~ s/FIXUPendemphFIXUP/<\/emphasis>/g;

$data =~ s/FIXUPfileFIXUP/<filename>/g;
$data =~ s/FIXUPendfileFIXUP/<\/filename>/g;

$data =~ s/FIXUPsampFIXUP/<literal>/g;
$data =~ s/FIXUPendsampFIXUP/<\/literal>/g;


msg ("Removing mailto: from email addresses...");
$data =~ s/mailto://g;

msg ("Removing INFORMALFIGURE...");
$data =~ s{<informalfigure>.+?</informalfigure>}
          {}gs;

msg ("Convert ampersand to XML escape sequence...");
# Only ampersands that do not already start an entity (&name;) are escaped.
# The entity text in the replacements below is written with backslashes
# (\&...\;), like the FIXUPmdashFIXUP rule above; the result is identical,
# but the script text itself never contains a literal entity.
$data =~ s/&(?!\w+;)/\&amp\;/g;

# arjen 2002-05-01
msg ("Changing (TM) to XML escape sequence...");
$data =~ s/MySQL \(TM\)/MySQL\&trade\;/g;
$data =~ s{<command>TM</command>}
          {\&trade\;}g;

# arjen 2002-05-01
msg ("Changing ' -- ' to XML escape sequence...");
$data =~ s/ -- /\&mdash\;/g;

msg ('Changing @@ to @...');
$data =~ s/\@\@/\@/g;

msg ("Rework references of the notation '<n>'");
# Need to talk to Arjen about what the <n> bits are for
$data =~ s/<(\d)>/[$1]/g;

msg ("Changing '_' to '-' in references...");
$data =~ s{((?:id|linkend)=\".+?\")}
          {&underscore2hyphen($1)}gex;

msg ("Changing ULINK to SYSTEMITEM...");
$data =~ s{<ulink url=\"(.+?)\">\s*</ulink>}
          {<systemitem role=\"url\">$1</systemitem>}gs;

msg ("Adding PARA inside ENTRY...");
$data =~ s{<entry>(.*?)</entry>}
          {<entry><para>$1</para></entry>}gs;

msg ("Fixing spacing problem with titles...");
$data =~ s{(</\w+>)(\w{2,})}
          {$1 $2}gs;

msg ("Adding closing / to XREF and COLSPEC tags...");
$data =~ s{<(xref|colspec) (.+?)>}
          {<$1 $2 />}gs;

# arjen 2002-04-26
msg ("Removing separate target titles from LINKs and make them XREFs...");
$data =~ s{<link (linkend=.+?)>.+?</link>}
          {<xref $1 />}gs;

# Probably need to strip these
msg ('Adding "See " to XREFs that used to be @xref...');
$data =~ s{([.'!)])\s*<xref }
          {$1 See <xref }gs;

msg ('Adding "see " to (XREFs) that used to be (@pxref)...');
$data =~ s{([([,;])(\s*)<xref }
          {$1$2see <xref }gs;

msg ("Making first row in table THEAD...");
$data =~ s{( *)<tbody>(\s*<row>.+?</row>)}
          {$1<thead>$2\n$1</thead>\n$1<tbody>}gs;

msg ("Removing EMPHASIS inside THEAD...");
$data =~ s{<thead>(.+?)</thead>}
          {"<thead>".&strip_tag($1, 'emphasis')."</thead>"}gsex;

msg ("Removing empty PARA...");
$data =~ s{<para>\s*</para>}
          {}gs;

msg ("Removing lf before /PARA in ENTRY...");
$data =~ s{\n(</para></entry>)}
          {$1}gs;

msg ("Removing whitespace before /PARA if not on separate line...");
$data =~ s{(\S+)[\t ]+</para>}
          {$1</para>}g;

msg ("Removing PARA around INDEXTERM if no text in PARA...");
$data =~ s{<para>((?:<indexterm role=\"[^"]+\">(?:<(primary|secondary)>[^>]+</\2>)+?</indexterm>)+?)\s*</para>}
          {$1}gs;

# Appendices (by id) that are dropped from the document.
my @apx = ("Users", "MySQL Testimonials", "News", "GPL-license", "LGPL-license");

foreach my $apx (@apx) {
    msg ("Removing appendix $apx...");

    # Capture what is removed inside the substitution itself. Testing $&
    # afterwards is not reliable: when this substitution does not match, $&
    # still holds the text of an earlier successful match.
    my @removed = ();
    $data =~ s{(<appendix id=\"\Q$apx\E\">.+?</appendix>)}
              { push @removed, $1; '' }gse;

    # Skip to next appendix if the regex did not match anything
    next unless @removed;

    msg ("...Building list of removed nodes...");

    # Split the removed text into lines and collect the first id="..."
    # found on each line (this includes the appendix's own id).
    foreach my $line (map { split "\n", $_ } @removed) {
        push @nodes, $1 if $line =~ /<\w+ id=\"(.+?)\">/;
    }
}

# 2002-02-22 arjen@mysql.com (added fix " /" to end of regex, to make it match)
msg ("Fixing references to removed nodes...");
# With no removed nodes the alternation would be empty and match
# linkend="" instead, so the rewrite is skipped entirely in that case.
if (@nodes) {
    # Merge the list of node names into a set of regex alternations,
    # quoting each name so it is matched literally.
    my $nodes = join "|", map { quotemeta } @nodes;

    # Find all references to removed nodes and convert them to absolute URLs
    $data =~ s{<\w+ linkend="($nodes)" />}
              {&xref2link($1)}ges;
}

print STDOUT $data or die "docbook-fixup: cannot write output: $!\n";
exit;

#
# Definitions for helper sub-routines
#

# Print a progress message to STDERR.
#   Argument: the message text (only the first argument is used).
#   Output:   "docbook-fixup:" immediately followed by the text and a newline.
sub msg {
    my $text = shift;
    print STDERR "docbook-fixup:", $text, "\n";
}

# Remove every <TAG>...</TAG> pair from a string, keeping the enclosed text.
#   Arguments: $str - text to process
#              $tag - element name, matched literally
#   Returns:   the modified copy of $str.
# Only attribute-less opening tags (<TAG>) are recognised. The content may
# span several lines and may be empty.
sub strip_tag {
    my ($str, $tag) = @_;
    # \Q...\E keeps regex metacharacters in $tag from being interpreted.
    # .*? (rather than .+?) lets an empty element be removed on its own
    # instead of pairing its closing tag with a later element.
    $str =~ s{<\Q$tag\E>(.*?)</\Q$tag\E>}{$1}gs;
    return $str;
}

# Return a copy of the given string with every underscore replaced by a hyphen.
#   Argument: the string to convert (the caller's value is not modified).
sub underscore2hyphen {
    my ($str) = @_;
    $str =~ tr/_/-/;
    return $str;
}

# Turn a node name into an absolute URL below http://www.mysql.com/doc/.
#   Argument: the node name.
#   Returns:  the URL as a string.
# Spaces in the name become underscores. A name of three or more characters
# (on a single line) is then rewritten to "<1st char>/<2nd char>/<name>.html";
# a shorter name is appended to the base URL unchanged.
sub xref2link {
    my ($ref) = @_;
    $ref =~ tr/ /_/;
    $ref =~ s{^((.)(.).+)$}{$2/$3/$1.html};
    return "http://www.mysql.com/doc/" . $ref;
}

# We might need to encode the high-bit characters to ensure proper representation
# msg ("Converting high-bit characters to entities");
# $data =~ s/([\200-\400])/&get_entity($1)>/gs;
# There is no get_entity function yet - no point writing it until we need it :)
