1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-16 17:07:43 +03:00

Improve docs syntax checking

Move the checks out of the Makefile into a perl script that can be
called from both the Makefile and meson.build. The set of files checked
is simplified, so it is just all the sgml and xsl files found in the
doc/src/sgml directory tree.

Along the way make some adjustments to .cirrus.tasks.yml to support this
better in CI.

Also ensure that the checks are part of the Makefile's html target.

Author: Nazir Bilal Yavuz <byavuz81@gmail.com>
Co-Author: Andrew Dunstan <andrew@dunslane.net>

Discussion: https://postgr.es/m/CAN55FZ3BnM+0twT-ZWL8As9oBEte_b+SBU==cz6Hk8JUCM_5Wg@mail.gmail.com
This commit is contained in:
Andrew Dunstan
2025-09-30 15:39:15 -04:00
parent 482bc0705d
commit b292256272
4 changed files with 106 additions and 15 deletions

View File

@@ -627,6 +627,8 @@ task:
TEST_JOBS: 8
IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma
XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml
CIRRUS_WORKING_DIR: ${HOME}/pgsql/
CCACHE_DIR: ${HOME}/ccache
MACPORTS_CACHE: ${HOME}/macports-cache
@@ -641,6 +643,7 @@ task:
MACOS_PACKAGE_LIST: >-
ccache
docbook-xml-4.5
icu
kerberos5
lz4

View File

@@ -124,7 +124,7 @@ ifeq ($(STYLE),website)
XSLTPROC_HTML_FLAGS += --param website.stylesheet 1
endif
html: html-stamp
html: check html-stamp
html-stamp: stylesheet.xsl postgres-full.xml $(ALL_IMAGES)
$(XSLTPROC) $(XMLINCLUDE) $(XSLTPROCFLAGS) $(XSLTPROC_HTML_FLAGS) $(wordlist 1,2,$^)
@@ -200,8 +200,8 @@ MAKEINFO = makeinfo
##
# Quick syntax check without style processing
check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp
$(XMLLINT) $(XMLINCLUDE) --noout --valid $<
check: postgres.sgml $(ALL_SGML)
$(PERL) $(srcdir)/sgml_syntax_check.pl --xmllint "$(XMLLINT)" --srcdir $(srcdir)
##
@@ -261,18 +261,6 @@ clean-man:
endif # sqlmansectnum != 7
# tabs are harmless, but it is best to avoid them in SGML files
check-tabs:
@( ! grep ' ' $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \
(echo "Tabs appear in SGML/XML files" 1>&2; exit 1)
# Non-breaking spaces are harmless, but it is best to avoid them in SGML files.
# Use perl command because non-GNU grep or sed could not have hex escape sequence.
check-nbsp:
@ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
$(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
(echo "Non-breaking spaces appear in SGML/XML files" 1>&2; exit 1)
##
## Clean
##

View File

@@ -306,3 +306,26 @@ endif
if alldocs.length() != 0
alias_target('alldocs', alldocs)
endif
# The perl script that performs the documentation syntax checks.
sgml_syntax_check = files(
'sgml_syntax_check.pl'
)
# Register the script as a test in the 'doc' suite. The 'exitcode' protocol
# means the test passes or fails according to the script's exit status.
test(
'sgml_syntax_check',
perl,
protocol: 'exitcode',
suite: 'doc',
args: [
sgml_syntax_check,
'--xmllint',
# The xmllint path and its --nonet switch are handed over as one argument.
'@0@ --nonet'.format(xmllint_bin.full_path()),
'--srcdir',
meson.current_source_dir(),
'--builddir',
meson.current_build_dir(),
],
depends: doc_generated
)
# NOTE(review): presumably this makes the generated doc sources part of test
# preparation -- confirm against where testprep_targets is consumed.
testprep_targets += doc_generated

View File

@@ -0,0 +1,77 @@
#!/usr/bin/perl
# Copyright (c) 2025, PostgreSQL Global Development Group
# doc/src/sgml/sgml_syntax_check.pl
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
use File::Find;
# Command-line options:
#   --xmllint   command used to run xmllint; it is interpolated into a shell
#               command line, so it may carry extra switches (required)
#   --srcdir    directory searched for sgml/xsl files and added to the
#               xmllint search path (required)
#   --builddir  additional directory searched and added to the xmllint
#               search path (optional)
my $xmllint;
my $srcdir;
my $builddir;

GetOptions(
	'xmllint:s' => \$xmllint,
	'srcdir:s' => \$srcdir,
	'builddir:s' => \$builddir) or die "$0: wrong arguments";

# The option specs make the values optional, so reject both a missing option
# and an empty value here.  Without this, a missing --xmllint would only
# surface later as a fatal "uninitialized value" warning.
die "$0: --srcdir must be specified\n"
  unless defined $srcdir && length $srcdir;
die "$0: --xmllint must be specified\n"
  unless defined $xmllint && length $xmllint;

# Search path switches handed to xmllint.
my $xmlinclude = "--path . --path $srcdir";
$xmlinclude .= " --path $builddir" if defined $builddir;

# find files to process - all the sgml and xsl files (including in subdirectories)
my @files_to_process;
my @dirs_to_search = ($srcdir);
push @dirs_to_search, $builddir if defined $builddir;

find(
	sub {
		return unless -f $_;
		return unless /\.(?:sgml|xsl)\z/;
		push @files_to_process, $File::Find::name;
	},
	@dirs_to_search);

# File::Find does not promise any particular visiting order; sort so that
# diagnostics are reported in a reproducible order.
@files_to_process = sort @files_to_process;
# Scan every file listed in @files_to_process for tab characters and
# non-breaking spaces.  Both are harmless, but it is best to avoid them in
# SGML files.
#
# Every offending line is reported on STDERR.  Once all files have been
# scanned, dies if anything was reported.  Also dies if a file cannot be
# opened.
sub check_tabs_and_nbsp
{
	my $errors = 0;

	for my $f (@files_to_process)
	{
		open my $fh, "<:encoding(UTF-8)", $f or die "Can't open $f: $!";

		my $line_no = 0;
		while (my $line = <$fh>)
		{
			$line_no++;

			if ($line =~ /\t/)
			{
				print STDERR "Tab found in $f:$line_no\n";
				$errors++;
			}

			# The handle decodes UTF-8, so a non-breaking space arrives as
			# the single character U+00A0 rather than the byte pair C2 A0.
			if ($line =~ /\x{A0}/)
			{
				print STDERR "$f:$line_no: contains non-breaking space\n";
				$errors++;
			}
		}

		close($fh);
	}

	die "Tabs and/or non-breaking spaces appear in SGML/XML files\n"
	  if $errors;

	return;
}
# Validate postgres.sgml with xmllint, using the search path in $xmlinclude.
# The command goes through the shell as one string, so $xmllint may contain
# additional switches.  Dies if the command does not exit with status zero.
sub run_xmllint
{
	my $command = "$xmllint $xmlinclude --noout --valid postgres.sgml";
	my $status = system($command);

	die "xmllint validation failed\n" unless $status == 0;

	return 1;
}
# Run the checks.  Each one dies on failure, so the tab/non-breaking-space
# scan only runs once xmllint has accepted the document.
run_xmllint();
check_tabs_and_nbsp();