blob: 1d6702972825b39377183854a3386009f107bcd4 [file] [log] [blame]
#!/usr/bin/perl -w
# Generate a short man page from --help and --version output.
# Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2009,
# 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
# Written by Brendan O'Dea <bod@debian.org>
# Available from ftp://ftp.gnu.org/gnu/help2man/
use 5.008;
use strict;
use Getopt::Long;
use Text::ParseWords qw(shellwords);
use Text::Tabs qw(expand);
use POSIX qw(strftime setlocale LC_ALL);
use Locale::gettext qw(gettext);
use Encode qw(decode encode);
use I18N::Langinfo qw(langinfo CODESET);
my $this_program = 'help2man';
my $this_version = '1.47.6';
my $encoding;
{
my $gettext = Locale::gettext->domain($this_program);
sub _ { $gettext->get($_[0]) }
my ($user_locale) = grep defined && length,
(map $ENV{$_}, qw(LANGUAGE LC_ALL LC_MESSAGES LANG)), 'C';
my $user_encoding = langinfo CODESET;
# Set localisation of date and executable's output.
sub configure_locale
{
delete @ENV{qw(LANGUAGE LC_MESSAGES LANG)};
setlocale LC_ALL, $ENV{LC_ALL} = shift || 'C';
$encoding = langinfo CODESET;
}
sub dec { $encoding ? decode $encoding, $_[0] : $_[0] }
sub enc { $encoding ? encode $encoding, $_[0] : $_[0] }
sub enc_user { encode $user_encoding, $_[0] }
sub kark # die with message formatted in the invoking user's locale
{
setlocale LC_ALL, $user_locale;
my $fmt = $gettext->get(shift);
my $errmsg = enc_user sprintf $fmt, @_;
die $errmsg, "\n";
}
}
sub N_ { $_[0] }
sub program_basename;
sub get_option_value;
sub convert_option;
sub fix_italic_spacing;
my $version_info = enc_user sprintf _(<<'EOT'), $this_program, $this_version;
GNU %s %s
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
Written by Brendan O'Dea <bod@debian.org>
EOT
my $help_info = enc_user sprintf _(<<'EOT'), $this_program, $this_program;
`%s' generates a man page out of `--help' and `--version' output.
Usage: %s [OPTION]... EXECUTABLE
-n, --name=STRING description for the NAME paragraph
-s, --section=SECTION section number for manual page (1, 6, 8)
-m, --manual=TEXT name of manual (User Commands, ...)
-S, --source=TEXT source of program (FSF, Debian, ...)
-L, --locale=STRING select locale (default "C")
-i, --include=FILE include material from `FILE'
-I, --opt-include=FILE include material from `FILE' if it exists
-o, --output=FILE send output to `FILE'
-p, --info-page=TEXT name of Texinfo manual
-N, --no-info suppress pointer to Texinfo manual
-l, --libtool exclude the `lt-' from the program name
--help print this help, then exit
--version print version number, then exit
EXECUTABLE should accept `--help' and `--version' options and produce output on
stdout although alternatives may be specified using:
-h, --help-option=STRING help option string
-v, --version-option=STRING version option string
--version-string=STRING version string
--no-discard-stderr include stderr when parsing option output
Report bugs to <bug-help2man@gnu.org>.
EOT
my $section = 1;
my $manual = '';
my $source = '';
my $help_option = '--help';
my $version_option = '--version';
my $discard_stderr = 1;
my ($opt_name, @opt_include, $opt_output, $opt_info, $opt_no_info, $opt_libtool,
$version_text);
my %opt_def = (
'n|name=s' => \$opt_name,
's|section=s' => \$section,
'm|manual=s' => \$manual,
'S|source=s' => \$source,
'L|locale=s' => sub { configure_locale pop },
'i|include=s' => sub { push @opt_include, [ pop, 1 ] },
'I|opt-include=s' => sub { push @opt_include, [ pop, 0 ] },
'o|output=s' => \$opt_output,
'p|info-page=s' => \$opt_info,
'N|no-info' => \$opt_no_info,
'l|libtool' => \$opt_libtool,
'help' => sub { print $help_info; exit },
'version' => sub { print $version_info; exit },
'h|help-option=s' => \$help_option,
'v|version-option=s' => \$version_option,
'version-string=s' => \$version_text,
'discard-stderr!' => \$discard_stderr,
);
# Parse options.
Getopt::Long::config('bundling');
die $help_info unless GetOptions %opt_def and @ARGV == 1;
configure_locale unless $encoding;
my %include = ();
my %replace = ();
my %append = ();
my %append_match = ();
my @sections = (); # retain order of include file or in-line *section*s
# Process include file (if given). Format is:
#
# Optional initial text, ignored. May include lines starting with `-'
# which are processed as options.
#
# [section]
# Verbatim text to be included in the named section. By default at
# the start, but in the case of `name' and `synopsis' the content
# will replace the autogenerated contents.
#
# [<section]
# Verbatim text to be inserted at the start of the named section.
#
# [=section]
# Verbatim text to replace the named section.
#
# [>section]
# Verbatim text to be appended to the end of the named section.
#
# /pattern/
# Verbatim text for inclusion below a paragraph matching `pattern'.
#
while (@opt_include)
{
my ($inc, $required) = @{shift @opt_include};
next unless -f $inc or $required;
kark N_("%s: can't open `%s' (%s)"), $this_program, $inc, $!
unless open INC, $inc;
my $key;
my $hash;
while (<INC>)
{
# Convert input to internal Perl format, so that multibyte
# sequences are treated as single characters.
$_ = dec $_;
# [section]
if (/^\[([^]]+)\]\s*$/)
{
$key = uc $1;
$key =~ s/^\s+//;
$key =~ s/\s+$//;
$hash = \%include;
# Handle explicit [<section], [=section] and [>section]
if ($key =~ s/^([<>=])\s*//)
{
if ($1 eq '>') { $hash = \%append; }
elsif ($1 eq '=') { $hash = \%replace; }
}
# NAME/SYNOPSIS replace by default
elsif ($key eq _('NAME') or $key eq _('SYNOPSIS'))
{
$hash = \%replace;
}
else
{
$hash = \%include;
}
push @sections, $key;
next;
}
# /pattern/
if (m!^/(.*)/([ims]*)\s*$!)
{
my $pat = $2 ? "(?$2)$1" : $1;
# Check pattern.
eval { $key = qr($pat) };
if ($@)
{
$@ =~ s/ at .*? line \d.*//;
die "$inc:$.:$@";
}
$hash = \%append_match;
next;
}
# Check for options before the first section--anything else is
# silently ignored, allowing the first for comments and
# revision info.
unless ($key)
{
# handle options
if (/^-/)
{
local @ARGV = shellwords $_;
GetOptions %opt_def;
}
next;
}
$hash->{$key} .= $_;
}
close INC;
kark N_("%s: no valid information found in `%s'"), $this_program, $inc
unless $key;
}
# Compress trailing blank lines.
for my $hash (\(%include, %replace, %append, %append_match))
{
for (keys %$hash) { $hash->{$_} =~ s/\n+$/\n/ }
}
# Grab help and version info from executable.
my $help_text = get_option_value $ARGV[0], $help_option;
$version_text ||= get_option_value $ARGV[0], $version_option;
# By default the generated manual pages will include the current date. This may
# however be overriden by setting the environment variable $SOURCE_DATE_EPOCH to
# an integer value of the seconds since the UNIX epoch. This is primarily
# intended to support reproducible builds (wiki.debian.org/ReproducibleBuilds)
# and will additionally ensure that the output date string is UTC.
my $epoch_secs = time;
if (exists $ENV{SOURCE_DATE_EPOCH} and $ENV{SOURCE_DATE_EPOCH} =~ /^(\d+)$/)
{
$epoch_secs = $1;
$ENV{TZ} = 'UTC0';
}
# Translators: the following message is a strftime(3) format string, which in
# the English version expands to the month as a word and the full year. It
# is used on the footer of the generated manual pages. If in doubt, you may
# just use %x as the value (which should be the full locale-specific date).
my $date = enc strftime _("%B %Y"), localtime $epoch_secs;
my $program = program_basename $ARGV[0];
my $package = $program;
my $version;
if ($opt_output)
{
unlink $opt_output or kark N_("%s: can't unlink %s (%s)"),
$this_program, $opt_output, $! if -e $opt_output;
open STDOUT, ">$opt_output"
or kark N_("%s: can't create %s (%s)"), $this_program, $opt_output, $!;
}
# The first line of the --version information is assumed to be in one
# of the following formats:
#
# <version>
# <program> <version>
# {GNU,Free} <program> <version>
# <program> ({GNU,Free} <package>) <version>
# <program> - {GNU,Free} <package> <version>
#
# and separated from any copyright/author details by a blank line.
($_, $version_text) = ((split /\n+/, $version_text, 2), '');
if (/^(\S+) +\(((?:GNU|Free) +[^)]+)\) +(.*)/ or
/^(\S+) +- *((?:GNU|Free) +\S+) +(.*)/)
{
$program = program_basename $1;
$package = $2;
$version = $3;
}
elsif (/^((?:GNU|Free) +)?(\S+) +(.*)/)
{
$program = program_basename $2;
$package = $1 ? "$1$program" : $program;
$version = $3;
}
else
{
$version = $_;
}
# No info for `info' itself.
$opt_no_info = 1 if $program eq 'info';
if ($opt_name)
{
# --name overrides --include contents.
$replace{_('NAME')} = "$program \\- $opt_name\n";
}
# Translators: "NAME", "SYNOPSIS" and other one or two word strings in all
# upper case are manual page section headings. The man(1) manual page in your
# language, if available should provide the conventional translations.
for ($replace{_('NAME')} || ($include{_('NAME')} ||= ''))
{
if ($_) # Use first name given as $program
{
$program = $1 if /^([^\s,]+)(?:,?\s*[^\s,\\-]+)*\s+\\?-/;
}
else # Set a default (useless) NAME paragraph.
{
$_ = sprintf _("%s \\- manual page for %s %s") . "\n", $program,
$program, $version;
}
}
# Man pages traditionally have the page title in caps.
my $PROGRAM = uc $program;
# Set default page head/footers
$source ||= "$package $version";
unless ($manual)
{
for ($section)
{
if (/^(1[Mm]|8)/) { $manual = enc _('System Administration Utilities') }
elsif (/^6/) { $manual = enc _('Games') }
else { $manual = enc _('User Commands') }
}
}
# Extract usage clause(s) [if any] for SYNOPSIS.
# Translators: "Usage" and "or" here are patterns (regular expressions) which
# are used to match the usage synopsis in program output. An example from cp
# (GNU coreutils) which contains both strings:
# Usage: cp [OPTION]... [-T] SOURCE DEST
# or: cp [OPTION]... SOURCE... DIRECTORY
# or: cp [OPTION]... -t DIRECTORY SOURCE...
my $PAT_USAGE = _('Usage');
my $PAT_USAGE_CONT = _('or');
if ($help_text =~ s/^($PAT_USAGE):( +(\S+))(.*)((?:\n(?: {6}\1| *($PAT_USAGE_CONT): +\S).*)*)//om)
{
my @syn = $3 . $4;
if ($_ = $5)
{
s/^\n//;
for (split /\n/) { s/^ *(($PAT_USAGE_CONT): +)?//o; push @syn, $_ }
}
my $synopsis = '';
for (@syn)
{
$synopsis .= ".br\n" if $synopsis;
s!^\S*/!!;
s/^lt-// if $opt_libtool;
s/^(\S+) *//;
$synopsis .= ".B $1\n";
s/\s+$//;
s/(([][]|\.\.+)+)/\\fR$1\\fI/g;
s/^/\\fI/ unless s/^\\fR//;
$_ .= '\fR';
s/(\\fI)( *)/$2$1/g;
s/\\fI\\fR//g;
s/^\\fR//;
s/\\fI$//;
s/^\./\\&./;
$_ = fix_italic_spacing $_;
$synopsis .= "$_\n";
}
$include{_('SYNOPSIS')} .= $synopsis;
}
# Process text, initial section is DESCRIPTION.
my $sect = _('DESCRIPTION');
$_ = "$help_text\n\n$version_text";
# Normalise paragraph breaks.
s/^\n+//;
s/\n*$/\n/;
s/\n\n+/\n\n/g;
# Join hyphenated lines.
s/([A-Za-z])-\n *([A-Za-z])/$1$2/g;
# Temporarily exchange leading dots, apostrophes and backslashes for
# tokens.
s/^\./\x80/mg;
s/^'/\x81/mg;
s/\\/\x82/g;
# Translators: patterns are used to match common program output. In the source
# these strings are all of the form of "my $PAT_something = _('...');" and are
# regular expressions. If there is more than one commonly used string, you
# may separate alternatives with "|". Spaces in these expressions are written
# as " +" to indicate that more than one space may be matched. The string
# "(?:[\\w-]+ +)?" in the bug reporting pattern is used to indicate an
# optional word, so that either "Report bugs" or "Report _program_ bugs" will
# be matched.
my $PAT_BUGS = _('Report +(?:[\w-]+ +)?bugs|Email +bug +reports +to');
my $PAT_AUTHOR = _('Written +by');
my $PAT_OPTIONS = _('Options');
my $PAT_ENVIRONMENT = _('Environment');
my $PAT_FILES = _('Files');
my $PAT_EXAMPLES = _('Examples');
my $PAT_FREE_SOFTWARE = _('This +is +free +software');
# Start a new paragraph (if required) for these.
s/([^\n])\n($PAT_BUGS|$PAT_AUTHOR) /$1\n\n$2 /og;
# Convert iso-8859-1 copyright symbol or (c) to nroff
# character.
s/^Copyright +(?:\xa9|\([Cc]\))/Copyright \\(co/mg;
while (length)
{
# Convert some standard paragraph names.
if (s/^($PAT_OPTIONS): *\n+//o)
{
$sect = _('OPTIONS');
next;
}
if (s/^($PAT_ENVIRONMENT): *\n+//o)
{
$sect = _('ENVIRONMENT');
next;
}
if (s/^($PAT_FILES): *\n+//o)
{
$sect = _('FILES');
next;
}
elsif (s/^($PAT_EXAMPLES): *\n+//o)
{
$sect = _('EXAMPLES');
next;
}
# Custom section indicated by a line containing "*Section Name*".
if (s/^\*(\w(.*\w)?)\* *\n+//)
{
$sect = uc $1;
$sect =~ tr/*/ /; # also accept *Section*Name*
push @sections, $sect;
next;
}
# Copyright section.
if (/^Copyright /)
{
$sect = _('COPYRIGHT');
}
# Bug reporting section.
elsif (/^($PAT_BUGS) /o)
{
$sect = _('REPORTING BUGS');
}
# Author section.
elsif (/^($PAT_AUTHOR)/o)
{
$sect = _('AUTHOR');
}
# Examples, indicated by an indented leading $, % or > are
# rendered in a constant width font.
if (/^( +)([\$\%>] )\S/)
{
my $indent = $1;
my $prefix = $2;
my $break = '.IP';
while (s/^$indent\Q$prefix\E(\S.*)\n*//)
{
$include{$sect} .= "$break\n\\f(CW$prefix$1\\fR\n";
$break = '.br';
}
next;
}
my $matched = '';
# Sub-sections have a trailing colon and the second line indented.
if (s/^(\S.*:) *\n / /)
{
$matched .= $& if %append_match;
$include{$sect} .= qq(.SS "$1"\n);
}
my $indent = 0;
my $content = '';
# Option with description.
if (s/^( {1,10}([+-]\S.*?))(?:( +(?!-))|\n( {20,}))(\S.*)\n//)
{
$matched .= $& if %append_match;
$indent = length ($4 || "$1$3");
$content = ".TP\n\x84$2\n\x84$5\n";
unless ($4)
{
# Indent may be different on second line.
$indent = length $& if /^ {20,}/;
}
}
# Option without description.
elsif (s/^ {1,10}([+-]\S.*)\n//)
{
$matched .= $& if %append_match;
$content = ".HP\n\x84$1\n";
$indent = 80; # not continued
}
# Indented paragraph with tag.
elsif (s/^( +(\S.*?))(?:( +)|\n( {20,}))(\S.*)\n//)
{
$matched .= $& if %append_match;
$indent = length ($4 || "$1$3");
$content = ".TP\n\x84$2\n\x84$5\n";
}
# Indented paragraph.
elsif (s/^( +)(\S.*)\n//)
{
$matched .= $& if %append_match;
$indent = length $1;
$content = ".IP\n\x84$2\n";
}
# Left justified paragraph.
else
{
s/(.*)\n//;
$matched .= $& if %append_match;
$content = ".PP\n" if $include{$sect};
$content .= "$1\n";
}
# Append continuations.
while ($indent ? s/^ {$indent}(\S.*)\n// : s/^(\S.*)\n//)
{
$matched .= $& if %append_match;
$content .= "\x84$1\n";
}
# Move to next paragraph.
s/^\n+//;
for ($content)
{
# Leading dot and apostrophe protection.
s/\x84\./\x80/g;
s/\x84'/\x81/g;
s/\x84//g;
# Examples should be verbatim.
unless ($sect eq _('EXAMPLES'))
{
# Convert options.
s/(^|[ (])(-[][\w=-]+)/$1 . convert_option $2/mge;
# Italicise filenames: /a/b, $VAR/c/d, ~/e/f
s!
(^|[ (]) # space/punctuation before
(
(?:\$\w+|~)? # leading variable, or tilde
(?:/\w(?:[\w.-]*\w)?)+ # path components
)
($|[ ,;.)]) # space/punctuation after
!$1\\fI$2\\fP$3!xmg;
$_ = fix_italic_spacing $_;
}
# Escape remaining hyphens.
s/-/\x83/g;
if ($sect eq _('COPYRIGHT'))
{
# Insert line breaks before additional copyright messages
# and the disclaimer.
s/\n(Copyright |$PAT_FREE_SOFTWARE)/\n.br\n$1/og;
}
elsif ($sect eq _('REPORTING BUGS'))
{
# Handle multi-line bug reporting sections of the form:
#
# Report <program> bugs to <addr>
# GNU <package> home page: <url>
# ...
s/\n([[:upper:]])/\n.br\n$1/g;
}
}
# Check if matched paragraph contains /pat/.
if (%append_match)
{
for my $pat (keys %append_match)
{
if ($matched =~ $pat)
{
$content .= ".PP\n" unless $append_match{$pat} =~ /^\./;
$content .= $append_match{$pat};
}
}
}
$include{$sect} .= $content;
}
# Refer to the real documentation.
unless ($opt_no_info)
{
my $info_page = $opt_info || $program;
$sect = _('SEE ALSO');
$include{$sect} .= ".PP\n" if $include{$sect};
$include{$sect} .= sprintf _(<<'EOT'), $program, $program, $info_page;
The full documentation for
.B %s
is maintained as a Texinfo manual. If the
.B info
and
.B %s
programs are properly installed at your site, the command
.IP
.B info %s
.PP
should give you access to the complete manual.
EOT
}
# Append additional text.
while (my ($sect, $text) = each %append)
{
$include{$sect} .= $append{$sect};
}
# Replace sections.
while (my ($sect, $text) = each %replace)
{
$include{$sect} = $replace{$sect};
}
# Output header.
print <<EOT;
.\\" DO NOT MODIFY THIS FILE! It was generated by $this_program $this_version.
.TH $PROGRAM "$section" "$date" "$source" "$manual"
EOT
# Section ordering.
my @pre = (_('NAME'), _('SYNOPSIS'), _('DESCRIPTION'), _('OPTIONS'));
my @post = (_('ENVIRONMENT'), _('FILES'), _('EXAMPLES'), _('AUTHOR'),
_('REPORTING BUGS'), _('COPYRIGHT'), _('SEE ALSO'));
my %filter = map { $_ => 1 } @pre, @post;
# Output content.
my %done;
for my $sect (@pre, (grep !$filter{$_}, @sections), @post)
{
next if $done{$sect}++; # ignore duplicates
next unless $include{$sect};
if ($include{$sect})
{
my $quote = $sect =~ /\W/ ? '"' : '';
print enc ".SH $quote$sect$quote\n";
for ($include{$sect})
{
# Replace leading dot, apostrophe, backslash and hyphen
# tokens.
s/\x80/\\&./g;
s/\x81/\\&'/g;
s/\x82/\\e/g;
s/\x83/\\-/g;
# Convert some latin1 chars to troff equivalents
s/\xa0/\\ /g; # non-breaking space
print enc $_;
}
}
}
close STDOUT or kark N_("%s: error writing to %s (%s)"), $this_program,
$opt_output || 'stdout', $!;
exit;
# Get program basename, and strip libtool "lt-" prefix if required.
sub program_basename
{
local $_ = shift;
s!.*/!!;
s/^lt-// if $opt_libtool;
$_;
}
# Call program with given option and return results.
sub get_option_value
{
my ($prog, $opt) = @_;
my $stderr = $discard_stderr ? '/dev/null' : '&1';
my $value = join '',
map { s/ +$//; expand $_ }
map { dec $_ }
`$prog $opt 2>$stderr`;
unless ($value)
{
my $err = N_("%s: can't get `%s' info from %s%s");
my $extra = $discard_stderr
? "\n" . N_("Try `--no-discard-stderr' if option outputs to stderr")
: '';
kark $err, $this_program, $opt, $prog, $extra;
}
$value;
}
# Convert option dashes to \- to stop nroff from hyphenating 'em, and
# embolden. Option arguments get italicised.
sub convert_option
{
local $_ = '\fB' . shift;
s/-/\x83/g;
unless (s/\[=(.*)\]$/\\fR[=\\fI$1\\fR]/)
{
s/=(.)/\\fR=\\fI$1/;
s/ (.)/ \\fI$1/;
$_ .= '\fR';
}
$_;
}
# Insert spacing escape characters \, and \/ before and after italic text. See
# http://www.gnu.org/software/groff/manual/html_node/Ligatures-and-Kerning.html
sub fix_italic_spacing
{
local $_ = shift;
s!\\fI(.*?)\\f([BRP])!\\fI\\,$1\\/\\f$2!g;
return $_;
}