From 2f438fa9f53250fb3c8b39a95eedd627b5569ca4 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Thu, 27 Dec 2018 22:36:08 -0700 Subject: [PATCH] basenc: A new program complementary to base64/base32 Encodes/decodes data in various common formats: base64,base64url,base32,base32hex,base16,base2,z85. Discussed here: https://lists.gnu.org/r/coreutils/2018-11/msg00014.html https://lists.gnu.org/r/coreutils/2018-12/msg00019.html * AUTHORS: Add basenc. * README: Reference the new program. * NEWS: Mention the new program. * build-aux/gen-lists-of-programs.sh: Add basenc. * doc/coreutils.texi: (basenc invocation): Document the new command. * man/.gitignore: Ignore the generated man page. * man/basenc.x: A new template, with a few examples. * man/local.mk: Reference the new man page. * scripts/git-hooks/commit-msg: Allow basenc as program prefix. * src/.gitignore: Ignore the new binary. * src/basenc.c: (usage): Mention new options. (main): Handle new options. (isbase*, base*_length, base*_encode, base*_decode_ctx): Implement new encoding/decoding formats. * src/local.mk: Add new program. * tests/local.mk: Add new test. * tests/misc/basenc.pl: New tests. * tests/misc/help-version.sh (basenc_setup): use '--version' for default invocation (basenc errors with no parameters). 
--- AUTHORS | 1 + NEWS | 6 + README | 18 +- build-aux/gen-lists-of-programs.sh | 1 + doc/coreutils.texi | 131 +++- man/.gitignore | 1 + man/basenc.x | 38 ++ man/local.mk | 1 + scripts/git-hooks/commit-msg | 8 +- src/.gitignore | 1 + src/basenc.c | 922 ++++++++++++++++++++++++++++- src/local.mk | 3 + tests/local.mk | 1 + tests/misc/basenc.pl | 284 +++++++++ tests/misc/help-version.sh | 1 + 15 files changed, 1397 insertions(+), 20 deletions(-) create mode 100644 man/basenc.x create mode 100755 tests/misc/basenc.pl diff --git a/AUTHORS b/AUTHORS index de27076bf..46948d5fd 100644 --- a/AUTHORS +++ b/AUTHORS @@ -6,6 +6,7 @@ b2sum: Padraig Brady, Samuel Neves base32: Simon Josefsson base64: Simon Josefsson basename: David MacKenzie +basenc: Simon Josefsson, Assaf Gordon cat: Torbjorn Granlund, Richard M. Stallman chcon: Russell Coker, Jim Meyering chgrp: David MacKenzie, Jim Meyering diff --git a/NEWS b/NEWS index 47eb1ddcc..6963fe305 100644 --- a/NEWS +++ b/NEWS @@ -44,6 +44,12 @@ GNU coreutils NEWS -*- outline -*- test now supports the '-N FILE' unary operator (like e.g. bash) to check whether FILE exists and has been modified since it was last read. +** New commands + + basenc is added to complement existing base64,base32 commands, + and encodes and decodes printable text using various common encodings: + base64,base64url,base32,base32hex,base16,base2,z85. + * Noteworthy changes in release 8.30 (2018-07-01) [stable] diff --git a/README b/README index 643e5eab6..1f2a215fd 100644 --- a/README +++ b/README @@ -7,15 +7,15 @@ arbitrary limits. 
The programs that can be built with this package are: - [ arch b2sum base32 base64 basename cat chcon chgrp chmod chown chroot cksum - comm coreutils cp csplit cut date dd df dir dircolors dirname du echo env - expand expr factor false fmt fold groups head hostid hostname id install - join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp mv nice nl - nohup nproc numfmt od paste pathchk pinky pr printenv printf ptx pwd - readlink realpath rm rmdir runcon seq sha1sum sha224sum sha256sum sha384sum - sha512sum shred shuf sleep sort split stat stdbuf stty sum sync tac tail - tee test timeout touch tr true truncate tsort tty uname unexpand uniq - unlink uptime users vdir wc who whoami yes + [ arch b2sum base32 base64 basename basenc cat chcon chgrp chmod chown + chroot cksum comm coreutils cp csplit cut date dd df dir dircolors dirname + du echo env expand expr factor false fmt fold groups head hostid hostname + id install join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp + mv nice nl nohup nproc numfmt od paste pathchk pinky pr printenv printf ptx + pwd readlink realpath rm rmdir runcon seq sha1sum sha224sum sha256sum + sha384sum sha512sum shred shuf sleep sort split stat stdbuf stty sum sync + tac tail tee test timeout touch tr true truncate tsort tty uname unexpand + uniq unlink uptime users vdir wc who whoami yes See the file NEWS for a list of major changes in the current release. diff --git a/build-aux/gen-lists-of-programs.sh b/build-aux/gen-lists-of-programs.sh index cdbcd0a9e..3ec9a6dd1 100755 --- a/build-aux/gen-lists-of-programs.sh +++ b/build-aux/gen-lists-of-programs.sh @@ -45,6 +45,7 @@ normal_progs=' b2sum base64 base32 + basenc basename cat chcon diff --git a/doc/coreutils.texi b/doc/coreutils.texi index f8339d73f..8d303cd56 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -41,6 +41,7 @@ * base32: (coreutils)base32 invocation. Base32 encode/decode data. * base64: (coreutils)base64 invocation. Base64 encode/decode data. 
* basename: (coreutils)basename invocation. Strip directory and suffix. +* basenc: (coreutils)basenc invocation. Encoding/decoding of data. * cat: (coreutils)cat invocation. Concatenate and write files. * chcon: (coreutils)chcon invocation. Change SELinux CTX of files. * chgrp: (coreutils)chgrp invocation. Change file groups. @@ -184,7 +185,7 @@ Free Documentation License''. @menu * Introduction:: Caveats, overview, and authors * Common options:: Common options -* Output of entire files:: cat tac nl od base32 base64 +* Output of entire files:: cat tac nl od base32 base64 basenc * Formatting file contents:: fmt pr fold * Output of parts of files:: head tail split csplit * Summarizing files:: wc sum cksum b2sum md5sum sha1sum sha2 @@ -242,6 +243,7 @@ Output of entire files * od invocation:: Write files in octal or other formats * base32 invocation:: Transform data into printable data * base64 invocation:: Transform data into printable data +* basenc invocation:: Transform data into printable data Formatting file contents @@ -1589,6 +1591,7 @@ in some way. * od invocation:: Write files in octal or other formats. * base32 invocation:: Transform data into printable data. * base64 invocation:: Transform data into printable data. +* basenc invocation:: Transform data into printable data. @end menu @node cat invocation @@ -2252,6 +2255,132 @@ to permit distorted data to be decoded. @exitstatus +@node basenc invocation +@section @command{basenc}: Transform data into printable data + +@pindex basenc +@cindex base32 encoding + +@command{basenc} transforms data read from a file, or standard input, +into (or from) various common encoding forms. The encoded form uses +printable ASCII characters to represent binary data. + +Synopses: + +@example +basenc @var{encoding} [@var{option}]@dots{} [@var{file}] +basenc @var{encoding} --decode [@var{option}]@dots{} [@var{file}] +@end example + +The @var{encoding} argument is required. If @var{file} is omitted, +reads input from stdin. 
The @option{-w/--wrap},@option{-i/--ignore-garbage}, +@option{-d/--decode} options of this command are precisely the same as +for @command{base64}. @xref{base64 invocation}. + + +Supported @var{encoding}s are: + +@table @samp + +@item --base64 +@opindex --base64 +Encode into (or decode from with @option{-d/--decode}) base64 form. +The format conforms to +@uref{https://tools.ietf.org/search/rfc4648#section-4, RFC 4648@hashchar{}4}. +Equivalent to the @command{base64} command. + +@item --base64url +@opindex --base64url +Encode into (or decode from with @option{-d/--decode}) file-and-url-safe +base64 form (using @samp{_} and @samp{-} instead of @samp{+} and @samp{/}). +The format conforms to +@uref{https://tools.ietf.org/search/rfc4648#section-5, RFC 4648@hashchar{}5}. + +@item --base32 +@opindex --base32 +Encode into (or decode from with @option{-d/--decode}) base32 form. +The encoded data uses the @samp{ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=} characters. +The format conforms to +@uref{https://tools.ietf.org/search/rfc4648#section-6, RFC 4648@hashchar{}6}. +Equivalent to the @command{base32} command. + +@item --base32hex +@opindex --base32hex +Encode into (or decode from with @option{-d/--decode}) Extended Hex Alphabet +base32 form. The encoded data uses the +@samp{0123456789ABCDEFGHIJKLMNOPQRSTUV=} characters. The format conforms to +@uref{https://tools.ietf.org/search/rfc4648#section-7, RFC 4648@hashchar{}7}. + +@item --base16 +@opindex --base16 +Encode into (or decode from with @option{-d/--decode}) base16 (hexadecimal) +form. The encoded data uses the @samp{0123456789ABCDEF} characters. The format +conforms to +@uref{https://tools.ietf.org/search/rfc4648#section-8, RFC 4648@hashchar{}8}. + +@item --base2lsbf +@opindex --base2lsbf +Encode into (or decode from with @option{-d/--decode}) binary string form +(@samp{0} and @samp{1}) with the @emph{least} significant bit of every byte +first. 
+ +@item --base2msbf +@opindex --base2msbf +Encode into (or decode from with @option{-d/--decode}) binary string form +(@samp{0} and @samp{1}) with the @emph{most} significant bit of every byte +first. + +@item --z85 +@opindex --z85 +Encode into (or decode from with @option{-d/--decode}) Z85 form +(a modified Ascii85 form). The encoded data uses the +@samp{0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTU@ +VWXYZ.-:+=^!/*?&<>()[]@{@}@@%$#} +characters. The format conforms to +@uref{https://rfc.zeromq.org/spec:32/Z85/, ZeroMQ spec:32/Z85}. + +When encoding with @option{--z85}, input length must be a multiple of 4; +when decoding with @option{--z85}, input length must be a multiple of 5. + +@end table + + + +Encoding/decoding examples: + +@example +$ printf '\376\117\202' | basenc --base64 +/k+C + +$ printf '\376\117\202' | basenc --base64url +_k-C + +$ printf '\376\117\202' | basenc --base32 +7ZHYE=== + +$ printf '\376\117\202' | basenc --base32hex +VP7O4=== + +$ printf '\376\117\202' | basenc --base16 +FE4F82 + +$ printf '\376\117\202' | basenc --base2lsbf +011111111111001001000001 + +$ printf '\376\117\202' | basenc --base2msbf +111111100100111110000010 + +$ printf '\376\117\202\000' | basenc --z85 +@@.FaC + +$ printf 01010100 | basenc --base2msbf --decode +T + +$ printf 01010100 | basenc --base2lsbf --decode +* +@end example + + @node Formatting file contents @chapter Formatting file contents diff --git a/man/.gitignore b/man/.gitignore index 9ef048a4c..4eecb7833 100644 --- a/man/.gitignore +++ b/man/.gitignore @@ -3,6 +3,7 @@ Makefile.in b2sum.1 base32.1 base64.1 +basenc.1 basename.1 cat.1 chgrp.1 diff --git a/man/basenc.x b/man/basenc.x new file mode 100644 index 000000000..1fbabd3c6 --- /dev/null +++ b/man/basenc.x @@ -0,0 +1,38 @@ +'\" Copyright (C) 2018 Free Software Foundation, Inc. +'\" +'\" This is free software. You may redistribute copies of it under the terms +'\" of the GNU General Public License . 
+'\" There is NO WARRANTY, to the extent permitted by law. +[NAME] +basenc \- Encode/decode data and print to standard output +[DESCRIPTION] +.\" Add any additional description here +[ENCODINGS EXAMPLES] +.PP +.nf +.RS +$ printf '\\376\\117\\202' | basenc \-\-base64 +/k+C + +$ printf '\\376\\117\\202' | basenc \-\-base64url +_k-C + +$ printf '\\376\\117\\202' | basenc \-\-base32 +7ZHYE=== + +$ printf '\\376\\117\\202' | basenc \-\-base32hex +VP7O4=== + +$ printf '\\376\\117\\202' | basenc \-\-base16 +FE4F82 + +$ printf '\\376\\117\\202' | basenc \-\-base2lsbf +011111111111001001000001 + +$ printf '\\376\\117\\202' | basenc \-\-base2msbf +111111100100111110000010 + +$ printf '\\376\\117\\202\\000' | basenc \-\-z85 +@.FaC +.RE +.fi diff --git a/man/local.mk b/man/local.mk index 05d44012d..443942c93 100644 --- a/man/local.mk +++ b/man/local.mk @@ -69,6 +69,7 @@ man/b2sum.1: src/b2sum$(EXEEXT) man/base32.1: src/base32$(EXEEXT) man/base64.1: src/base64$(EXEEXT) man/basename.1: src/basename$(EXEEXT) +man/basenc.1: src/basenc$(EXEEXT) man/cat.1: src/cat$(EXEEXT) man/chcon.1: src/chcon$(EXEEXT) man/chgrp.1: src/chgrp$(EXEEXT) diff --git a/scripts/git-hooks/commit-msg b/scripts/git-hooks/commit-msg index 3dbddc63d..8a4b894ef 100755 --- a/scripts/git-hooks/commit-msg +++ b/scripts/git-hooks/commit-msg @@ -14,10 +14,10 @@ $editor = "vi" if $? != 0 or $editor =~ /^\s*\z/; # Keywords allowed before the colon on the first line of a commit message: # program names and a few general category names. 
my @valid = qw( - arch b2sum base32 base64 basename cat chcon chgrp chmod chown chroot cksum - comm cp csplit cut date dd df dir dircolors dirname du echo env expand - expr factor false fmt fold groups head hostid hostname id install - join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp + arch b2sum base32 base64 basenc basename cat chcon chgrp chmod chown + chroot cksum comm cp csplit cut date dd df dir dircolors dirname du echo + env expand expr factor false fmt fold groups head hostid hostname id + install join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp mv nice nl nohup nproc numfmt od paste pathchk pinky pr printenv printf ptx pwd readlink realpath rm rmdir runcon seq sha1sum sha224sum sha256sum sha384sum sha512sum shred shuf sleep sort split stat stdbuf stty diff --git a/src/.gitignore b/src/.gitignore index 70ab2cac8..86d82ad4b 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -5,6 +5,7 @@ arch b2sum base32 base64 +basenc basename cat chcon diff --git a/src/basenc.c b/src/basenc.c index 1f5c09aae..49aa35aa0 100644 --- a/src/basenc.c +++ b/src/basenc.c @@ -25,6 +25,7 @@ #include #include "system.h" +#include "c-ctype.h" #include "die.h" #include "error.h" #include "fadvise.h" @@ -33,23 +34,60 @@ #include "xdectoint.h" #include "xbinary-io.h" -#define AUTHORS proper_name ("Simon Josefsson") +#if BASE_TYPE == 42 +# define AUTHORS \ + proper_name ("Simon Josefsson"), \ + proper_name ("Assaf Gordon") +#else +# define AUTHORS proper_name ("Simon Josefsson") +#endif #if BASE_TYPE == 32 # include "base32.h" # define PROGRAM_NAME "base32" -#else +#elif BASE_TYPE == 64 # include "base64.h" # define PROGRAM_NAME "base64" +#elif BASE_TYPE == 42 +# include "base32.h" +# include "base64.h" +# include +# define PROGRAM_NAME "basenc" +#else +# error missing/invalid BASE_TYPE definition #endif + +#if BASE_TYPE == 42 +enum +{ + BASE64_OPTION = CHAR_MAX + 1, + BASE64URL_OPTION, + BASE32_OPTION, + BASE32HEX_OPTION, + BASE16_OPTION, + BASE2MSBF_OPTION, 
+ BASE2LSBF_OPTION, + Z85_OPTION +}; +#endif + static struct option const long_options[] = { {"decode", no_argument, 0, 'd'}, {"wrap", required_argument, 0, 'w'}, {"ignore-garbage", no_argument, 0, 'i'}, - +#if BASE_TYPE == 42 + {"base64", no_argument, 0, BASE64_OPTION}, + {"base64url", no_argument, 0, BASE64URL_OPTION}, + {"base32", no_argument, 0, BASE32_OPTION}, + {"base32hex", no_argument, 0, BASE32HEX_OPTION}, + {"base16", no_argument, 0, BASE16_OPTION}, + {"base2msbf", no_argument, 0, BASE2MSBF_OPTION}, + {"base2lsbf", no_argument, 0, BASE2LSBF_OPTION}, + {"z85", no_argument, 0, Z85_OPTION}, +#endif {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} @@ -64,12 +102,43 @@ usage (int status) { printf (_("\ Usage: %s [OPTION]... [FILE]\n\ +"), program_name); + +#if BASE_TYPE == 42 + fputs (_("\ +basenc encode or decode FILE, or standard input, to standard output.\n\ +"), stdout); +#else + printf (_("\ Base%d encode or decode FILE, or standard input, to standard output.\n\ -"), program_name, BASE_TYPE); +"), BASE_TYPE); +#endif emit_stdin_note (); emit_mandatory_arg_note (); - +#if BASE_TYPE == 42 + fputs (_("\ + --base64 same as 'base64' program (RFC4648 section 4)\n\ +"), stdout); + fputs (_("\ + --base64url file- and url-safe base64 (RFC4648 section 5)\n\ +"), stdout); + fputs (_("\ + --base32 same as 'base32' program (RFC4648 section 6)\n\ +"), stdout); + fputs (_("\ + --base32hex extended hex alphabet base32 (RFC4648 section 7)\n\ +"), stdout); + fputs (_("\ + --base16 hex encoding (RFC4648 section 8)\n\ +"), stdout); + fputs (_("\ + --base2msbf bit string with most significant bit (msb) first\n\ +"), stdout); + fputs (_("\ + --base2lsbf bit string with least significant bit (lsb) first\n\ +"), stdout); +#endif fputs (_("\ -d, --decode decode data\n\ -i, --ignore-garbage when decoding, ignore non-alphabet characters\n\ @@ -77,8 +146,23 @@ Base%d encode or decode FILE, or standard input, to standard output.\n\ Use 0 to disable line 
wrapping\n\ \n\ "), stdout); +#if BASE_TYPE == 42 + fputs (_("\ + --z85 ascii85-like encoding (ZeroMQ spec:32/Z85);\n\ + when encoding, input length must be a multiple of 4;\n\ + when decoding, input length must be a multiple of 5\n\ +"), stdout); +#endif fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); +#if BASE_TYPE == 42 + fputs (_("\ +\n\ +When decoding, the input may contain newlines in addition to the bytes of\n\ +the formal alphabet. Use --ignore-garbage to attempt to recover\n\ +from any other non-alphabet bytes in the encoded stream.\n\ +"), stdout); +#else printf (_("\ \n\ The data are encoded as described for the %s alphabet in RFC 4648.\n\ @@ -86,6 +170,7 @@ When decoding, the input may contain newlines in addition to the bytes of\n\ the formal %s alphabet. Use --ignore-garbage to attempt to recover\n\ from any other non-alphabet bytes in the encoded stream.\n"), PROGRAM_NAME, PROGRAM_NAME); +#endif emit_ancillary_info (PROGRAM_NAME); } @@ -109,7 +194,7 @@ verify (DEC_BLOCKSIZE % 40 == 0); /* So complete encoded blocks are used. */ # define base_decode_ctx_init base32_decode_ctx_init # define base_decode_ctx base32_decode_ctx # define isbase isbase32 -#else +#elif BASE_TYPE == 64 # define BASE_LENGTH BASE64_LENGTH /* Note that increasing this may decrease performance if --ignore-garbage is used, because of the memmove operation below. */ @@ -124,8 +209,736 @@ verify (DEC_BLOCKSIZE % 12 == 0); /* So complete encoded blocks are used. */ # define base_decode_ctx_init base64_decode_ctx_init # define base_decode_ctx base64_decode_ctx # define isbase isbase64 +#elif BASE_TYPE == 42 + + +# define BASE_LENGTH base_length + +/* Note that increasing this may decrease performance if --ignore-garbage + is used, because of the memmove operation below. 
*/ +# define DEC_BLOCKSIZE (1024*5) + +static int (*base_length) (int i); +static bool (*isbase) (char ch); +static void (*base_encode) (const char *restrict in, size_t inlen, + char *restrict out, size_t outlen); + +struct base16_decode_context +{ + char nibble; + bool have_nibble; +}; + +struct z85_decode_context +{ + int i; + unsigned char octets[5]; +}; + +struct base2_decode_context +{ + unsigned octet; +}; + +struct base_decode_context +{ + int i; /* will be updated manually */ + union { + struct base64_decode_context b64ctx; + struct base32_decode_context b32ctx; + struct base16_decode_context b16ctx; + struct base2_decode_context b2ctx; + struct z85_decode_context z85ctx; + }; + char *inbuf; + size_t bufsize; +}; +static void (*base_decode_ctx_init) (struct base_decode_context *ctx); +static bool (*base_decode_ctx) (struct base_decode_context *ctx, +const char *restrict in, size_t inlen, +char *restrict out, size_t *outlen); #endif + + + +#if BASE_TYPE == 42 + +static int +base64_length_wrapper (int len) +{ + return BASE64_LENGTH (len); +} + +static void +base64_decode_ctx_init_wrapper (struct base_decode_context *ctx) +{ + base64_decode_ctx_init (&ctx->b64ctx); +} + +static bool +base64_decode_ctx_wrapper (struct base_decode_context *ctx, + const char *restrict in, size_t inlen, + char *restrict out, size_t *outlen) +{ + bool b = base64_decode_ctx (&ctx->b64ctx, in, inlen, out, outlen); + ctx->i = ctx->b64ctx.i; + return b; +} + +static void +init_inbuf (struct base_decode_context *ctx) +{ + ctx->bufsize = DEC_BLOCKSIZE; + ctx->inbuf = xcharalloc (ctx->bufsize); +} + +static void +prepare_inbuf (struct base_decode_context *ctx, size_t inlen) +{ + if (ctx->bufsize < inlen) + { + ctx->bufsize = inlen*2; + ctx->inbuf = xnrealloc (ctx->inbuf, ctx->bufsize, sizeof (char) ); + } +} + + +static void +base64url_encode (const char *restrict in, size_t inlen, + char *restrict out, size_t outlen) +{ + base64_encode (in, inlen, out, outlen); + /* translate 62nd and 
63rd characters */ + char* p = out; + while (outlen--) + { + if (*p == '+') + *p = '-'; + else if (*p == '/') + *p = '_'; + ++p; + } +} + +static bool +isbase64url (char ch) +{ + return ( (ch == '-') || (ch == '_') \ + || ( (ch != '+') && (ch != '/') && isbase64 (ch) ) ); +} + +static void +base64url_decode_ctx_init_wrapper (struct base_decode_context *ctx) +{ + base64_decode_ctx_init (&ctx->b64ctx); + init_inbuf (ctx); +} + + +static bool +base64url_decode_ctx_wrapper (struct base_decode_context *ctx, + const char *restrict in, size_t inlen, + char *restrict out, size_t *outlen) +{ + prepare_inbuf (ctx, inlen); + memcpy (ctx->inbuf, in, inlen); + + /* translate 62nd and 63rd characters */ + size_t i = inlen; + char* p = ctx->inbuf; + while (i--) + { + if (*p == '+' || *p =='/') + { + *outlen = 0; + return false; /* reject base64 input */ + } + else if (*p == '-') + *p = '+'; + else if (*p == '_') + *p = '/'; + ++p; + } + + bool b = base64_decode_ctx (&ctx->b64ctx, ctx->inbuf, inlen, out, outlen); + ctx->i = ctx->b64ctx.i; + + return b; +} + + + +static int +base32_length_wrapper (int len) +{ + return BASE32_LENGTH (len); +} + +static void +base32_decode_ctx_init_wrapper (struct base_decode_context *ctx) +{ + base32_decode_ctx_init (&ctx->b32ctx); +} + +static bool +base32_decode_ctx_wrapper (struct base_decode_context *ctx, + const char *restrict in, size_t inlen, + char *restrict out, size_t *outlen) +{ + bool b = base32_decode_ctx (&ctx->b32ctx, in, inlen, out, outlen); + ctx->i = ctx->b32ctx.i; + return b; +} + +/* ABCDEFGHIJKLMNOPQRSTUVWXYZ234567 + to + 0123456789ABCDEFGHIJKLMNOPQRSTUV */ +static const char base32_norm_to_hex[32+9] = { +/*0x32, 0x33, 0x34, 0x35, 0x36, 0x37, */ + 'Q', 'R', 'S', 'T', 'U', 'V', + + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, + +/*0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, */ + '0', '1', '2', '3', '4', '5', '6', '7', + +/*0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, */ + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', + 
+/*0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, */ + 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', + +/*0x59, 0x5a, */ + 'O', 'P', +}; + +/* 0123456789ABCDEFGHIJKLMNOPQRSTUV + to + ABCDEFGHIJKLMNOPQRSTUVWXYZ234567 */ +static const char base32_hex_to_norm[32+9] = { + /* from: 0x30 .. 0x39 ('0' to '9') */ + /* to:*/ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', + + 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, + + /* from: 0x41 .. 0x4A ('A' to 'J') */ + /* to:*/ 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + + /* from: 0x4B .. 0x54 ('K' to 'T') */ + /* to:*/ 'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5', + + /* from: 0x55 .. 0x56 ('U' to 'V') */ + /* to:*/ '6', '7' +}; + + +inline static bool +isbase32hex (char ch) +{ + return ( (ch>='0' && ch<='9') || (ch>='A' && ch<='V') ); +} + + +static void +base32hex_encode (const char *restrict in, size_t inlen, + char *restrict out, size_t outlen) +{ + base32_encode (in, inlen, out, outlen); + + char* p = out; + while (outlen--) + { + assert (*p >= 0x32 && *p <= 0x5a); /* LCOV_EXCL_LINE */ + *p = base32_norm_to_hex[(int)*p - 0x32]; + ++p; + } +} + + +static void +base32hex_decode_ctx_init_wrapper (struct base_decode_context *ctx) +{ + base32_decode_ctx_init (&ctx->b32ctx); + init_inbuf (ctx); +} + + +static bool +base32hex_decode_ctx_wrapper (struct base_decode_context *ctx, + const char *restrict in, size_t inlen, + char *restrict out, size_t *outlen) +{ + prepare_inbuf (ctx, inlen); + + size_t i = inlen; + char *p = ctx->inbuf; + while (i--) + { + if (isbase32hex (*in)) + *p = base32_hex_to_norm[ (int)*in - 0x30]; + else + *p = *in; + ++p; + ++in; + } + + bool b = base32_decode_ctx (&ctx->b32ctx, ctx->inbuf, inlen, out, outlen); + ctx->i = ctx->b32ctx.i; + + return b; +} + + +static bool +isbase16 (char ch) +{ + return ( (ch>='0' && ch<='9') || (ch>='A' && ch<='F') ); +} + +static int +base16_length (int len) +{ + return len*2; +} + +static const char base16[16] = { "0123456789ABCDEF" }; + +static void +base16_encode 
(const char *restrict in, size_t inlen, + char *restrict out, size_t outlen) +{ + while (inlen--) + { + *out++ = base16[ ( ((unsigned char)*in) >> 4) ]; + *out++ = base16[ ( ((unsigned char)*in) & 0x0F ) ]; + ++in; + } +} + + +static void +base16_decode_ctx_init (struct base_decode_context *ctx) +{ + init_inbuf (ctx); + ctx->b16ctx.have_nibble = false; + ctx->i = 1; +} + + +static bool +base16_decode_ctx (struct base_decode_context *ctx, + const char *restrict in, size_t inlen, + char *restrict out, size_t *outlen) +{ + bool ignore_lines = true; /* for now, always ignore them */ + unsigned int nib; + + *outlen = 0; + + /* inlen==0 is request to flush output. + if there is a dangling high nibble - we are missing the low nibble, + so return false - indicating an invalid input. */ + if (inlen == 0) + return !ctx->b16ctx.have_nibble; + + while (inlen--) + { + if (ignore_lines && *in == '\n') + { + ++in; + continue; + } + + if (*in >= 'A' && *in <= 'F') + nib = (*in-'A'+10); + else if (*in >= '0' && *in <= '9') + nib = (*in-'0'); + else + return false; /* garbage - return false */ + + ++in; + + if (ctx->b16ctx.have_nibble) + { + /* have both nibbles, write octet */ + *out++ = (ctx->b16ctx.nibble<<4) + nib; + ++(*outlen); + } + else + { + /* Store higher nibble until next one arrives */ + ctx->b16ctx.nibble = nib; + } + ctx->b16ctx.have_nibble = !ctx->b16ctx.have_nibble; + } + return true; +} + + + + +static int +z85_length (int len) +{ + /* Z85 does not allow padding, so no need to round to highest integer. 
*/ + int outlen = (len*5)/4; + return outlen; +} + +static bool +isz85 (char ch) +{ + return c_isalnum (ch) || (strchr (".-:+=^!/*?&<>()[]{}@%$#", ch) != NULL); +} + +static char z85_encoding[85] = { + "0123456789" \ + "abcdefghij" \ + "klmnopqrst" \ + "uvwxyzABCD" \ + "EFGHIJKLMN" \ + "OPQRSTUVWX" \ + "YZ.-:+=^!/" \ + "*?&<>()[]{" \ + "}@%$#" +}; + +static void +z85_encode (const char *restrict in, size_t inlen, + char *restrict out, size_t outlen) +{ + int i = 0; + unsigned char quad[4]; + unsigned int val; + size_t outidx = 0; + + while (1) + { + if (inlen == 0) + { + /* no more input, exactly on 4 octet boundary. */ + if (i ==0) + return; + + /* currently, there's no way to return an error in encoding. */ + die (EXIT_FAILURE, 0, + _("invalid input (length must be multiple of 4 characters)")); + } + else + { + quad[i++] = (unsigned char)*in++; + --inlen; + } + + /* Got a quad, encode it */ + if (i==4) + { + val = (quad[0]<<24) + (quad[1]<<16) + (quad[2]<<8) + quad[3]; + + for (int j = 4; j>=0; --j) + { + unsigned char c = val%85; + val /= 85; + + /* NOTE: if there is padding (which is trimmed by z85 + before outputting the result), the output buffer 'out' + might not include enough allocated bytes for the padding, + so don't store them. 
*/ + if (outidx + j < outlen) + out[j] = z85_encoding[c]; + } + out += 5; + outidx += 5; + i = 0; + } + } +} + +static void +z85_decode_ctx_init (struct base_decode_context *ctx) +{ + init_inbuf (ctx); + ctx->z85ctx.i = 0; + ctx->i = 1; +} + + +# define Z85_LO_CTX_TO_32BIT_VAL(ctx) \ + (((ctx)->z85ctx.octets[1] * 85 * 85 * 85) + \ + ((ctx)->z85ctx.octets[2] * 85 * 85) + \ + ((ctx)->z85ctx.octets[3] * 85) + \ + ((ctx)->z85ctx.octets[4])) + + +# define Z85_HI_CTX_TO_32BIT_VAL(ctx) \ + ((ctx)->z85ctx.octets[0] * 85 * 85 * 85 * 85 ) + +/* + 0 - 9: 0 1 2 3 4 5 6 7 8 9 + 10 - 19: a b c d e f g h i j + 20 - 29: k l m n o p q r s t + 30 - 39: u v w x y z A B C D + 40 - 49: E F G H I J K L M N + 50 - 59: O P Q R S T U V W X + 60 - 69: Y Z . - : + = ^ ! / #dummy comment to workaround syntax-check + 70 - 79: * ? & < > ( ) [ ] { + 80 - 84: } @ % $ # +*/ +static unsigned char z85_decoding[93] = { + 68, 255, 84, 83, 82, 72, 255, /* ! " # $ % & ' */ + 75, 76, 70, 65, 255, 63, 62, 69, /* ( ) * + , - . / */ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, /* '0' to '9' */ + 64, 255, 73, 66, 74, 71, 81, /* : ; < = > ? @ */ + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, /* 'A' to 'J' */ + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, /* 'K' to 'T' */ + 56, 57, 58, 59, 60, 61, /* 'U' to 'Z' */ + 77, 255, 78, 67, 255, 255, /* [ \ ] ^ _ ` */ + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /* 'a' to 'j' */ + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, /* 'k' to 't' */ + 30, 31, 32, 33, 34, 35, /* 'u' to 'z' */ + 79, 255, 80 /* { | } */ +}; + +static bool +z85_decode_ctx (struct base_decode_context *ctx, + const char *restrict in, size_t inlen, + char *restrict out, size_t *outlen) +{ + bool ignore_lines = true; /* for now, always ignore them */ + unsigned char c; + + *outlen = 0; + + /* inlen==0 is request to flush output. + if there are dangling values - we are missing entries, + so return false - indicating an invalid input. 
*/ + if (inlen == 0) + { + if (ctx->z85ctx.i > 0) + { + /* Z85 variant does not allow padding - input must + be a multiple of 5 - so return error. */ + return false; + } + return true; + } + + while (inlen--) + { + if (ignore_lines && *in == '\n') + { + ++in; + continue; + } + + /* z85 decoding */ + c = (unsigned char)(*in); + + if (c >= 33 && c <= 125) + { + c = z85_decoding[c-33]; + if (c == 255) + return false; /* garbage - return false */ + } + else + return false; /* garbage - return false */ + + ++in; + + ctx->z85ctx.octets[ctx->z85ctx.i++] = c; + if (ctx->z85ctx.i == 5) + { + /* decode the lowest 4 octets, then check for overflows. */ + unsigned int val = Z85_LO_CTX_TO_32BIT_VAL (ctx); + + /* The Z85 spec and the reference implementation say nothing + about overflows. To be on the safe side, reject them. + + '$' (decoded to 83) in the highest octet + would result in value of 83*85^4 = 4332651875 , which is larger + than 2^32-1 and will overflow an unsigned int (similarly + for '#' decoded to 84). + + '%' (decoded to 82) in the highest octet can fit in unsigned int + if the other 4 octets decode to a small enough value. 
+ */ + if ((ctx->z85ctx.octets[0] == 84 || ctx->z85ctx.octets[0] == 83) \ + || (ctx->z85ctx.octets[0] == 82 \ + && (val > 0xFFFFFFFF - 82*85*85*85*85U))) + return false; + + /* no overflow, add the high octet value */ + val += Z85_HI_CTX_TO_32BIT_VAL (ctx); + + *out++ = (val>>24)&0xFF; + *out++ = (val>>16)&0xFF; + *out++ = (val>>8)&0xFF; + *out++ = (val)&0xFF; + + *outlen += 4; + + ctx->z85ctx.i = 0; + } + } + ctx->i = ctx->z85ctx.i; + return true; +} + + +inline static bool +isbase2 (char ch) +{ + return (ch=='0' || ch=='1'); +} + +static int +base2_length (int len) +{ + return len*8; +} + + +inline static void +base2msbf_encode (const char *restrict in, size_t inlen, + char *restrict out, size_t outlen) +{ + unsigned char c; + while (inlen--) + { + c = (unsigned char)*in; + for (int i=0;i<8;++i) + { + *out++ = (c & 0x80)?'1':'0'; + c <<= 1; + } + outlen -= 8; + ++in; + } +} + +inline static void +base2lsbf_encode (const char *restrict in, size_t inlen, + char *restrict out, size_t outlen) +{ + unsigned char c; + while (inlen--) + { + c = (unsigned char)*in; + for (int i=0;i<8;++i) + { + *out++ = (c & 0x01)?'1':'0'; + c >>= 1; + } + outlen -= 8; + ++in; + } +} + + +static void +base2_decode_ctx_init (struct base_decode_context *ctx) +{ + init_inbuf (ctx); + ctx->b2ctx.octet = 0; + ctx->i = 0; +} + + +static bool +base2lsbf_decode_ctx (struct base_decode_context *ctx, + const char *restrict in, size_t inlen, + char *restrict out, size_t *outlen) +{ + bool ignore_lines = true; /* for now, always ignore them */ + + *outlen = 0; + + /* inlen==0 is request to flush output. + if there is a dangling bit - we are missing some bits, + so return false - indicating an invalid input. 
*/ + if (inlen == 0) + return ctx->i==0; + + while (inlen--) + { + if (ignore_lines && *in == '\n') + { + ++in; + continue; + } + + if (!isbase2 (*in)) + return false; + + bool bit = (*in == '1'); + ctx->b2ctx.octet |= bit << ctx->i; + ++ctx->i; + + if (ctx->i==8) + { + *out++ = ctx->b2ctx.octet ; + ctx->b2ctx.octet = 0; + ++*outlen; + ctx->i = 0; + } + + ++in; + } + + return true; +} + +static bool +base2msbf_decode_ctx (struct base_decode_context *ctx, + const char *restrict in, size_t inlen, + char *restrict out, size_t *outlen) +{ + bool ignore_lines = true; /* for now, always ignore them */ + + *outlen = 0; + + /* inlen==0 is request to flush output. + if there is a dangling bit - we are missing some bits, + so return false - indicating an invalid input. */ + if (inlen == 0) + return ctx->i==0; + + while (inlen--) + { + if (ignore_lines && *in == '\n') + { + ++in; + continue; + } + + if (!isbase2 (*in)) + return false; + + bool bit = (*in == '1'); + if (ctx->i == 0) + ctx->i = 8; + --ctx->i; + ctx->b2ctx.octet |= bit << ctx->i; + + if (ctx->i==0) + { + *out++ = ctx->b2ctx.octet ; + ctx->b2ctx.octet = 0; + ++*outlen; + ctx->i = 0; + } + + ++in; + } + + return true; +} + +#endif /* BASE_TYPE == 42, i.e., "basenc"*/ + + + static void wrap_write (const char *buffer, size_t len, uintmax_t wrap_column, size_t *current_column, FILE *out) @@ -209,6 +1022,9 @@ do_decode (FILE *in, FILE *out, bool ignore_garbage) size_t sum; struct base_decode_context ctx; +#if BASE_TYPE == 42 + ctx.inbuf = NULL; +#endif base_decode_ctx_init (&ctx); do @@ -259,6 +1075,10 @@ do_decode (FILE *in, FILE *out, bool ignore_garbage) } } while (!feof (in)); + +#if BASE_TYPE == 42 + IF_LINT (free (ctx.inbuf)); +#endif } int @@ -275,6 +1095,10 @@ main (int argc, char **argv) /* Wrap encoded data around the 76:th column, by default. 
*/ uintmax_t wrap_column = 76; +#if BASE_TYPE == 42 + int base_type = 0; +#endif + initialize_main (&argc, &argv); set_program_name (argv[0]); setlocale (LC_ALL, ""); @@ -299,6 +1123,19 @@ main (int argc, char **argv) ignore_garbage = true; break; +#if BASE_TYPE == 42 + case BASE64_OPTION: + case BASE64URL_OPTION: + case BASE32_OPTION: + case BASE32HEX_OPTION: + case BASE16_OPTION: + case BASE2MSBF_OPTION: + case BASE2LSBF_OPTION: + case Z85_OPTION: + base_type = opt; + break; +#endif + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); @@ -308,6 +1145,79 @@ main (int argc, char **argv) break; } +#if BASE_TYPE == 42 + switch (base_type) + { + case BASE64_OPTION: + base_length = base64_length_wrapper; + isbase = isbase64; + base_encode = base64_encode; + base_decode_ctx_init = base64_decode_ctx_init_wrapper; + base_decode_ctx = base64_decode_ctx_wrapper; + break; + + case BASE64URL_OPTION: + base_length = base64_length_wrapper; + isbase = isbase64url; + base_encode = base64url_encode; + base_decode_ctx_init = base64url_decode_ctx_init_wrapper; + base_decode_ctx = base64url_decode_ctx_wrapper; + break; + + case BASE32_OPTION: + base_length = base32_length_wrapper; + isbase = isbase32; + base_encode = base32_encode; + base_decode_ctx_init = base32_decode_ctx_init_wrapper; + base_decode_ctx = base32_decode_ctx_wrapper; + break; + + case BASE32HEX_OPTION: + base_length = base32_length_wrapper; + isbase = isbase32hex; + base_encode = base32hex_encode; + base_decode_ctx_init = base32hex_decode_ctx_init_wrapper; + base_decode_ctx = base32hex_decode_ctx_wrapper; + break; + + case BASE16_OPTION: + base_length = base16_length; + isbase = isbase16; + base_encode = base16_encode; + base_decode_ctx_init = base16_decode_ctx_init; + base_decode_ctx = base16_decode_ctx; + break; + + case BASE2MSBF_OPTION: + base_length = base2_length; + isbase = isbase2; + base_encode = base2msbf_encode; + base_decode_ctx_init = base2_decode_ctx_init; + base_decode_ctx = 
base2msbf_decode_ctx; + break; + + case BASE2LSBF_OPTION: + base_length = base2_length; + isbase = isbase2; + base_encode = base2lsbf_encode; + base_decode_ctx_init = base2_decode_ctx_init; + base_decode_ctx = base2lsbf_decode_ctx; + break; + + case Z85_OPTION: + base_length = z85_length; + isbase = isz85; + base_encode = z85_encode; + base_decode_ctx_init = z85_decode_ctx_init; + base_decode_ctx = z85_decode_ctx; + break; + + default: + error (0, 0, _("missing encoding type")); + usage (EXIT_FAILURE); + } +#endif + if (argc - optind > 1) { error (0, 0, _("extra operand %s"), quote (argv[optind])); diff --git a/src/local.mk b/src/local.mk index 5b9baad45..345e6f556 100644 --- a/src/local.mk +++ b/src/local.mk @@ -97,6 +97,7 @@ LDADD = src/libver.a lib/libcoreutils.a $(LIBINTL) lib/libcoreutils.a src_arch_LDADD = $(LDADD) src_base64_LDADD = $(LDADD) src_base32_LDADD = $(LDADD) +src_basenc_LDADD = $(LDADD) src_basename_LDADD = $(LDADD) src_cat_LDADD = $(LDADD) src_chcon_LDADD = $(LDADD) @@ -414,6 +415,8 @@ src_base64_SOURCES = src/basenc.c src_base64_CPPFLAGS = -DBASE_TYPE=64 $(AM_CPPFLAGS) src_base32_SOURCES = src/basenc.c src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) +src_basenc_SOURCES = src/basenc.c +src_basenc_CPPFLAGS = -DBASE_TYPE=42 $(AM_CPPFLAGS) src_ginstall_CPPFLAGS = -DENABLE_MATCHPATHCON=1 $(AM_CPPFLAGS) diff --git a/tests/local.mk b/tests/local.mk index 5823886f8..d2fdb7e86 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -278,6 +278,7 @@ all_tests = \ tests/misc/cat-self.sh \ tests/misc/base64.pl \ tests/misc/basename.pl \ + tests/misc/basenc.pl \ tests/misc/close-stdout.sh \ tests/misc/chroot-fail.sh \ tests/misc/comm.pl \ diff --git a/tests/misc/basenc.pl b/tests/misc/basenc.pl new file mode 100755 index 000000000..e3bba74a9 --- /dev/null +++ b/tests/misc/basenc.pl @@ -0,0 +1,284 @@ +#!/usr/bin/perl +# Exercise basenc. + +# Copyright (C) 2006-2018 Free Software Foundation, Inc. 
+ +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +# This test exercises the various encodings (other than base64/32). +# It also does not test the general options (e.g. --wrap), as that code is +# shared and tested in base64. + +use strict; + +(my $program_name = $0) =~ s|.*/||; +my $prog = 'basenc'; + +# Turn off localization of executable's output. +@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + + +my $base64_in = "\x54\x0f\xdc\xf0\x0f\xaf\x4a"; +my $base64_out = "VA/c8A+vSg=="; +my $base64url_out = $base64_out; +$base64url_out =~ y|+/|-_|; +my $base64url_out_nl = $base64url_out; +$base64url_out_nl =~ s/(..)/\1\n/g; # add newline every two characters + + +my $base32_in = "\xfd\xd8\x07\xd1\xa5"; +my $base32_out = "7XMAPUNF"; +my $x = $base32_out; +$x =~ y|ABCDEFGHIJKLMNOPQRSTUVWXYZ234567|0123456789ABCDEFGHIJKLMNOPQRSTUV|; +my $base32hex_out = $x; + +# base32 with padding and newline +my $base32_in2 = "\xFF\x00"; +my $base32_out2 = "74AA===="; +$x = $base32_out2; +$x =~ y|ABCDEFGHIJKLMNOPQRSTUVWXYZ234567|0123456789ABCDEFGHIJKLMNOPQRSTUV|; +my $base32hex_out2 = $x; +my $base32hex_out2_nl = $x; +$base32hex_out2_nl =~ s/(...)/\1\n/g; # Add newline every 3 characters + +my $base16_in = "\xfd\xd8\x07\xd1\xa5"; +my $base16_out = "FDD807D1A5"; + +my $z85_in = "\x86\x4F\xD2\x6F\xB5\x59\xF7\x5B"; +my $z85_out = 'HelloWorld'; + +my $base2lsbf_ab = "1000011001000110"; +my $base2lsbf_ab_nl = $base2lsbf_ab;
+$base2lsbf_ab_nl =~ s/(...)/\1\n/g; # Add newline every 3 characters +my $base2msbf_ab = "0110000101100010"; +my $base2msbf_ab_nl = $base2msbf_ab; +$base2msbf_ab_nl =~ s/(...)/\1\n/g; # Add newline every 3 characters + +my $try_help = "Try '$prog --help' for more information.\n"; + +my @Tests = +( + # These are mainly for higher coverage + ['help', '--help', {IN=>''}, {OUT=>""}, {OUT_SUBST=>'s/.*//sm'}], + + # Typical message is " unrecognized option '--foobar'", but on + # Open/NetBSD it is " unknown option -- foobar". + ['error', '--foobar', {IN=>''}, {OUT=>""}, {EXIT=>1}, + {ERR=>"$prog: foobar\n" . $try_help }, + {ERR_SUBST=>"s/(unrecognized|unknown) option [-' ]*foobar[' ]*/foobar/"}], + + ['noenc', '', {IN=>''}, {EXIT=>1}, + {ERR=>"$prog: missing encoding type\n" . $try_help }], + + ## TODO: Fix to "B" (this is a bug in the original base64.c:1064 + ['extra', '--base64 A B', {IN=>''}, {EXIT=>1}, + {ERR=>"$prog: extra operand 'A'\n" . $try_help}], + + + ['empty1', '--base64', {IN=>''}, {OUT=>""}], + ['empty2', '--base64url', {IN=>''}, {OUT=>""}], + ['empty3', '--base32', {IN=>''}, {OUT=>""}], + ['empty4', '--base32hex', {IN=>''}, {OUT=>""}], + ['empty5', '--base16', {IN=>''}, {OUT=>""}], + ['empty6', '--base2msbf', {IN=>''}, {OUT=>""}], + ['empty7', '--base2lsbf', {IN=>''}, {OUT=>""}], + ['empty8', '--z85', {IN=>''}, {OUT=>""}], + + + + + ['b64_1', '--base64', {IN=>$base64_in}, {OUT=>$base64_out}], + ['b64_2', '--base64 -d', {IN=>$base64_out}, {OUT=>$base64_in}], + ['b64_3', '--base64 -d -i', {IN=>'&'.$base64_out},{OUT=>$base64_in}], + + ['b64u_1', '--base64url', {IN=>$base64_in}, {OUT=>$base64url_out}], + ['b64u_2', '--base64url -d', {IN=>$base64url_out}, {OUT=>$base64_in}], + ['b64u_3', '--base64url -di', {IN=>'&'.$base64url_out} , {OUT=>$base64_in}], + ['b64u_4', '--base64url -di', {IN=>'/'.$base64url_out.'+'},{OUT=>$base64_in}], + ['b64u_5', '--base64url -d', {IN=>$base64url_out_nl}, {OUT=>$base64_in}], + ['b64u_6', '--base64url -di', 
{IN=>$base64url_out_nl}, {OUT=>$base64_in}], + # ensure base64url fails to decode base64 input with "+" and "/" + ['b64u_7', '--base64url -d', {IN=>$base64_out}, + {EXIT=>1}, {ERR=>"$prog: invalid input\n"}], + + + + + ['b32_1', '--base32', {IN=>$base32_in}, {OUT=>$base32_out}], + ['b32_2', '--base32 -d', {IN=>$base32_out}, {OUT=>$base32_in}], + ['b32_3', '--base32 -d -i', {IN=>'&'.$base32_out},{OUT=>$base32_in}], + ['b32_4', '--base32', {IN=>$base32_in2}, {OUT=>$base32_out2}], + ['b32_5', '--base32 -d', {IN=>$base32_out2}, {OUT=>$base32_in2}], + ['b32_6', '--base32 -d -i', {IN=>$base32_out2}, {OUT=>$base32_in2}], + + + + ['b32h_1', '--base32hex', {IN=>$base32_in}, {OUT=>$base32hex_out}], + ['b32h_2', '--base32hex -d', {IN=>$base32hex_out}, {OUT=>$base32_in}], + ['b32h_3', '--base32hex -d -i', {IN=>'/'.$base32hex_out}, {OUT=>$base32_in}], + ['b32h_4', '--base32hex -d -i', {IN=>'W'.$base32hex_out}, {OUT=>$base32_in}], + ['b32h_5', '--base32hex -d', {IN=>$base32hex_out.'W'}, , {OUT=>$base32_in}, + {EXIT=>1}, {ERR=>"$prog: invalid input\n"}], + ['b32h_6', '--base32hex -d', {IN=>$base32hex_out.'/'}, {OUT=>$base32_in}, + {EXIT=>1}, {ERR=>"$prog: invalid input\n"}], + ['b32h_7', '--base32hex', {IN=>$base32_in2}, {OUT=>$base32hex_out2}], + ['b32h_8', '--base32hex -d', {IN=>$base32hex_out2}, {OUT=>$base32_in2}], + ['b32h_9', '--base32hex -di', {IN=>$base32hex_out2}, {OUT=>$base32_in2}], + ['b32h_10', '--base32hex -d', {IN=>$base32hex_out2_nl}, {OUT=>$base32_in2}], + ['b32h_11', '--base32hex -di', {IN=>$base32hex_out2_nl}, {OUT=>$base32_in2}], + + + + ['b16_1', '--base16', {IN=>$base16_in}, {OUT=>$base16_out}], + ['b16_2', '--base16 -d', {IN=>$base16_out}, {OUT=>$base16_in}], + ['b16_3', '--base16 -d -i', {IN=>'&'. 
$base16_out}, {OUT=>$base16_in}], + ['b16_4', '--base16 -d -i', {IN=>$base16_out.'G'}, {OUT=>$base16_in}], + ['b16_5', '--base16 -d', {IN=>'.'}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b16_6', '--base16 -d', {IN=>'='}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b16_7', '--base16 -d', {IN=>'G'}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b16_8', '--base16 -d', {IN=>"AB\nCD"}, {OUT=>"\xAB\xCD"}], + + + + ['b2m_1', '--base2m', {IN=>"\xC1"}, {OUT=>"11000001"}], + ['b2m_2', '--base2m -d', {IN=>'11000001'}, {OUT=>"\xC1"}], + ['b2m_3', '--base2m -d', {IN=>"110\n00001"}, {OUT=>"\xC1"}], + ['b2m_4', '--base2m -di', {IN=>"110x00001"}, {OUT=>"\xC1"}], + ['b2m_5', '--base2m -d', {IN=>"110x00001"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b2m_6', '--base2m -d', {IN=>"11000001x"}, {OUT=>"\xC1"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b2m_7', '--base2m -d', {IN=>"1"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b2m_8', '--base2m -d', {IN=>"1000100"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b2m_9', '--base2m -d', {IN=>"100010000000000"}, {OUT=>"\x88"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b2m_10','--base2m', {IN=>"ab"}, {OUT=>$base2msbf_ab}], + ['b2m_11','--base2m -d', {IN=>$base2msbf_ab}, {OUT=>"ab"}], + ['b2m_12','--base2m -d', {IN=>$base2msbf_ab_nl}, {OUT=>"ab"}], + + + ['b2l_1', '--base2l', {IN=>"\x83"}, {OUT=>"11000001"}], + ['b2l_2', '--base2l -d', {IN=>'11000001'}, {OUT=>"\x83"}], + ['b2l_3', '--base2l -d', {IN=>"110\n00001"}, {OUT=>"\x83"}], + ['b2l_4', '--base2l -di', {IN=>"110x00001"}, {OUT=>"\x83"}], + ['b2l_5', '--base2l -d', {IN=>"110x00001"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b2l_6', '--base2l -d', {IN=>"11000001x"}, {OUT=>"\x83"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b2l_7', '--base2l -d', {IN=>"1"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b2l_8', '--base2l -d', {IN=>"1000100"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b2l_9', 
'--base2l -d', {IN=>"100010000000000"}, {OUT=>"\x11"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['b2l_10','--base2l', {IN=>"ab"}, {OUT=>$base2lsbf_ab}], + ['b2l_11','--base2l -d', {IN=>$base2lsbf_ab}, {OUT=>"ab"}], + ['b2l_12','--base2l -d', {IN=>$base2lsbf_ab_nl}, {OUT=>"ab"}], + + + + + + ['z85_1', '--z85', {IN=>$z85_in}, {OUT=>$z85_out}], + ['z85_2', '--z85 -d', {IN=>$z85_out}, {OUT=>$z85_in}], + ['z85_3', '--z85 -d -i', {IN=>'~'. $z85_out}, {OUT=>$z85_in}], + ['z85_4', '--z85 -d -i', {IN=>' '. $z85_out}, {OUT=>$z85_in}], + ['z85_5', '--z85 -d', {IN=>'%j$qP'}, {OUT=>"\xFF\xDD\xBB\x99"}], + ['z85_6', '--z85 -d -i', {IN=>'%j~$qP'}, {OUT=>"\xFF\xDD\xBB\x99"}], + + # z85 decoding requires the input length to be a multiple of 5 octets + ['z85_7', '--z85 -d', {IN=>'hello'}, {OUT=>"5jXu"}], + ['z85_8', '--z85 -d', {IN=>'helloX'}, {OUT=>"5jXu"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_9', '--z85 -d', {IN=>"he\nl\nlo"}, {OUT=>"5jXu"}], + + # Invalid input characters (space ~ ") + ['z85_10', '--z85 -d', {IN=>' j$qP'}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_11', '--z85 -d', {IN=>'%j$q~'}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_12', '--z85 -d', {IN=>'%j$"P'}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + + # Invalid length (binary input must be a multiple of 4 octets, + # z85-encoded input must be a multiple of 5 octets) + ['z85_20', '--z85', {IN=>'A'}, {EXIT=>1}, + {ERR=>"$prog: invalid input (length must be multiple of 4 characters)\n"}], + ['z85_21', '--z85', {IN=>'AB'}, {EXIT=>1}, + {ERR=>"$prog: invalid input (length must be multiple of 4 characters)\n"}], + ['z85_22', '--z85', {IN=>'ABC'}, {EXIT=>1}, + {ERR=>"$prog: invalid input (length must be multiple of 4 characters)\n"}], + ['z85_23', '--z85', {IN=>'ABCD'}, {OUT=>'k%^}b'}], + ['z85_24', '--z85', {IN=>'ABCDE'}, {EXIT=>1}, + {ERR=>"$prog: invalid input (length must be multiple of 4 characters)\n"}], + + ['z85_30', '--z85 -d', {IN=>'A'}, {EXIT=>1}, + {ERR=>"$prog: invalid
input\n"}], + ['z85_31', '--z85 -d', {IN=>'AB'}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_32', '--z85 -d', {IN=>'ABC'}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_33', '--z85 -d', {IN=>'ABCD'}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_34', '--z85 -d', {IN=>'ABCDE'}, {OUT=>"\x71\x61\x9e\xb6"}], + ['z85_35', '--z85 -d', {IN=>'ABCDEF'},{OUT=>"\x71\x61\x9e\xb6"}, + {EXIT=>1}, {ERR=>"$prog: invalid input\n"}], + + # largest possible value + ['z85_40', '--z85', {IN=>"\xFF\xFF\xFF\xFF"},{OUT=>"%nSc0"}], + ['z85_41', '--z85 -d', {IN=>"%nSc0"}, {OUT=>"\xFF\xFF\xFF\xFF"}], + # Invalid encoded data - will decode to more than 0xFFFFFFFF + ['z85_42', '--z85 -d', {IN=>"%nSc1"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_43', '--z85 -d', {IN=>"%nSd0"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_44', '--z85 -d', {IN=>"%nTc0"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_45', '--z85 -d', {IN=>"%oSc0"}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_46', '--z85 -d', {IN=>'$nSc0'}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], + ['z85_47', '--z85 -d', {IN=>'#0000'}, {EXIT=>1}, + {ERR=>"$prog: invalid input\n"}], +); + +# Prepend the command line argument and append a newline to end +# of each expected 'OUT' string. 
+my $t; + +Test: +foreach $t (@Tests) + { + foreach my $e (@$t) + { + ref $e && ref $e eq 'HASH' && defined $e->{OUT_SUBST} + and next Test; + } + + push @$t, {OUT_SUBST=>'s/\n$//s'}; + } + + + +my $save_temps = $ENV{DEBUG}; +my $verbose = $ENV{VERBOSE}; + +my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); + +exit $fail; diff --git a/tests/misc/help-version.sh b/tests/misc/help-version.sh index 46a518a0c..b8512a4fc 100755 --- a/tests/misc/help-version.sh +++ b/tests/misc/help-version.sh @@ -184,6 +184,7 @@ defid_setup () { args=t; } basename_setup () { args=$tmp_in; } dirname_setup () { args=$tmp_in; } expr_setup () { args=foo; } +basenc_setup () { args=--version; } # Punt, in case GNU 'id' hasn't been installed yet. groups_setup () { args=--version; }