mirror of
git://git.sv.gnu.org/sed
synced 2025-04-18 02:37:37 +03:00
maint: ensure that new "make syntax-check"-run sc_codespell passes
* cfg.mk (codespell_ignore_words_list): Ignore false-positives. (exclude_file_name_regexp--sc_codespell): Skip some file names. * sed/debug.c: Fix typo(s). * sed/mbcs.c: Likewise. * sed/sed.c: Likewise. * testsuite/badenc.sh: Likewise. * testsuite/compile-errors.sh: Likewise. * testsuite/distrib.inp: Likewise. * testsuite/distrib.sh: Likewise. * testsuite/execute-tests.sh: Likewise. * testsuite/mb-bad-delim.sh: Likewise. * testsuite/mb-charclass-non-utf8.sh: Likewise. * testsuite/regex-max-int.sh: Likewise. * testsuite/subst-options.sh: Likewise. * testsuite/test-mbrtowc.c: Likewise. * testsuite/utf8-ru.sh: Likewise.
This commit is contained in:
parent
5e9ee78fac
commit
b4d01a9c91
4
cfg.mk
4
cfg.mk
@ -408,3 +408,7 @@ build-ubsan:
|
||||
{ echo "./configure script not found" >&2; exit 1; }
|
||||
./configure CFLAGS="$(UBSAN_CFLAGS)" LDFLAGS="$(UBSAN_LDFLAGS)"
|
||||
make
|
||||
|
||||
exclude_file_name_regexp--sc_codespell = \
|
||||
^(THANKS\.in|testsuite/(8bit|mac-mf)\..+)$$
|
||||
codespell_ignore_words_list = ket,tre,debbugs,fo,2Rd,jaques,hel,te,bu,readin
|
||||
|
@ -424,7 +424,7 @@ debug_print_command (const struct vector *program, const struct sed_cmd *sc)
|
||||
debug_print_addr (sc->a2);
|
||||
|
||||
addr_bang = sc->addr_bang;
|
||||
/* Implmentation detail: GNU Sed implements beginning of block
|
||||
/* Implementation detail: GNU Sed implements beginning of block
|
||||
by negating the matched address and jumping if there's no match. */
|
||||
if (sc->cmd == '{')
|
||||
addr_bang = !addr_bang;
|
||||
|
@ -29,7 +29,7 @@ bool is_utf8;
|
||||
|
||||
Return zero in all other cases:
|
||||
CH is a valid single-byte character (e.g. 0x01-0x7F in UTF-8 locales);
|
||||
CH is an invalid byte in a multibyte sequence for the currentl locale,
|
||||
CH is an invalid byte in a multibyte sequence for the current locale,
|
||||
CH is the NUL byte.
|
||||
|
||||
Reset CUR_STAT in the case of an invalid byte.
|
||||
|
@ -59,7 +59,7 @@ bool separate_files = false;
|
||||
/* If set, follow symlinks when processing in place */
|
||||
bool follow_symlinks = false;
|
||||
|
||||
/* If set, opearate in 'sandbox' mode */
|
||||
/* If set, operate in 'sandbox' mode */
|
||||
bool sandbox = false;
|
||||
|
||||
/* if set, print debugging information */
|
||||
|
@ -27,7 +27,7 @@ printf "abc\nde\320f\nghi\njkl\nmno\npqr\nstu\nvwx\nyz\n" > badenc-inp \
|
||||
|| framework_failure_
|
||||
|
||||
|
||||
# The progarm: using 'z' to clear the pattern-space even
|
||||
# The program: using 'z' to clear the pattern-space even
|
||||
# if it contains invalid multibyte sequences.
|
||||
# Using 's/.*//' would not be able to clear the pattern-space.
|
||||
cat << \EOF > badenc.sed || framework_failure_
|
||||
|
@ -259,7 +259,7 @@ returns_ 1 sed 'y/a/a' </dev/null 2>err-unterm-y5 || fail=1
|
||||
compare_ exp-unterm-y err-unterm-y5 || fail=1
|
||||
|
||||
#
|
||||
# Y command with bad legth (Y_CMD_LEN)
|
||||
# Y command with bad length (Y_CMD_LEN)
|
||||
# TODO: check with multibyte strings.
|
||||
cat <<\EOF >exp-bad-y-len || framework_failure_
|
||||
sed: -e expression #1, char 7: 'y' command strings have different lengths
|
||||
|
@ -20,7 +20,7 @@ X-Mailer: ELM [version 2.4 PL23]
|
||||
shine of the sea:
|
||||
Breath and bloom, shade and shine, -- wonder,
|
||||
wealth, and -- how far above them --
|
||||
Truth, thats brighter than gem,
|
||||
Truth, that's brighter than gem,
|
||||
Trust, that's purer than pearl, --
|
||||
Brightest truth, purest trust in the universe --
|
||||
all were for me
|
||||
|
@ -103,7 +103,7 @@ Approved: alt.buddha.short.fat.guy@cygnus.com
|
||||
shine of the sea:
|
||||
Breath and bloom, shade and shine, -- wonder,
|
||||
wealth, and -- how far above them --
|
||||
Truth, thats brighter than gem,
|
||||
Truth, that's brighter than gem,
|
||||
Trust, that's purer than pearl, --
|
||||
Brightest truth, purest trust in the universe --
|
||||
all were for me
|
||||
|
@ -40,17 +40,17 @@ echo "printf a" | sed '1e' > out3 || fail=1
|
||||
compare_ exp3 out3 || fail=1
|
||||
|
||||
#
|
||||
# plain 'e' with a command that returns delimted '\n' output
|
||||
# plain 'e' with a command that returns delimited '\n' output
|
||||
# (implementation note: the delimiter is first chomp'd)
|
||||
printf "a\n" > exp4 || framework_failure_
|
||||
echo "echo a" | sed '1e' > out4 || fail=1
|
||||
compare_ exp4 out4 || fail=1
|
||||
|
||||
#
|
||||
# e with a command that returns delimted '\0' output
|
||||
# e with a command that returns delimited '\0' output
|
||||
#
|
||||
printf "b\0" > exp5 || framework_failure_
|
||||
# This input file contains the shell command to be excuted:
|
||||
# This input file contains the shell command to be executed:
|
||||
printf 'cat exp5' > in5 || framework_failure_
|
||||
sed -z '1e' <in5 > out5 || fail=1
|
||||
compare_ exp5 out5 || fail=1
|
||||
|
@ -30,7 +30,7 @@ require_en_utf8_locale_
|
||||
# Using the first octet alone (\316) causes various multibyte related functions
|
||||
# to return '-2' (incomplete multibyte sequence).
|
||||
# using the second octet alone (\246) causes the same functions to return '-1'
|
||||
# (ivalid multibyte sequence).
|
||||
# (invalid multibyte sequence).
|
||||
|
||||
|
||||
# Reject a valid multibyte delimiter (instead of slash).
|
||||
@ -66,7 +66,7 @@ compare_ exp2 out2 || fail=1
|
||||
|
||||
|
||||
# An invalid multibyte sequence is treated as a valid single byte,
|
||||
# thus accepted as a delimter (instead of slash).
|
||||
# thus accepted as a delimiter (instead of slash).
|
||||
# This is an implementation-specific behavior.
|
||||
printf 's\246a\246b\246' > prog3 || framework_failure_
|
||||
echo a > in3 || framework_failure_
|
||||
|
@ -71,11 +71,11 @@ require_valid_ja_shiftjis_locale_ "$LOCALE_JA_SJIS"
|
||||
|
||||
|
||||
#
|
||||
# Tests 1,2: Test y/// command with multibyte, non-utf8 seqeunce.
|
||||
# Implmenetation notes: str_append() has special code path for non-utf8 cases.
|
||||
# Tests 1,2: Test y/// command with multibyte, non-utf8 sequence.
|
||||
# Implementation notes: str_append() has special code path for non-utf8 cases.
|
||||
#
|
||||
|
||||
# Test 1: valid multibyte seqeunce
|
||||
# Test 1: valid multibyte sequence
|
||||
printf 'y/a/\203\133/' > p1 || framework_failure_
|
||||
echo Xa > in1 || framework_failure_
|
||||
printf 'X\203\133\n' > exp1 || framework_failure_
|
||||
@ -83,7 +83,7 @@ printf 'X\203\133\n' > exp1 || framework_failure_
|
||||
LC_ALL="$LOCALE_JA_SJIS" sed -f p1 <in1 >out1 || fail=1
|
||||
compare_ exp1 out1 || fail=1
|
||||
|
||||
# Test 2: invalid multibyte seqeunce, treated as two single-byte characters.
|
||||
# Test 2: invalid multibyte sequence, treated as two single-byte characters.
|
||||
printf 'y/aa/\203\060/' > p2 || framework_failure_
|
||||
LC_ALL="$LOCALE_JA_SJIS" sed -f p2 </dev/null 2>out2 || fail=1
|
||||
compare_ /dev/null out2 || fail=1
|
||||
|
@ -39,7 +39,7 @@ sed: regex input buffer length larger than INT_MAX
|
||||
EOF
|
||||
|
||||
|
||||
# Before sed-4.5, this was silently a no-op: would not perform the subsitution
|
||||
# Before sed-4.5, this was silently a no-op: would not perform the substitution
|
||||
# but would not indicate any error either (https://bugs.gnu.org/30520).
|
||||
# Exit code 4 is "panic".
|
||||
returns_ 4 sed 's/a/b/g' input >/dev/null 2>err1 || fail=1
|
||||
|
@ -91,7 +91,7 @@ compare_ subst-exp2 subst-out2 || fail=1
|
||||
#
|
||||
# Multiline modifier: s///m
|
||||
# ('N' will read and concatenate the second line
|
||||
# into the patten space, making it "foo\nbar".
|
||||
# into the pattern space, making it "foo\nbar".
|
||||
# s// will then operate on it as one string).
|
||||
printf "foo\nbar\n" > subst-in3 || fail=1
|
||||
printf "Xoo\nXar\n" > subst-exp3 || fail=1
|
||||
|
@ -15,7 +15,7 @@
|
||||
along with this program; If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Test the operating-system's native mbrtowc(3) function,
|
||||
by feeding it multibyte seqeunces one byte at a time,
|
||||
by feeding it multibyte sequences one byte at a time,
|
||||
and reporting the result.
|
||||
|
||||
The program prints the following values after each mbrtowc invocation,
|
||||
@ -83,7 +83,7 @@
|
||||
$ printf '\203\133' | LC_ALL=ja_JP.SJIS test-mbrtowc
|
||||
-2,1
|
||||
|
||||
The follwing is an INVALID multibyte sequence in SHIFT-JIS
|
||||
The following is an INVALID multibyte sequence in SHIFT-JIS
|
||||
(The byte ':' is not valid as a second octet).
|
||||
Buggy implementations will accept this as a valid multibyte sequence:
|
||||
|
||||
@ -102,7 +102,7 @@
|
||||
---------------------------------------------
|
||||
|
||||
In GNU Sed some tests purposely use invalid multibyte sequences
|
||||
to test sed's behaviour. A buggy implemetation of mbrtowc
|
||||
to test sed's behaviour. A buggy implementation of mbrtowc
|
||||
would result in false-alarm failures.
|
||||
|
||||
The following are expected results in correct implementations:
|
||||
|
@ -58,11 +58,11 @@ cp utf8-1-exp utf8-2-exp || framework_failure_
|
||||
|
||||
|
||||
# Test 3: Capitalize only the next character (\u)
|
||||
# Only the first "DE" should be capitilized.
|
||||
# Only the first "DE" should be capitalized.
|
||||
# s/д.*/\u&/g
|
||||
printf 's/\320\264.*/\\u&/g' > utf8-3.sed || framework_failure_
|
||||
|
||||
# Test 3: Expected output - First DE capitilized, second DE not.
|
||||
# Test 3: Expected output - First DE capitalized, second DE not.
|
||||
# Д а д
|
||||
printf '\320\224\320\260 \320\264\n' > utf8-3-exp || framework_failure_
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user