From 64a9a6d0c8bb224676724d395e52a570d6050d1d Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn
Date: Wed, 5 Feb 2025 15:06:21 +0100
Subject: [PATCH 1/4] cli/connhelper: add fork of mvdan.cc/sh/v3/syntax v3.10.0

This adds a local fork of the mvdan.cc/sh/v3/syntax package to provide
the Quote function without having to introduce additional (indirect)
dependencies of the mvdan.cc/sh module.

This commit does not compile on its own, as it references code that has
not yet been forked.

The following files were included:

- https://raw.githubusercontent.com/mvdan/sh/refs/tags/v3.10.0/syntax/quote.go
- https://raw.githubusercontent.com/mvdan/sh/refs/tags/v3.10.0/syntax/parser.go
- https://raw.githubusercontent.com/mvdan/sh/refs/tags/v3.10.0/LICENSE

Signed-off-by: Sebastiaan van Stijn
---
 cli/connhelper/internal/syntax/LICENSE   |   27 +
 cli/connhelper/internal/syntax/doc.go    |   13 +
 cli/connhelper/internal/syntax/parser.go | 2464 ++++++++++++++++++++++
 cli/connhelper/internal/syntax/quote.go  |  185 ++
 4 files changed, 2689 insertions(+)
 create mode 100644 cli/connhelper/internal/syntax/LICENSE
 create mode 100644 cli/connhelper/internal/syntax/doc.go
 create mode 100644 cli/connhelper/internal/syntax/parser.go
 create mode 100644 cli/connhelper/internal/syntax/quote.go

diff --git a/cli/connhelper/internal/syntax/LICENSE b/cli/connhelper/internal/syntax/LICENSE
new file mode 100644
index 0000000000..2a5268e5f1
--- /dev/null
+++ b/cli/connhelper/internal/syntax/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2016, Daniel Martí. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/cli/connhelper/internal/syntax/doc.go b/cli/connhelper/internal/syntax/doc.go
new file mode 100644
index 0000000000..32cf60c7c6
--- /dev/null
+++ b/cli/connhelper/internal/syntax/doc.go
@@ -0,0 +1,13 @@
+// Package syntax is a fork of [mvdan.cc/sh/v3@v3.10.0/syntax].
+//
+// Copyright (c) 2016, Daniel Martí. All rights reserved.
+//
+// It is a reduced set of the package to only provide the [Quote] function,
+// and contains the [LICENSE], [quote.go] and [parser.go] files at the given
+// revision.
+// +// [quote.go]: https://raw.githubusercontent.com/mvdan/sh/refs/tags/v3.10.0/syntax/quote.go +// [parser.go]: https://raw.githubusercontent.com/mvdan/sh/refs/tags/v3.10.0/syntax/parser.go +// [LICENSE]: https://raw.githubusercontent.com/mvdan/sh/refs/tags/v3.10.0/LICENSE +// [mvdan.cc/sh/v3@v3.10.0/syntax]: https://pkg.go.dev/mvdan.cc/sh/v3@v3.10.0/syntax +package syntax diff --git a/cli/connhelper/internal/syntax/parser.go b/cli/connhelper/internal/syntax/parser.go new file mode 100644 index 0000000000..0bc10e8694 --- /dev/null +++ b/cli/connhelper/internal/syntax/parser.go @@ -0,0 +1,2464 @@ +// Copyright (c) 2016, Daniel Martí +// See LICENSE for licensing information + +package syntax + +import ( + "fmt" + "io" + "strconv" + "strings" + "unicode/utf8" +) + +// ParserOption is a function which can be passed to NewParser +// to alter its behavior. To apply option to existing Parser +// call it directly, for example KeepComments(true)(parser). +type ParserOption func(*Parser) + +// KeepComments makes the parser parse comments and attach them to +// nodes, as opposed to discarding them. +func KeepComments(enabled bool) ParserOption { + return func(p *Parser) { p.keepComments = enabled } +} + +// LangVariant describes a shell language variant to use when tokenizing and +// parsing shell code. The zero value is [LangBash]. +type LangVariant int + +const ( + // LangBash corresponds to the GNU Bash language, as described in its + // manual at https://www.gnu.org/software/bash/manual/bash.html. + // + // We currently follow Bash version 5.2. + // + // Its string representation is "bash". + LangBash LangVariant = iota + + // LangPOSIX corresponds to the POSIX Shell language, as described at + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html. + // + // Its string representation is "posix" or "sh". + LangPOSIX + + // LangMirBSDKorn corresponds to the MirBSD Korn Shell, also known as + // mksh, as described at http://www.mirbsd.org/htman/i386/man1/mksh.htm. + // Note that it shares some features with Bash, due to the the shared + // ancestry that is ksh. + // + // We currently follow mksh version 59. + // + // Its string representation is "mksh". + LangMirBSDKorn + + // LangBats corresponds to the Bash Automated Testing System language, + // as described at https://github.com/bats-core/bats-core. Note that + // it's just a small extension of the Bash language. + // + // Its string representation is "bats". + LangBats + + // LangAuto corresponds to automatic language detection, + // commonly used by end-user applications like shfmt, + // which can guess a file's language variant given its filename or shebang. + // + // At this time, [Variant] does not support LangAuto. + LangAuto +) + +// Variant changes the shell language variant that the parser will +// accept. +// +// The passed language variant must be one of the constant values defined in +// this package. 
+func Variant(l LangVariant) ParserOption { + switch l { + case LangBash, LangPOSIX, LangMirBSDKorn, LangBats: + case LangAuto: + panic("LangAuto is not supported by the parser at this time") + default: + panic(fmt.Sprintf("unknown shell language variant: %d", l)) + } + return func(p *Parser) { p.lang = l } +} + +func (l LangVariant) String() string { + switch l { + case LangBash: + return "bash" + case LangPOSIX: + return "posix" + case LangMirBSDKorn: + return "mksh" + case LangBats: + return "bats" + case LangAuto: + return "auto" + } + return "unknown shell language variant" +} + +func (l *LangVariant) Set(s string) error { + switch s { + case "bash": + *l = LangBash + case "posix", "sh": + *l = LangPOSIX + case "mksh": + *l = LangMirBSDKorn + case "bats": + *l = LangBats + case "auto": + *l = LangAuto + default: + return fmt.Errorf("unknown shell language variant: %q", s) + } + return nil +} + +func (l LangVariant) isBash() bool { + return l == LangBash || l == LangBats +} + +// StopAt configures the lexer to stop at an arbitrary word, treating it +// as if it were the end of the input. It can contain any characters +// except whitespace, and cannot be over four bytes in size. +// +// This can be useful to embed shell code within another language, as +// one can use a special word to mark the delimiters between the two. +// +// As a word, it will only apply when following whitespace or a +// separating token. For example, StopAt("$$") will act on the inputs +// "foo $$" and "foo;$$", but not on "foo '$$'". +// +// The match is done by prefix, so the example above will also act on +// "foo $$bar". +func StopAt(word string) ParserOption { + if len(word) > 4 { + panic("stop word can't be over four bytes in size") + } + if strings.ContainsAny(word, " \t\n\r") { + panic("stop word can't contain whitespace characters") + } + return func(p *Parser) { p.stopAt = []byte(word) } +} + +// NewParser allocates a new [Parser] and applies any number of options. +func NewParser(options ...ParserOption) *Parser { + p := &Parser{} + for _, opt := range options { + opt(p) + } + return p +} + +// Parse reads and parses a shell program with an optional name. It +// returns the parsed program if no issues were encountered. Otherwise, +// an error is returned. Reads from r are buffered. +// +// Parse can be called more than once, but not concurrently. That is, a +// Parser can be reused once it is done working. +func (p *Parser) Parse(r io.Reader, name string) (*File, error) { + p.reset() + p.f = &File{Name: name} + p.src = r + p.rune() + p.next() + p.f.Stmts, p.f.Last = p.stmtList() + if p.err == nil { + // EOF immediately after heredoc word so no newline to + // trigger it + p.doHeredocs() + } + return p.f, p.err +} + +// Stmts reads and parses statements one at a time, calling a function +// each time one is parsed. If the function returns false, parsing is +// stopped and the function is not called again. 
+func (p *Parser) Stmts(r io.Reader, fn func(*Stmt) bool) error { + p.reset() + p.f = &File{} + p.src = r + p.rune() + p.next() + p.stmts(fn) + if p.err == nil { + // EOF immediately after heredoc word so no newline to + // trigger it + p.doHeredocs() + } + return p.err +} + +type wrappedReader struct { + *Parser + io.Reader + + lastLine int64 + accumulated []*Stmt + fn func([]*Stmt) bool +} + +func (w *wrappedReader) Read(p []byte) (n int, err error) { + // If we lexed a newline for the first time, we just finished a line, so + // we may need to give a callback for the edge cases below not covered + // by Parser.Stmts. + if (w.r == '\n' || w.r == escNewl) && w.line > w.lastLine { + if w.Incomplete() { + // Incomplete statement; call back to print "> ". + if !w.fn(w.accumulated) { + return 0, io.EOF + } + } else if len(w.accumulated) == 0 { + // Nothing was parsed; call back to print another "$ ". + if !w.fn(nil) { + return 0, io.EOF + } + } + w.lastLine = w.line + } + return w.Reader.Read(p) +} + +// Interactive implements what is necessary to parse statements in an +// interactive shell. The parser will call the given function under two +// circumstances outlined below. +// +// If a line containing any number of statements is parsed, the function will be +// called with said statements. +// +// If a line ending in an incomplete statement is parsed, the function will be +// called with any fully parsed statements, and [Parser.Incomplete] will return true. +// +// One can imagine a simple interactive shell implementation as follows: +// +// fmt.Fprintf(os.Stdout, "$ ") +// parser.Interactive(os.Stdin, func(stmts []*syntax.Stmt) bool { +// if parser.Incomplete() { +// fmt.Fprintf(os.Stdout, "> ") +// return true +// } +// run(stmts) +// fmt.Fprintf(os.Stdout, "$ ") +// return true +// } +// +// If the callback function returns false, parsing is stopped and the function +// is not called again. +func (p *Parser) Interactive(r io.Reader, fn func([]*Stmt) bool) error { + w := wrappedReader{Parser: p, Reader: r, fn: fn} + return p.Stmts(&w, func(stmt *Stmt) bool { + w.accumulated = append(w.accumulated, stmt) + // We finished parsing a statement and we're at a newline token, + // so we finished fully parsing a number of statements. Call + // back to run the statements and print "$ ". + if p.tok == _Newl { + if !fn(w.accumulated) { + return false + } + w.accumulated = w.accumulated[:0] + // The callback above would already print "$ ", so we + // don't want the subsequent wrappedReader.Read to cause + // another "$ " print thinking that nothing was parsed. + w.lastLine = w.line + 1 + } + return true + }) +} + +// Words reads and parses words one at a time, calling a function each time one +// is parsed. If the function returns false, parsing is stopped and the function +// is not called again. +// +// Newlines are skipped, meaning that multi-line input will work fine. If the +// parser encounters a token that isn't a word, such as a semicolon, an error +// will be returned. +// +// Note that the lexer doesn't currently tokenize spaces, so it may need to read +// a non-space byte such as a newline or a letter before finishing the parsing +// of a word. This will be fixed in the future. 
+func (p *Parser) Words(r io.Reader, fn func(*Word) bool) error { + p.reset() + p.f = &File{} + p.src = r + p.rune() + p.next() + for { + p.got(_Newl) + w := p.getWord() + if w == nil { + if p.tok != _EOF { + p.curErr("%s is not a valid word", p.tok) + } + return p.err + } + if !fn(w) { + return nil + } + } +} + +// Document parses a single here-document word. That is, it parses the input as +// if they were lines following a < 0 || p.litBs != nil +} + +const bufSize = 1 << 10 + +func (p *Parser) reset() { + p.tok, p.val = illegalTok, "" + p.eqlOffs = 0 + p.bs, p.bsp = nil, 0 + p.offs, p.line, p.col = 0, 1, 1 + p.r, p.w = 0, 0 + p.err, p.readErr = nil, nil + p.quote, p.forbidNested = noState, false + p.openStmts = 0 + p.heredocs, p.buriedHdocs = p.heredocs[:0], 0 + p.hdocStops = nil + p.parsingDoc = false + p.openBquotes = 0 + p.accComs = nil + p.accComs, p.curComs = nil, &p.accComs + p.litBatch = nil + p.wordBatch = nil + p.litBs = nil +} + +func (p *Parser) nextPos() Pos { + // Basic protection against offset overflow; + // note that an offset of 0 is valid, so we leave the maximum. + offset := min(p.offs+int64(p.bsp)-int64(p.w), offsetMax) + var line, col uint + if p.line <= lineMax { + line = uint(p.line) + } + if p.col <= colMax { + col = uint(p.col) + } + return NewPos(uint(offset), line, col) +} + +func (p *Parser) lit(pos Pos, val string) *Lit { + if len(p.litBatch) == 0 { + p.litBatch = make([]Lit, 32) + } + l := &p.litBatch[0] + p.litBatch = p.litBatch[1:] + l.ValuePos = pos + l.ValueEnd = p.nextPos() + l.Value = val + return l +} + +type wordAlloc struct { + word Word + parts [1]WordPart +} + +func (p *Parser) wordAnyNumber() *Word { + if len(p.wordBatch) == 0 { + p.wordBatch = make([]wordAlloc, 32) + } + alloc := &p.wordBatch[0] + p.wordBatch = p.wordBatch[1:] + w := &alloc.word + w.Parts = p.wordParts(alloc.parts[:0]) + return w +} + +func (p *Parser) wordOne(part WordPart) *Word { + if len(p.wordBatch) == 0 { + p.wordBatch = make([]wordAlloc, 32) + } + alloc := &p.wordBatch[0] + p.wordBatch = p.wordBatch[1:] + w := &alloc.word + w.Parts = alloc.parts[:1] + w.Parts[0] = part + return w +} + +func (p *Parser) call(w *Word) *CallExpr { + var alloc struct { + ce CallExpr + ws [4]*Word + } + ce := &alloc.ce + ce.Args = alloc.ws[:1] + ce.Args[0] = w + return ce +} + +//go:generate stringer -type=quoteState + +type quoteState uint32 + +const ( + noState quoteState = 1 << iota + subCmd + subCmdBckquo + dblQuotes + hdocWord + hdocBody + hdocBodyTabs + arithmExpr + arithmExprLet + arithmExprCmd + arithmExprBrack + testExpr + testExprRegexp + switchCase + paramExpName + paramExpSlice + paramExpRepl + paramExpExp + arrayElems + + allKeepSpaces = paramExpRepl | dblQuotes | hdocBody | + hdocBodyTabs | paramExpExp + allRegTokens = noState | subCmd | subCmdBckquo | hdocWord | + switchCase | arrayElems | testExpr + allArithmExpr = arithmExpr | arithmExprLet | arithmExprCmd | + arithmExprBrack | paramExpSlice + allParamReg = paramExpName | paramExpSlice + allParamExp = allParamReg | paramExpRepl | paramExpExp | arithmExprBrack +) + +type saveState struct { + quote quoteState + buriedHdocs int +} + +func (p *Parser) preNested(quote quoteState) (s saveState) { + s.quote, s.buriedHdocs = p.quote, p.buriedHdocs + p.buriedHdocs, p.quote = len(p.heredocs), quote + return +} + +func (p *Parser) postNested(s saveState) { + p.quote, p.buriedHdocs = s.quote, s.buriedHdocs +} + +func (p *Parser) unquotedWordBytes(w *Word) ([]byte, bool) { + buf := make([]byte, 0, 4) + didUnquote := false + for _, wp := 
range w.Parts { + buf, didUnquote = p.unquotedWordPart(buf, wp, false) + } + return buf, didUnquote +} + +func (p *Parser) unquotedWordPart(buf []byte, wp WordPart, quotes bool) (_ []byte, quoted bool) { + switch wp := wp.(type) { + case *Lit: + for i := 0; i < len(wp.Value); i++ { + if b := wp.Value[i]; b == '\\' && !quotes { + if i++; i < len(wp.Value) { + buf = append(buf, wp.Value[i]) + } + quoted = true + } else { + buf = append(buf, b) + } + } + case *SglQuoted: + buf = append(buf, []byte(wp.Value)...) + quoted = true + case *DblQuoted: + for _, wp2 := range wp.Parts { + buf, _ = p.unquotedWordPart(buf, wp2, true) + } + quoted = true + } + return buf, quoted +} + +func (p *Parser) doHeredocs() { + hdocs := p.heredocs[p.buriedHdocs:] + if len(hdocs) == 0 { + // Nothing do do; don't even issue a read. + return + } + p.rune() // consume '\n', since we know p.tok == _Newl + old := p.quote + p.heredocs = p.heredocs[:p.buriedHdocs] + for i, r := range hdocs { + if p.err != nil { + break + } + p.quote = hdocBody + if r.Op == DashHdoc { + p.quote = hdocBodyTabs + } + stop, quoted := p.unquotedWordBytes(r.Word) + p.hdocStops = append(p.hdocStops, stop) + if i > 0 && p.r == '\n' { + p.rune() + } + lastLine := p.line + if quoted { + r.Hdoc = p.quotedHdocWord() + } else { + p.next() + r.Hdoc = p.getWord() + } + if r.Hdoc != nil { + lastLine = int64(r.Hdoc.End().Line()) + } + if lastLine < p.line { + // TODO: It seems like this triggers more often than it + // should. Look into it. + l := p.lit(p.nextPos(), "") + if r.Hdoc == nil { + r.Hdoc = p.wordOne(l) + } else { + r.Hdoc.Parts = append(r.Hdoc.Parts, l) + } + } + if stop := p.hdocStops[len(p.hdocStops)-1]; stop != nil { + p.posErr(r.Pos(), "unclosed here-document '%s'", stop) + } + p.hdocStops = p.hdocStops[:len(p.hdocStops)-1] + } + p.quote = old +} + +func (p *Parser) got(tok token) bool { + if p.tok == tok { + p.next() + return true + } + return false +} + +func (p *Parser) gotRsrv(val string) (Pos, bool) { + pos := p.pos + if p.tok == _LitWord && p.val == val { + p.next() + return pos, true + } + return pos, false +} + +func readableStr(s string) string { + // don't quote tokens like & or } + if s != "" && s[0] >= 'a' && s[0] <= 'z' { + return strconv.Quote(s) + } + return s +} + +func (p *Parser) followErr(pos Pos, left, right string) { + leftStr := readableStr(left) + p.posErr(pos, "%s must be followed by %s", leftStr, right) +} + +func (p *Parser) followErrExp(pos Pos, left string) { + p.followErr(pos, left, "an expression") +} + +func (p *Parser) follow(lpos Pos, left string, tok token) { + if !p.got(tok) { + p.followErr(lpos, left, tok.String()) + } +} + +func (p *Parser) followRsrv(lpos Pos, left, val string) Pos { + pos, ok := p.gotRsrv(val) + if !ok { + p.followErr(lpos, left, fmt.Sprintf("%q", val)) + } + return pos +} + +func (p *Parser) followStmts(left string, lpos Pos, stops ...string) ([]*Stmt, []Comment) { + if p.got(semicolon) { + return nil, nil + } + newLine := p.got(_Newl) + stmts, last := p.stmtList(stops...) 
+ if len(stmts) < 1 && !newLine { + p.followErr(lpos, left, "a statement list") + } + return stmts, last +} + +func (p *Parser) followWordTok(tok token, pos Pos) *Word { + w := p.getWord() + if w == nil { + p.followErr(pos, tok.String(), "a word") + } + return w +} + +func (p *Parser) stmtEnd(n Node, start, end string) Pos { + pos, ok := p.gotRsrv(end) + if !ok { + p.posErr(n.Pos(), "%s statement must end with %q", start, end) + } + return pos +} + +func (p *Parser) quoteErr(lpos Pos, quote token) { + p.posErr(lpos, "reached %s without closing quote %s", + p.tok.String(), quote) +} + +func (p *Parser) matchingErr(lpos Pos, left, right any) { + p.posErr(lpos, "reached %s without matching %s with %s", + p.tok.String(), left, right) +} + +func (p *Parser) matched(lpos Pos, left, right token) Pos { + pos := p.pos + if !p.got(right) { + p.matchingErr(lpos, left, right) + } + return pos +} + +func (p *Parser) errPass(err error) { + if p.err == nil { + p.err = err + p.bsp = uint(len(p.bs)) + 1 + p.r = utf8.RuneSelf + p.w = 1 + p.tok = _EOF + } +} + +// IsIncomplete reports whether a Parser error could have been avoided with +// extra input bytes. For example, if an [io.EOF] was encountered while there was +// an unclosed quote or parenthesis. +func IsIncomplete(err error) bool { + perr, ok := err.(ParseError) + return ok && perr.Incomplete +} + +// IsKeyword returns true if the given word is part of the language keywords. +func IsKeyword(word string) bool { + // This list has been copied from the bash 5.1 source code, file y.tab.c +4460 + switch word { + case + "!", + "[[", // only if COND_COMMAND is defined + "]]", // only if COND_COMMAND is defined + "case", + "coproc", // only if COPROCESS_SUPPORT is defined + "do", + "done", + "else", + "esac", + "fi", + "for", + "function", + "if", + "in", + "select", // only if SELECT_COMMAND is defined + "then", + "time", // only if COMMAND_TIMING is defined + "until", + "while", + "{", + "}": + return true + } + return false +} + +// ParseError represents an error found when parsing a source file, from which +// the parser cannot recover. +type ParseError struct { + Filename string + Pos Pos + Text string + + Incomplete bool +} + +func (e ParseError) Error() string { + if e.Filename == "" { + return fmt.Sprintf("%s: %s", e.Pos.String(), e.Text) + } + return fmt.Sprintf("%s:%s: %s", e.Filename, e.Pos.String(), e.Text) +} + +// LangError is returned when the parser encounters code that is only valid in +// other shell language variants. The error includes what feature is not present +// in the current language variant, and what languages support it. +type LangError struct { + Filename string + Pos Pos + Feature string + Langs []LangVariant +} + +func (e LangError) Error() string { + var sb strings.Builder + if e.Filename != "" { + sb.WriteString(e.Filename + ":") + } + sb.WriteString(e.Pos.String() + ": ") + sb.WriteString(e.Feature) + if strings.HasSuffix(e.Feature, "s") { + sb.WriteString(" are a ") + } else { + sb.WriteString(" is a ") + } + for i, lang := range e.Langs { + if i > 0 { + sb.WriteString("/") + } + sb.WriteString(lang.String()) + } + sb.WriteString(" feature") + return sb.String() +} + +func (p *Parser) posErr(pos Pos, format string, a ...any) { + p.errPass(ParseError{ + Filename: p.f.Name, + Pos: pos, + Text: fmt.Sprintf(format, a...), + Incomplete: p.tok == _EOF && p.Incomplete(), + }) +} + +func (p *Parser) curErr(format string, a ...any) { + p.posErr(p.pos, format, a...) 
+} + +func (p *Parser) langErr(pos Pos, feature string, langs ...LangVariant) { + p.errPass(LangError{ + Filename: p.f.Name, + Pos: pos, + Feature: feature, + Langs: langs, + }) +} + +func (p *Parser) stmts(fn func(*Stmt) bool, stops ...string) { + gotEnd := true +loop: + for p.tok != _EOF { + newLine := p.got(_Newl) + switch p.tok { + case _LitWord: + for _, stop := range stops { + if p.val == stop { + break loop + } + } + case rightParen: + if p.quote == subCmd { + break loop + } + case bckQuote: + if p.backquoteEnd() { + break loop + } + case dblSemicolon, semiAnd, dblSemiAnd, semiOr: + if p.quote == switchCase { + break loop + } + p.curErr("%s can only be used in a case clause", p.tok) + } + if !newLine && !gotEnd { + p.curErr("statements must be separated by &, ; or a newline") + } + if p.tok == _EOF { + break + } + p.openStmts++ + s := p.getStmt(true, false, false) + p.openStmts-- + if s == nil { + p.invalidStmtStart() + break + } + gotEnd = s.Semicolon.IsValid() + if !fn(s) { + break + } + } +} + +func (p *Parser) stmtList(stops ...string) ([]*Stmt, []Comment) { + var stmts []*Stmt + var last []Comment + fn := func(s *Stmt) bool { + stmts = append(stmts, s) + return true + } + p.stmts(fn, stops...) + split := len(p.accComs) + if p.tok == _LitWord && (p.val == "elif" || p.val == "else" || p.val == "fi") { + // Split the comments, so that any aligned with an opening token + // get attached to it. For example: + // + // if foo; then + // # inside the body + // # document the else + // else + // fi + // TODO(mvdan): look into deduplicating this with similar logic + // in caseItems. + for i := len(p.accComs) - 1; i >= 0; i-- { + c := p.accComs[i] + if c.Pos().Col() != p.pos.Col() { + break + } + split = i + } + } + if split > 0 { // keep last nil if empty + last = p.accComs[:split] + } + p.accComs = p.accComs[split:] + return stmts, last +} + +func (p *Parser) invalidStmtStart() { + switch p.tok { + case semicolon, and, or, andAnd, orOr: + p.curErr("%s can only immediately follow a statement", p.tok) + case rightParen: + p.curErr("%s can only be used to close a subshell", p.tok) + default: + p.curErr("%s is not a valid start for a statement", p.tok) + } +} + +func (p *Parser) getWord() *Word { + if w := p.wordAnyNumber(); len(w.Parts) > 0 && p.err == nil { + return w + } + return nil +} + +func (p *Parser) getLit() *Lit { + switch p.tok { + case _Lit, _LitWord, _LitRedir: + l := p.lit(p.pos, p.val) + p.next() + return l + } + return nil +} + +func (p *Parser) wordParts(wps []WordPart) []WordPart { + for { + n := p.wordPart() + if n == nil { + if len(wps) == 0 { + return nil // normalize empty lists into nil + } + return wps + } + wps = append(wps, n) + if p.spaced { + return wps + } + } +} + +func (p *Parser) ensureNoNested() { + if p.forbidNested { + p.curErr("expansions not allowed in heredoc words") + } +} + +func (p *Parser) wordPart() WordPart { + switch p.tok { + case _Lit, _LitWord, _LitRedir: + l := p.lit(p.pos, p.val) + p.next() + return l + case dollBrace: + p.ensureNoNested() + switch p.r { + case '|': + if p.lang != LangMirBSDKorn { + p.curErr(`"${|stmts;}" is a mksh feature`) + } + fallthrough + case ' ', '\t', '\n': + if p.lang != LangMirBSDKorn { + p.curErr(`"${ stmts;}" is a mksh feature`) + } + cs := &CmdSubst{ + Left: p.pos, + TempFile: p.r != '|', + ReplyVar: p.r == '|', + } + old := p.preNested(subCmd) + p.rune() // don't tokenize '|' + p.next() + cs.Stmts, cs.Last = p.stmtList("}") + p.postNested(old) + pos, ok := p.gotRsrv("}") + if !ok { + p.matchingErr(cs.Left, 
"${", "}") + } + cs.Right = pos + return cs + default: + return p.paramExp() + } + case dollDblParen, dollBrack: + p.ensureNoNested() + left := p.tok + ar := &ArithmExp{Left: p.pos, Bracket: left == dollBrack} + var old saveState + if ar.Bracket { + old = p.preNested(arithmExprBrack) + } else { + old = p.preNested(arithmExpr) + } + p.next() + if p.got(hash) { + if p.lang != LangMirBSDKorn { + p.langErr(ar.Pos(), "unsigned expressions", LangMirBSDKorn) + } + ar.Unsigned = true + } + ar.X = p.followArithm(left, ar.Left) + if ar.Bracket { + if p.tok != rightBrack { + p.arithmMatchingErr(ar.Left, dollBrack, rightBrack) + } + p.postNested(old) + ar.Right = p.pos + p.next() + } else { + ar.Right = p.arithmEnd(dollDblParen, ar.Left, old) + } + return ar + case dollParen: + p.ensureNoNested() + cs := &CmdSubst{Left: p.pos} + old := p.preNested(subCmd) + p.next() + cs.Stmts, cs.Last = p.stmtList() + p.postNested(old) + cs.Right = p.matched(cs.Left, leftParen, rightParen) + return cs + case dollar: + r := p.r + switch { + case singleRuneParam(r): + p.tok, p.val = _LitWord, string(r) + p.rune() + case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', + '0' <= r && r <= '9', r == '_', r == '\\': + p.advanceNameCont(r) + default: + l := p.lit(p.pos, "$") + p.next() + return l + } + p.ensureNoNested() + pe := &ParamExp{Dollar: p.pos, Short: true} + p.pos = posAddCol(p.pos, 1) + pe.Param = p.getLit() + if pe.Param != nil && pe.Param.Value == "" { + l := p.lit(pe.Dollar, "$") + // e.g. "$\\\"" within double quotes, so we must + // keep the rest of the literal characters. + l.ValueEnd = posAddCol(l.ValuePos, 1) + return l + } + return pe + case cmdIn, cmdOut: + p.ensureNoNested() + ps := &ProcSubst{Op: ProcOperator(p.tok), OpPos: p.pos} + old := p.preNested(subCmd) + p.next() + ps.Stmts, ps.Last = p.stmtList() + p.postNested(old) + ps.Rparen = p.matched(ps.OpPos, token(ps.Op), rightParen) + return ps + case sglQuote, dollSglQuote: + sq := &SglQuoted{Left: p.pos, Dollar: p.tok == dollSglQuote} + r := p.r + for p.newLit(r); ; r = p.rune() { + switch r { + case '\\': + if sq.Dollar { + p.rune() + } + case '\'': + sq.Right = p.nextPos() + sq.Value = p.endLit() + + p.rune() + p.next() + return sq + case escNewl: + p.litBs = append(p.litBs, '\\', '\n') + case utf8.RuneSelf: + p.tok = _EOF + p.quoteErr(sq.Pos(), sglQuote) + return nil + } + } + case dblQuote, dollDblQuote: + if p.quote == dblQuotes { + // p.tok == dblQuote, as "foo$" puts $ in the lit + return nil + } + return p.dblQuoted() + case bckQuote: + if p.backquoteEnd() { + return nil + } + p.ensureNoNested() + cs := &CmdSubst{Left: p.pos, Backquotes: true} + old := p.preNested(subCmdBckquo) + p.openBquotes++ + + // The lexer didn't call p.rune for us, so that it could have + // the right p.openBquotes to properly handle backslashes. + p.rune() + + p.next() + cs.Stmts, cs.Last = p.stmtList() + if p.tok == bckQuote && p.lastBquoteEsc < p.openBquotes-1 { + // e.g. found ` before the nested backquote \` was closed. + p.tok = _EOF + p.quoteErr(cs.Pos(), bckQuote) + } + p.postNested(old) + p.openBquotes-- + cs.Right = p.pos + + // Like above, the lexer didn't call p.rune for us. 
+ p.rune() + if !p.got(bckQuote) { + p.quoteErr(cs.Pos(), bckQuote) + } + return cs + case globQuest, globStar, globPlus, globAt, globExcl: + if p.lang == LangPOSIX { + p.langErr(p.pos, "extended globs", LangBash, LangMirBSDKorn) + } + eg := &ExtGlob{Op: GlobOperator(p.tok), OpPos: p.pos} + lparens := 1 + r := p.r + globLoop: + for p.newLit(r); ; r = p.rune() { + switch r { + case utf8.RuneSelf: + break globLoop + case '(': + lparens++ + case ')': + if lparens--; lparens == 0 { + break globLoop + } + } + } + eg.Pattern = p.lit(posAddCol(eg.OpPos, 2), p.endLit()) + p.rune() + p.next() + if lparens != 0 { + p.matchingErr(eg.OpPos, eg.Op, rightParen) + } + return eg + default: + return nil + } +} + +func (p *Parser) dblQuoted() *DblQuoted { + alloc := &struct { + quoted DblQuoted + parts [1]WordPart + }{ + quoted: DblQuoted{Left: p.pos, Dollar: p.tok == dollDblQuote}, + } + q := &alloc.quoted + old := p.quote + p.quote = dblQuotes + p.next() + q.Parts = p.wordParts(alloc.parts[:0]) + p.quote = old + q.Right = p.pos + if !p.got(dblQuote) { + p.quoteErr(q.Pos(), dblQuote) + } + return q +} + +func singleRuneParam(r rune) bool { + switch r { + case '@', '*', '#', '$', '?', '!', '-', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return true + } + return false +} + +func (p *Parser) paramExp() *ParamExp { + pe := &ParamExp{Dollar: p.pos} + old := p.quote + p.quote = paramExpName + if p.r == '#' { + p.tok = hash + p.pos = p.nextPos() + p.rune() + } else { + p.next() + } + switch p.tok { + case hash: + if paramNameOp(p.r) { + pe.Length = true + p.next() + } + case perc: + if p.lang != LangMirBSDKorn { + p.posErr(pe.Pos(), `"${%%foo}" is a mksh feature`) + } + if paramNameOp(p.r) { + pe.Width = true + p.next() + } + case exclMark: + if paramNameOp(p.r) { + pe.Excl = true + p.next() + } + } + op := p.tok + switch p.tok { + case _Lit, _LitWord: + if !numberLiteral(p.val) && !ValidName(p.val) { + p.curErr("invalid parameter name") + } + pe.Param = p.lit(p.pos, p.val) + p.next() + case quest, minus: + if pe.Length && p.r != '}' { + // actually ${#-default}, not ${#-}; fix the ambiguity + pe.Length = false + pe.Param = p.lit(posAddCol(p.pos, -1), "#") + pe.Param.ValueEnd = p.pos + break + } + fallthrough + case at, star, hash, exclMark, dollar: + pe.Param = p.lit(p.pos, p.tok.String()) + p.next() + default: + p.curErr("parameter expansion requires a literal") + } + switch p.tok { + case _Lit, _LitWord: + p.curErr("%s cannot be followed by a word", op) + case rightBrace: + if pe.Excl && p.lang == LangPOSIX { + p.posErr(pe.Pos(), `"${!foo}" is a bash/mksh feature`) + } + pe.Rbrace = p.pos + p.quote = old + p.next() + return pe + case leftBrack: + if p.lang == LangPOSIX { + p.langErr(p.pos, "arrays", LangBash, LangMirBSDKorn) + } + if !ValidName(pe.Param.Value) { + p.curErr("cannot index a special parameter name") + } + pe.Index = p.eitherIndex() + } + if p.tok == rightBrace { + pe.Rbrace = p.pos + p.quote = old + p.next() + return pe + } + if p.tok != _EOF && (pe.Length || pe.Width) { + p.curErr("cannot combine multiple parameter expansion operators") + } + switch p.tok { + case slash, dblSlash: + // pattern search and replace + if p.lang == LangPOSIX { + p.langErr(p.pos, "search and replace", LangBash, LangMirBSDKorn) + } + pe.Repl = &Replace{All: p.tok == dblSlash} + p.quote = paramExpRepl + p.next() + pe.Repl.Orig = p.getWord() + p.quote = paramExpExp + if p.got(slash) { + pe.Repl.With = p.getWord() + } + case colon: + // slicing + if p.lang == LangPOSIX { + p.langErr(p.pos, "slicing", LangBash, 
LangMirBSDKorn) + } + pe.Slice = &Slice{} + colonPos := p.pos + p.quote = paramExpSlice + if p.next(); p.tok != colon { + pe.Slice.Offset = p.followArithm(colon, colonPos) + } + colonPos = p.pos + if p.got(colon) { + pe.Slice.Length = p.followArithm(colon, colonPos) + } + // Need to use a different matched style so arithm errors + // get reported correctly + p.quote = old + pe.Rbrace = p.pos + p.matchedArithm(pe.Dollar, dollBrace, rightBrace) + return pe + case caret, dblCaret, comma, dblComma: + // upper/lower case + if !p.lang.isBash() { + p.langErr(p.pos, "this expansion operator", LangBash) + } + pe.Exp = p.paramExpExp() + case at, star: + switch { + case p.tok == at && p.lang == LangPOSIX: + p.langErr(p.pos, "this expansion operator", LangBash, LangMirBSDKorn) + case p.tok == star && !pe.Excl: + p.curErr("not a valid parameter expansion operator: %v", p.tok) + case pe.Excl && p.r == '}': + if !p.lang.isBash() { + p.posErr(pe.Pos(), `"${!foo%s}" is a bash feature`, p.tok) + } + pe.Names = ParNamesOperator(p.tok) + p.next() + default: + pe.Exp = p.paramExpExp() + } + case plus, colPlus, minus, colMinus, quest, colQuest, assgn, colAssgn, + perc, dblPerc, hash, dblHash: + pe.Exp = p.paramExpExp() + case _EOF: + default: + p.curErr("not a valid parameter expansion operator: %v", p.tok) + } + p.quote = old + pe.Rbrace = p.matched(pe.Dollar, dollBrace, rightBrace) + return pe +} + +func (p *Parser) paramExpExp() *Expansion { + op := ParExpOperator(p.tok) + p.quote = paramExpExp + p.next() + if op == OtherParamOps { + switch p.tok { + case _Lit, _LitWord: + default: + p.curErr("@ expansion operator requires a literal") + } + switch p.val { + case "a", "k", "u", "A", "E", "K", "L", "P", "U": + if !p.lang.isBash() { + p.langErr(p.pos, "this expansion operator", LangBash) + } + case "#": + if p.lang != LangMirBSDKorn { + p.langErr(p.pos, "this expansion operator", LangMirBSDKorn) + } + case "Q": + default: + p.curErr("invalid @ expansion operator %q", p.val) + } + } + return &Expansion{Op: op, Word: p.getWord()} +} + +func (p *Parser) eitherIndex() ArithmExpr { + old := p.quote + lpos := p.pos + p.quote = arithmExprBrack + p.next() + if p.tok == star || p.tok == at { + p.tok, p.val = _LitWord, p.tok.String() + } + expr := p.followArithm(leftBrack, lpos) + p.quote = old + p.matchedArithm(lpos, leftBrack, rightBrack) + return expr +} + +func (p *Parser) stopToken() bool { + switch p.tok { + case _EOF, _Newl, semicolon, and, or, andAnd, orOr, orAnd, dblSemicolon, + semiAnd, dblSemiAnd, semiOr, rightParen: + return true + case bckQuote: + return p.backquoteEnd() + } + return false +} + +func (p *Parser) backquoteEnd() bool { + return p.lastBquoteEsc < p.openBquotes +} + +// ValidName returns whether val is a valid name as per the POSIX spec. 
+func ValidName(val string) bool { + if val == "" { + return false + } + for i, r := range val { + switch { + case 'a' <= r && r <= 'z': + case 'A' <= r && r <= 'Z': + case r == '_': + case i > 0 && '0' <= r && r <= '9': + default: + return false + } + } + return true +} + +func numberLiteral(val string) bool { + for _, r := range val { + if '0' > r || r > '9' { + return false + } + } + return true +} + +func (p *Parser) hasValidIdent() bool { + if p.tok != _Lit && p.tok != _LitWord { + return false + } + if end := p.eqlOffs; end > 0 { + if p.val[end-1] == '+' && p.lang != LangPOSIX { + end-- // a+=x + } + if ValidName(p.val[:end]) { + return true + } + } else if !ValidName(p.val) { + return false // *[i]=x + } + return p.r == '[' // a[i]=x +} + +func (p *Parser) getAssign(needEqual bool) *Assign { + as := &Assign{} + if p.eqlOffs > 0 { // foo=bar + nameEnd := p.eqlOffs + if p.lang != LangPOSIX && p.val[p.eqlOffs-1] == '+' { + // a+=b + as.Append = true + nameEnd-- + } + as.Name = p.lit(p.pos, p.val[:nameEnd]) + // since we're not using the entire p.val + as.Name.ValueEnd = posAddCol(as.Name.ValuePos, nameEnd) + left := p.lit(posAddCol(p.pos, 1), p.val[p.eqlOffs+1:]) + if left.Value != "" { + left.ValuePos = posAddCol(left.ValuePos, p.eqlOffs) + as.Value = p.wordOne(left) + } + p.next() + } else { // foo[x]=bar + as.Name = p.lit(p.pos, p.val) + // hasValidIdent already checks p.r is '[' + p.rune() + p.pos = posAddCol(p.pos, 1) + as.Index = p.eitherIndex() + if p.spaced || p.stopToken() { + if needEqual { + p.followErr(as.Pos(), "a[b]", "=") + } else { + as.Naked = true + return as + } + } + if len(p.val) > 0 && p.val[0] == '+' { + as.Append = true + p.val = p.val[1:] + p.pos = posAddCol(p.pos, 1) + } + if len(p.val) < 1 || p.val[0] != '=' { + if as.Append { + p.followErr(as.Pos(), "a[b]+", "=") + } else { + p.followErr(as.Pos(), "a[b]", "=") + } + return nil + } + p.pos = posAddCol(p.pos, 1) + p.val = p.val[1:] + if p.val == "" { + p.next() + } + } + if p.spaced || p.stopToken() { + return as + } + if as.Value == nil && p.tok == leftParen { + if p.lang == LangPOSIX { + p.langErr(p.pos, "arrays", LangBash, LangMirBSDKorn) + } + if as.Index != nil { + p.curErr("arrays cannot be nested") + } + as.Array = &ArrayExpr{Lparen: p.pos} + newQuote := p.quote + if p.lang.isBash() { + newQuote = arrayElems + } + old := p.preNested(newQuote) + p.next() + p.got(_Newl) + for p.tok != _EOF && p.tok != rightParen { + ae := &ArrayElem{} + ae.Comments, p.accComs = p.accComs, nil + if p.tok == leftBrack { + left := p.pos + ae.Index = p.eitherIndex() + p.follow(left, `"[x]"`, assgn) + } + if ae.Value = p.getWord(); ae.Value == nil { + switch p.tok { + case leftParen: + p.curErr("arrays cannot be nested") + return nil + case _Newl, rightParen, leftBrack: + // TODO: support [index]=[ + default: + p.curErr("array element values must be words") + return nil + } + } + if len(p.accComs) > 0 { + c := p.accComs[0] + if c.Pos().Line() == ae.End().Line() { + ae.Comments = append(ae.Comments, c) + p.accComs = p.accComs[1:] + } + } + as.Array.Elems = append(as.Array.Elems, ae) + p.got(_Newl) + } + as.Array.Last, p.accComs = p.accComs, nil + p.postNested(old) + as.Array.Rparen = p.matched(as.Array.Lparen, leftParen, rightParen) + } else if w := p.getWord(); w != nil { + if as.Value == nil { + as.Value = w + } else { + as.Value.Parts = append(as.Value.Parts, w.Parts...) 
+ } + } + return as +} + +func (p *Parser) peekRedir() bool { + switch p.tok { + case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut, + hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir: + return true + } + return false +} + +func (p *Parser) doRedirect(s *Stmt) { + var r *Redirect + if s.Redirs == nil { + var alloc struct { + redirs [4]*Redirect + redir Redirect + } + s.Redirs = alloc.redirs[:0] + r = &alloc.redir + s.Redirs = append(s.Redirs, r) + } else { + r = &Redirect{} + s.Redirs = append(s.Redirs, r) + } + r.N = p.getLit() + if !p.lang.isBash() && r.N != nil && r.N.Value[0] == '{' { + p.langErr(r.N.Pos(), "{varname} redirects", LangBash) + } + if p.lang == LangPOSIX && (p.tok == rdrAll || p.tok == appAll) { + p.langErr(p.pos, "&> redirects", LangBash, LangMirBSDKorn) + } + r.Op, r.OpPos = RedirOperator(p.tok), p.pos + p.next() + switch r.Op { + case Hdoc, DashHdoc: + old := p.quote + p.quote, p.forbidNested = hdocWord, true + p.heredocs = append(p.heredocs, r) + r.Word = p.followWordTok(token(r.Op), r.OpPos) + p.quote, p.forbidNested = old, false + if p.tok == _Newl { + if len(p.accComs) > 0 { + c := p.accComs[0] + if c.Pos().Line() == s.End().Line() { + s.Comments = append(s.Comments, c) + p.accComs = p.accComs[1:] + } + } + p.doHeredocs() + } + case WordHdoc: + if p.lang == LangPOSIX { + p.langErr(r.OpPos, "herestrings", LangBash, LangMirBSDKorn) + } + fallthrough + default: + r.Word = p.followWordTok(token(r.Op), r.OpPos) + } +} + +func (p *Parser) getStmt(readEnd, binCmd, fnBody bool) *Stmt { + pos, ok := p.gotRsrv("!") + s := &Stmt{Position: pos} + if ok { + s.Negated = true + if p.stopToken() { + p.posErr(s.Pos(), `"!" cannot form a statement alone`) + } + if _, ok := p.gotRsrv("!"); ok { + p.posErr(s.Pos(), `cannot negate a command multiple times`) + } + } + if s = p.gotStmtPipe(s, false); s == nil || p.err != nil { + return nil + } + // instead of using recursion, iterate manually + for p.tok == andAnd || p.tok == orOr { + if binCmd { + // left associativity: in a list of BinaryCmds, the + // right recursion should only read a single element + return s + } + b := &BinaryCmd{ + OpPos: p.pos, + Op: BinCmdOperator(p.tok), + X: s, + } + p.next() + p.got(_Newl) + b.Y = p.getStmt(false, true, false) + if b.Y == nil || p.err != nil { + p.followErr(b.OpPos, b.Op.String(), "a statement") + return nil + } + s = &Stmt{Position: s.Position} + s.Cmd = b + s.Comments, b.X.Comments = b.X.Comments, nil + } + if readEnd { + switch p.tok { + case semicolon: + s.Semicolon = p.pos + p.next() + case and: + s.Semicolon = p.pos + p.next() + s.Background = true + case orAnd: + s.Semicolon = p.pos + p.next() + s.Coprocess = true + } + } + if len(p.accComs) > 0 && !binCmd && !fnBody { + c := p.accComs[0] + if c.Pos().Line() == s.End().Line() { + s.Comments = append(s.Comments, c) + p.accComs = p.accComs[1:] + } + } + return s +} + +func (p *Parser) gotStmtPipe(s *Stmt, binCmd bool) *Stmt { + s.Comments, p.accComs = p.accComs, nil + switch p.tok { + case _LitWord: + switch p.val { + case "{": + p.block(s) + case "if": + p.ifClause(s) + case "while", "until": + p.whileClause(s, p.val == "until") + case "for": + p.forClause(s) + case "case": + p.caseClause(s) + case "}": + p.curErr(`%q can only be used to close a block`, p.val) + case "then": + p.curErr(`%q can only be used in an if`, p.val) + case "elif": + p.curErr(`%q can only be used in an if`, p.val) + case "fi": + p.curErr(`%q can only be used to end an if`, p.val) + case "do": + p.curErr(`%q can only be used in a loop`, p.val) + case 
"done": + p.curErr(`%q can only be used to end a loop`, p.val) + case "esac": + p.curErr(`%q can only be used to end a case`, p.val) + case "!": + if !s.Negated { + p.curErr(`"!" can only be used in full statements`) + break + } + case "[[": + if p.lang != LangPOSIX { + p.testClause(s) + } + case "]]": + if p.lang != LangPOSIX { + p.curErr(`%q can only be used to close a test`, p.val) + } + case "let": + if p.lang != LangPOSIX { + p.letClause(s) + } + case "function": + if p.lang != LangPOSIX { + p.bashFuncDecl(s) + } + case "declare": + if p.lang.isBash() { // Note that mksh lacks this one. + p.declClause(s) + } + case "local", "export", "readonly", "typeset", "nameref": + if p.lang != LangPOSIX { + p.declClause(s) + } + case "time": + if p.lang != LangPOSIX { + p.timeClause(s) + } + case "coproc": + if p.lang.isBash() { // Note that mksh lacks this one. + p.coprocClause(s) + } + case "select": + if p.lang != LangPOSIX { + p.selectClause(s) + } + case "@test": + if p.lang == LangBats { + p.testDecl(s) + } + } + if s.Cmd != nil { + break + } + if p.hasValidIdent() { + p.callExpr(s, nil, true) + break + } + name := p.lit(p.pos, p.val) + if p.next(); p.got(leftParen) { + p.follow(name.ValuePos, "foo(", rightParen) + if p.lang == LangPOSIX && !ValidName(name.Value) { + p.posErr(name.Pos(), "invalid func name") + } + p.funcDecl(s, name, name.ValuePos, true) + } else { + p.callExpr(s, p.wordOne(name), false) + } + case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut, + hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir: + p.doRedirect(s) + p.callExpr(s, nil, false) + case bckQuote: + if p.backquoteEnd() { + return nil + } + fallthrough + case _Lit, dollBrace, dollDblParen, dollParen, dollar, cmdIn, cmdOut, + sglQuote, dollSglQuote, dblQuote, dollDblQuote, dollBrack, + globQuest, globStar, globPlus, globAt, globExcl: + if p.hasValidIdent() { + p.callExpr(s, nil, true) + break + } + w := p.wordAnyNumber() + if p.got(leftParen) { + p.posErr(w.Pos(), "invalid func name") + } + p.callExpr(s, w, false) + case leftParen: + p.subshell(s) + case dblLeftParen: + p.arithmExpCmd(s) + default: + if len(s.Redirs) == 0 { + return nil + } + } + for p.peekRedir() { + p.doRedirect(s) + } + // instead of using recursion, iterate manually + for p.tok == or || p.tok == orAnd { + if binCmd { + // left associativity: in a list of BinaryCmds, the + // right recursion should only read a single element + return s + } + if p.tok == orAnd && p.lang == LangMirBSDKorn { + // No need to check for LangPOSIX, as on that language + // we parse |& as two tokens. + break + } + b := &BinaryCmd{OpPos: p.pos, Op: BinCmdOperator(p.tok), X: s} + p.next() + p.got(_Newl) + if b.Y = p.gotStmtPipe(&Stmt{Position: p.pos}, true); b.Y == nil || p.err != nil { + p.followErr(b.OpPos, b.Op.String(), "a statement") + break + } + s = &Stmt{Position: s.Position} + s.Cmd = b + s.Comments, b.X.Comments = b.X.Comments, nil + // in "! 
x | y", the bang applies to the entire pipeline + s.Negated = b.X.Negated + b.X.Negated = false + } + return s +} + +func (p *Parser) subshell(s *Stmt) { + sub := &Subshell{Lparen: p.pos} + old := p.preNested(subCmd) + p.next() + sub.Stmts, sub.Last = p.stmtList() + p.postNested(old) + sub.Rparen = p.matched(sub.Lparen, leftParen, rightParen) + s.Cmd = sub +} + +func (p *Parser) arithmExpCmd(s *Stmt) { + ar := &ArithmCmd{Left: p.pos} + old := p.preNested(arithmExprCmd) + p.next() + if p.got(hash) { + if p.lang != LangMirBSDKorn { + p.langErr(ar.Pos(), "unsigned expressions", LangMirBSDKorn) + } + ar.Unsigned = true + } + ar.X = p.followArithm(dblLeftParen, ar.Left) + ar.Right = p.arithmEnd(dblLeftParen, ar.Left, old) + s.Cmd = ar +} + +func (p *Parser) block(s *Stmt) { + b := &Block{Lbrace: p.pos} + p.next() + b.Stmts, b.Last = p.stmtList("}") + pos, ok := p.gotRsrv("}") + b.Rbrace = pos + if !ok { + p.matchingErr(b.Lbrace, "{", "}") + } + s.Cmd = b +} + +func (p *Parser) ifClause(s *Stmt) { + rootIf := &IfClause{Position: p.pos} + p.next() + rootIf.Cond, rootIf.CondLast = p.followStmts("if", rootIf.Position, "then") + rootIf.ThenPos = p.followRsrv(rootIf.Position, "if ", "then") + rootIf.Then, rootIf.ThenLast = p.followStmts("then", rootIf.ThenPos, "fi", "elif", "else") + curIf := rootIf + for p.tok == _LitWord && p.val == "elif" { + elf := &IfClause{Position: p.pos} + curIf.Last = p.accComs + p.accComs = nil + p.next() + elf.Cond, elf.CondLast = p.followStmts("elif", elf.Position, "then") + elf.ThenPos = p.followRsrv(elf.Position, "elif ", "then") + elf.Then, elf.ThenLast = p.followStmts("then", elf.ThenPos, "fi", "elif", "else") + curIf.Else = elf + curIf = elf + } + if elsePos, ok := p.gotRsrv("else"); ok { + curIf.Last = p.accComs + p.accComs = nil + els := &IfClause{Position: elsePos} + els.Then, els.ThenLast = p.followStmts("else", els.Position, "fi") + curIf.Else = els + curIf = els + } + curIf.Last = p.accComs + p.accComs = nil + rootIf.FiPos = p.stmtEnd(rootIf, "if", "fi") + for els := rootIf.Else; els != nil; els = els.Else { + // All the nested IfClauses share the same FiPos. + els.FiPos = rootIf.FiPos + } + s.Cmd = rootIf +} + +func (p *Parser) whileClause(s *Stmt, until bool) { + wc := &WhileClause{WhilePos: p.pos, Until: until} + rsrv := "while" + rsrvCond := "while " + if wc.Until { + rsrv = "until" + rsrvCond = "until " + } + p.next() + wc.Cond, wc.CondLast = p.followStmts(rsrv, wc.WhilePos, "do") + wc.DoPos = p.followRsrv(wc.WhilePos, rsrvCond, "do") + wc.Do, wc.DoLast = p.followStmts("do", wc.DoPos, "done") + wc.DonePos = p.stmtEnd(wc, rsrv, "done") + s.Cmd = wc +} + +func (p *Parser) forClause(s *Stmt) { + fc := &ForClause{ForPos: p.pos} + p.next() + fc.Loop = p.loop(fc.ForPos) + + start, end := "do", "done" + if pos, ok := p.gotRsrv("{"); ok { + if p.lang == LangPOSIX { + p.langErr(pos, "for loops with braces", LangBash, LangMirBSDKorn) + } + fc.DoPos = pos + fc.Braces = true + start, end = "{", "}" + } else { + fc.DoPos = p.followRsrv(fc.ForPos, "for foo [in words]", start) + } + + s.Comments = append(s.Comments, p.accComs...) 
+ p.accComs = nil + fc.Do, fc.DoLast = p.followStmts(start, fc.DoPos, end) + fc.DonePos = p.stmtEnd(fc, "for", end) + s.Cmd = fc +} + +func (p *Parser) loop(fpos Pos) Loop { + if !p.lang.isBash() { + switch p.tok { + case leftParen, dblLeftParen: + p.langErr(p.pos, "c-style fors", LangBash) + } + } + if p.tok == dblLeftParen { + cl := &CStyleLoop{Lparen: p.pos} + old := p.preNested(arithmExprCmd) + p.next() + cl.Init = p.arithmExpr(false) + if !p.got(dblSemicolon) { + p.follow(p.pos, "expr", semicolon) + cl.Cond = p.arithmExpr(false) + p.follow(p.pos, "expr", semicolon) + } + cl.Post = p.arithmExpr(false) + cl.Rparen = p.arithmEnd(dblLeftParen, cl.Lparen, old) + p.got(semicolon) + p.got(_Newl) + return cl + } + return p.wordIter("for", fpos) +} + +func (p *Parser) wordIter(ftok string, fpos Pos) *WordIter { + wi := &WordIter{} + if wi.Name = p.getLit(); wi.Name == nil { + p.followErr(fpos, ftok, "a literal") + } + if p.got(semicolon) { + p.got(_Newl) + return wi + } + p.got(_Newl) + if pos, ok := p.gotRsrv("in"); ok { + wi.InPos = pos + for !p.stopToken() { + if w := p.getWord(); w == nil { + p.curErr("word list can only contain words") + } else { + wi.Items = append(wi.Items, w) + } + } + p.got(semicolon) + p.got(_Newl) + } else if p.tok == _LitWord && p.val == "do" { + } else { + p.followErr(fpos, ftok+" foo", `"in", "do", ;, or a newline`) + } + return wi +} + +func (p *Parser) selectClause(s *Stmt) { + fc := &ForClause{ForPos: p.pos, Select: true} + p.next() + fc.Loop = p.wordIter("select", fc.ForPos) + fc.DoPos = p.followRsrv(fc.ForPos, "select foo [in words]", "do") + fc.Do, fc.DoLast = p.followStmts("do", fc.DoPos, "done") + fc.DonePos = p.stmtEnd(fc, "select", "done") + s.Cmd = fc +} + +func (p *Parser) caseClause(s *Stmt) { + cc := &CaseClause{Case: p.pos} + p.next() + cc.Word = p.getWord() + if cc.Word == nil { + p.followErr(cc.Case, "case", "a word") + } + end := "esac" + p.got(_Newl) + if pos, ok := p.gotRsrv("{"); ok { + cc.In = pos + cc.Braces = true + if p.lang != LangMirBSDKorn { + p.posErr(cc.Pos(), `"case i {" is a mksh feature`) + } + end = "}" + } else { + cc.In = p.followRsrv(cc.Case, "case x", "in") + } + cc.Items = p.caseItems(end) + cc.Last, p.accComs = p.accComs, nil + cc.Esac = p.stmtEnd(cc, "case", end) + s.Cmd = cc +} + +func (p *Parser) caseItems(stop string) (items []*CaseItem) { + p.got(_Newl) + for p.tok != _EOF && (p.tok != _LitWord || p.val != stop) { + ci := &CaseItem{} + ci.Comments, p.accComs = p.accComs, nil + p.got(leftParen) + for p.tok != _EOF { + if w := p.getWord(); w == nil { + p.curErr("case patterns must consist of words") + } else { + ci.Patterns = append(ci.Patterns, w) + } + if p.tok == rightParen { + break + } + if !p.got(or) { + p.curErr("case patterns must be separated with |") + } + } + old := p.preNested(switchCase) + p.next() + ci.Stmts, ci.Last = p.stmtList(stop) + p.postNested(old) + switch p.tok { + case dblSemicolon, semiAnd, dblSemiAnd, semiOr: + default: + ci.Op = Break + items = append(items, ci) + return + } + ci.Last = append(ci.Last, p.accComs...) + p.accComs = nil + ci.OpPos = p.pos + ci.Op = CaseOperator(p.tok) + p.next() + p.got(_Newl) + + // Split the comments: + // + // case x in + // a) + // foo + // ;; + // # comment for a + // # comment for b + // b) + // [...] + split := len(p.accComs) + for i := len(p.accComs) - 1; i >= 0; i-- { + c := p.accComs[i] + if c.Pos().Col() != p.pos.Col() { + break + } + split = i + } + ci.Comments = append(ci.Comments, p.accComs[:split]...) 
+ p.accComs = p.accComs[split:] + + items = append(items, ci) + } + return +} + +func (p *Parser) testClause(s *Stmt) { + tc := &TestClause{Left: p.pos} + old := p.preNested(testExpr) + p.next() + if _, ok := p.gotRsrv("]]"); ok || p.tok == _EOF { + p.posErr(tc.Left, "test clause requires at least one expression") + } + tc.X = p.testExpr(false) + if tc.X == nil { + p.followErrExp(tc.Left, "[[") + } + tc.Right = p.pos + if _, ok := p.gotRsrv("]]"); !ok { + p.matchingErr(tc.Left, "[[", "]]") + } + p.postNested(old) + s.Cmd = tc +} + +func (p *Parser) testExpr(pastAndOr bool) TestExpr { + p.got(_Newl) + var left TestExpr + if pastAndOr { + left = p.testExprBase() + } else { + left = p.testExpr(true) + } + if left == nil { + return left + } + p.got(_Newl) + switch p.tok { + case andAnd, orOr: + case _LitWord: + if p.val == "]]" { + return left + } + if p.tok = token(testBinaryOp(p.val)); p.tok == illegalTok { + p.curErr("not a valid test operator: %s", p.val) + } + case rdrIn, rdrOut: + case _EOF, rightParen: + return left + case _Lit: + p.curErr("test operator words must consist of a single literal") + default: + p.curErr("not a valid test operator: %v", p.tok) + } + b := &BinaryTest{ + OpPos: p.pos, + Op: BinTestOperator(p.tok), + X: left, + } + // Save the previous quoteState, since we change it in TsReMatch. + oldQuote := p.quote + + switch b.Op { + case AndTest, OrTest: + p.next() + if b.Y = p.testExpr(false); b.Y == nil { + p.followErrExp(b.OpPos, b.Op.String()) + } + case TsReMatch: + if !p.lang.isBash() { + p.langErr(p.pos, "regex tests", LangBash) + } + p.rxOpenParens = 0 + p.rxFirstPart = true + // TODO(mvdan): Using nested states within a regex will break in + // all sorts of ways. The better fix is likely to use a stop + // token, like we do with heredocs. 
+ p.quote = testExprRegexp + fallthrough + default: + if _, ok := b.X.(*Word); !ok { + p.posErr(b.OpPos, "expected %s, %s or %s after complex expr", + AndTest, OrTest, "]]") + } + p.next() + b.Y = p.followWordTok(token(b.Op), b.OpPos) + } + p.quote = oldQuote + return b +} + +func (p *Parser) testExprBase() TestExpr { + switch p.tok { + case _EOF, rightParen: + return nil + case _LitWord: + op := token(testUnaryOp(p.val)) + switch op { + case illegalTok: + case tsRefVar, tsModif: // not available in mksh + if p.lang.isBash() { + p.tok = op + } + default: + p.tok = op + } + } + switch p.tok { + case exclMark: + u := &UnaryTest{OpPos: p.pos, Op: TsNot} + p.next() + if u.X = p.testExpr(false); u.X == nil { + p.followErrExp(u.OpPos, u.Op.String()) + } + return u + case tsExists, tsRegFile, tsDirect, tsCharSp, tsBlckSp, tsNmPipe, + tsSocket, tsSmbLink, tsSticky, tsGIDSet, tsUIDSet, tsGrpOwn, + tsUsrOwn, tsModif, tsRead, tsWrite, tsExec, tsNoEmpty, + tsFdTerm, tsEmpStr, tsNempStr, tsOptSet, tsVarSet, tsRefVar: + u := &UnaryTest{OpPos: p.pos, Op: UnTestOperator(p.tok)} + p.next() + u.X = p.followWordTok(token(u.Op), u.OpPos) + return u + case leftParen: + pe := &ParenTest{Lparen: p.pos} + p.next() + if pe.X = p.testExpr(false); pe.X == nil { + p.followErrExp(pe.Lparen, "(") + } + pe.Rparen = p.matched(pe.Lparen, leftParen, rightParen) + return pe + case _LitWord: + if p.val == "]]" { + return nil + } + fallthrough + default: + if w := p.getWord(); w != nil { + return w + } + // otherwise we'd return a typed nil above + return nil + } +} + +func (p *Parser) declClause(s *Stmt) { + ds := &DeclClause{Variant: p.lit(p.pos, p.val)} + p.next() + for !p.stopToken() && !p.peekRedir() { + if p.hasValidIdent() { + ds.Args = append(ds.Args, p.getAssign(false)) + } else if p.eqlOffs > 0 { + p.curErr("invalid var name") + } else if p.tok == _LitWord && ValidName(p.val) { + ds.Args = append(ds.Args, &Assign{ + Naked: true, + Name: p.getLit(), + }) + } else if w := p.getWord(); w != nil { + ds.Args = append(ds.Args, &Assign{ + Naked: true, + Value: w, + }) + } else { + p.followErr(p.pos, ds.Variant.Value, "names or assignments") + } + } + s.Cmd = ds +} + +func isBashCompoundCommand(tok token, val string) bool { + switch tok { + case leftParen, dblLeftParen: + return true + case _LitWord: + switch val { + case "{", "if", "while", "until", "for", "case", "[[", + "coproc", "let", "function", "declare", "local", + "export", "readonly", "typeset", "nameref": + return true + } + } + return false +} + +func (p *Parser) timeClause(s *Stmt) { + tc := &TimeClause{Time: p.pos} + p.next() + if _, ok := p.gotRsrv("-p"); ok { + tc.PosixFormat = true + } + tc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false) + s.Cmd = tc +} + +func (p *Parser) coprocClause(s *Stmt) { + cc := &CoprocClause{Coproc: p.pos} + if p.next(); isBashCompoundCommand(p.tok, p.val) { + // has no name + cc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false) + s.Cmd = cc + return + } + cc.Name = p.getWord() + cc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false) + if cc.Stmt == nil { + if cc.Name == nil { + p.posErr(cc.Coproc, "coproc clause requires a command") + return + } + // name was in fact the stmt + cc.Stmt = &Stmt{Position: cc.Name.Pos()} + cc.Stmt.Cmd = p.call(cc.Name) + cc.Name = nil + } else if cc.Name != nil { + if call, ok := cc.Stmt.Cmd.(*CallExpr); ok { + // name was in fact the start of a call + call.Args = append([]*Word{cc.Name}, call.Args...) 
+ cc.Name = nil + } + } + s.Cmd = cc +} + +func (p *Parser) letClause(s *Stmt) { + lc := &LetClause{Let: p.pos} + old := p.preNested(arithmExprLet) + p.next() + for !p.stopToken() && !p.peekRedir() { + x := p.arithmExpr(true) + if x == nil { + break + } + lc.Exprs = append(lc.Exprs, x) + } + if len(lc.Exprs) == 0 { + p.followErrExp(lc.Let, "let") + } + p.postNested(old) + s.Cmd = lc +} + +func (p *Parser) bashFuncDecl(s *Stmt) { + fpos := p.pos + if p.next(); p.tok != _LitWord { + p.followErr(fpos, "function", "a name") + } + name := p.lit(p.pos, p.val) + hasParens := false + if p.next(); p.got(leftParen) { + hasParens = true + p.follow(name.ValuePos, "foo(", rightParen) + } + p.funcDecl(s, name, fpos, hasParens) +} + +func (p *Parser) testDecl(s *Stmt) { + td := &TestDecl{Position: p.pos} + p.next() + if td.Description = p.getWord(); td.Description == nil { + p.followErr(td.Position, "@test", "a description word") + } + if td.Body = p.getStmt(false, false, true); td.Body == nil { + p.followErr(td.Position, `@test "desc"`, "a statement") + } + s.Cmd = td +} + +func (p *Parser) callExpr(s *Stmt, w *Word, assign bool) { + ce := p.call(w) + if w == nil { + ce.Args = ce.Args[:0] + } + if assign { + ce.Assigns = append(ce.Assigns, p.getAssign(true)) + } +loop: + for { + switch p.tok { + case _EOF, _Newl, semicolon, and, or, andAnd, orOr, orAnd, + dblSemicolon, semiAnd, dblSemiAnd, semiOr: + break loop + case _LitWord: + if len(ce.Args) == 0 && p.hasValidIdent() { + ce.Assigns = append(ce.Assigns, p.getAssign(true)) + break + } + // Avoid failing later with the confusing "} can only be used to close a block". + if p.lang == LangPOSIX && p.val == "{" && w != nil && w.Lit() == "function" { + p.curErr("the %q builtin is a bash feature; tried parsing as posix", "function") + } + ce.Args = append(ce.Args, p.wordOne(p.lit(p.pos, p.val))) + p.next() + case _Lit: + if len(ce.Args) == 0 && p.hasValidIdent() { + ce.Assigns = append(ce.Assigns, p.getAssign(true)) + break + } + ce.Args = append(ce.Args, p.wordAnyNumber()) + case bckQuote: + if p.backquoteEnd() { + break loop + } + fallthrough + case dollBrace, dollDblParen, dollParen, dollar, cmdIn, cmdOut, + sglQuote, dollSglQuote, dblQuote, dollDblQuote, dollBrack, + globQuest, globStar, globPlus, globAt, globExcl: + ce.Args = append(ce.Args, p.wordAnyNumber()) + case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut, + hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir: + p.doRedirect(s) + case dblLeftParen: + p.curErr("%s can only be used to open an arithmetic cmd", p.tok) + case rightParen: + if p.quote == subCmd { + break loop + } + fallthrough + default: + // Note that we'll only keep the first error that happens. 
+ if len(ce.Args) > 0 { + if cmd := ce.Args[0].Lit(); p.lang == LangPOSIX && isBashCompoundCommand(_LitWord, cmd) { + p.curErr("the %q builtin is a bash feature; tried parsing as posix", cmd) + } + } + p.curErr("a command can only contain words and redirects; encountered %s", p.tok) + } + } + if len(ce.Assigns) == 0 && len(ce.Args) == 0 { + return + } + if len(ce.Args) == 0 { + ce.Args = nil + } else { + for _, asgn := range ce.Assigns { + if asgn.Index != nil || asgn.Array != nil { + p.posErr(asgn.Pos(), "inline variables cannot be arrays") + } + } + } + s.Cmd = ce +} + +func (p *Parser) funcDecl(s *Stmt, name *Lit, pos Pos, withParens bool) { + fd := &FuncDecl{ + Position: pos, + RsrvWord: pos != name.ValuePos, + Parens: withParens, + Name: name, + } + p.got(_Newl) + if fd.Body = p.getStmt(false, false, true); fd.Body == nil { + p.followErr(fd.Pos(), "foo()", "a statement") + } + s.Cmd = fd +} diff --git a/cli/connhelper/internal/syntax/quote.go b/cli/connhelper/internal/syntax/quote.go new file mode 100644 index 0000000000..6f27eba12d --- /dev/null +++ b/cli/connhelper/internal/syntax/quote.go @@ -0,0 +1,185 @@ +// Copyright (c) 2021, Daniel Martí +// See LICENSE for licensing information + +package syntax + +import ( + "fmt" + "strings" + "unicode" + "unicode/utf8" +) + +type QuoteError struct { + ByteOffset int + Message string +} + +func (e QuoteError) Error() string { + return fmt.Sprintf("cannot quote character at byte %d: %s", e.ByteOffset, e.Message) +} + +const ( + quoteErrNull = "shell strings cannot contain null bytes" + quoteErrPOSIX = "POSIX shell lacks escape sequences" + quoteErrRange = "rune out of range" + quoteErrMksh = "mksh cannot escape codepoints above 16 bits" +) + +// Quote returns a quoted version of the input string, +// so that the quoted version is expanded or interpreted +// as the original string in the given language variant. +// +// Quoting is necessary when using arbitrary literal strings +// as words in a shell script or command. +// Without quoting, one can run into syntax errors, +// as well as the possibility of running unintended code. +// +// An error is returned when a string cannot be quoted for a variant. +// For instance, POSIX lacks escape sequences for non-printable characters, +// and no language variant can represent a string containing null bytes. +// In such cases, the returned error type will be *QuoteError. +// +// The quoting strategy is chosen on a best-effort basis, +// to minimize the amount of extra bytes necessary. +// +// Some strings do not require any quoting and are returned unchanged. +// Those strings can be directly surrounded in single quotes as well. +func Quote(s string, lang LangVariant) (string, error) { + if s == "" { + // Special case; an empty string must always be quoted, + // as otherwise it expands to zero fields. + return "''", nil + } + shellChars := false + nonPrintable := false + offs := 0 + for rem := s; len(rem) > 0; { + r, size := utf8.DecodeRuneInString(rem) + switch r { + // Like regOps; token characters. + case ';', '"', '\'', '(', ')', '$', '|', '&', '>', '<', '`', + // Whitespace; might result in multiple fields. + ' ', '\t', '\r', '\n', + // Escape sequences would be expanded. + '\\', + // Would start a comment unless quoted. + '#', + // Might result in brace expansion. + '{', + // Might result in tilde expansion. + '~', + // Might result in globbing. + '*', '?', '[', + // Might result in an assignment. 
+ '=': + shellChars = true + case '\x00': + return "", &QuoteError{ByteOffset: offs, Message: quoteErrNull} + } + if r == utf8.RuneError || !unicode.IsPrint(r) { + if lang == LangPOSIX { + return "", &QuoteError{ByteOffset: offs, Message: quoteErrPOSIX} + } + nonPrintable = true + } + rem = rem[size:] + offs += size + } + if !shellChars && !nonPrintable && !IsKeyword(s) { + // Nothing to quote; avoid allocating. + return s, nil + } + + // Single quotes are usually best, + // as they don't require any escaping of characters. + // If we have any invalid utf8 or non-printable runes, + // use $'' so that we can escape them. + // Note that we can't use double quotes for those. + var b strings.Builder + if nonPrintable { + b.WriteString("$'") + lastRequoteIfHex := false + offs := 0 + for rem := s; len(rem) > 0; { + nextRequoteIfHex := false + r, size := utf8.DecodeRuneInString(rem) + switch { + case r == '\'', r == '\\': + b.WriteByte('\\') + b.WriteRune(r) + case unicode.IsPrint(r) && r != utf8.RuneError: + if lastRequoteIfHex && isHex(r) { + b.WriteString("'$'") + } + b.WriteRune(r) + case r == '\a': + b.WriteString(`\a`) + case r == '\b': + b.WriteString(`\b`) + case r == '\f': + b.WriteString(`\f`) + case r == '\n': + b.WriteString(`\n`) + case r == '\r': + b.WriteString(`\r`) + case r == '\t': + b.WriteString(`\t`) + case r == '\v': + b.WriteString(`\v`) + case r < utf8.RuneSelf, r == utf8.RuneError && size == 1: + // \xXX, fixed at two hexadecimal characters. + fmt.Fprintf(&b, "\\x%02x", rem[0]) + // Unfortunately, mksh allows \x to consume more hex characters. + // Ensure that we don't allow it to read more than two. + if lang == LangMirBSDKorn { + nextRequoteIfHex = true + } + case r > utf8.MaxRune: + // Not a valid Unicode code point? + return "", &QuoteError{ByteOffset: offs, Message: quoteErrRange} + case lang == LangMirBSDKorn && r > 0xFFFD: + // From the CAVEATS section in R59's man page: + // + // mksh currently uses OPTU-16 internally, which is the same as + // UTF-8 and CESU-8 with 0000..FFFD being valid codepoints. + return "", &QuoteError{ByteOffset: offs, Message: quoteErrMksh} + case r < 0x10000: + // \uXXXX, fixed at four hexadecimal characters. + fmt.Fprintf(&b, "\\u%04x", r) + default: + // \UXXXXXXXX, fixed at eight hexadecimal characters. + fmt.Fprintf(&b, "\\U%08x", r) + } + rem = rem[size:] + lastRequoteIfHex = nextRequoteIfHex + offs += size + } + b.WriteString("'") + return b.String(), nil + } + + // Single quotes without any need for escaping. + if !strings.Contains(s, "'") { + return "'" + s + "'", nil + } + + // The string contains single quotes, + // so fall back to double quotes. 
+ b.WriteByte('"') + for _, r := range s { + switch r { + case '"', '\\', '`', '$': + b.WriteByte('\\') + } + b.WriteRune(r) + } + b.WriteByte('"') + return b.String(), nil +} + +func isHex(r rune) bool { + return (r >= '0' && r <= '9') || + (r >= 'a' && r <= 'f') || + (r >= 'A' && r <= 'F') +} From 52d2a9b5ae77c6e3417507c484e9e9f5d6d430fc Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Wed, 5 Feb 2025 15:12:27 +0100 Subject: [PATCH 2/4] cli/connhelper/internal/syntax: remove unused code from fork Signed-off-by: Sebastiaan van Stijn --- cli/connhelper/internal/syntax/parser.go | 2369 ---------------------- 1 file changed, 2369 deletions(-) diff --git a/cli/connhelper/internal/syntax/parser.go b/cli/connhelper/internal/syntax/parser.go index 0bc10e8694..d0382299fb 100644 --- a/cli/connhelper/internal/syntax/parser.go +++ b/cli/connhelper/internal/syntax/parser.go @@ -3,25 +3,6 @@ package syntax -import ( - "fmt" - "io" - "strconv" - "strings" - "unicode/utf8" -) - -// ParserOption is a function which can be passed to NewParser -// to alter its behavior. To apply option to existing Parser -// call it directly, for example KeepComments(true)(parser). -type ParserOption func(*Parser) - -// KeepComments makes the parser parse comments and attach them to -// nodes, as opposed to discarding them. -func KeepComments(enabled bool) ParserOption { - return func(p *Parser) { p.keepComments = enabled } -} - // LangVariant describes a shell language variant to use when tokenizing and // parsing shell code. The zero value is [LangBash]. type LangVariant int @@ -66,22 +47,6 @@ const ( LangAuto ) -// Variant changes the shell language variant that the parser will -// accept. -// -// The passed language variant must be one of the constant values defined in -// this package. -func Variant(l LangVariant) ParserOption { - switch l { - case LangBash, LangPOSIX, LangMirBSDKorn, LangBats: - case LangAuto: - panic("LangAuto is not supported by the parser at this time") - default: - panic(fmt.Sprintf("unknown shell language variant: %d", l)) - } - return func(p *Parser) { p.lang = l } -} - func (l LangVariant) String() string { switch l { case LangBash: @@ -98,652 +63,6 @@ func (l LangVariant) String() string { return "unknown shell language variant" } -func (l *LangVariant) Set(s string) error { - switch s { - case "bash": - *l = LangBash - case "posix", "sh": - *l = LangPOSIX - case "mksh": - *l = LangMirBSDKorn - case "bats": - *l = LangBats - case "auto": - *l = LangAuto - default: - return fmt.Errorf("unknown shell language variant: %q", s) - } - return nil -} - -func (l LangVariant) isBash() bool { - return l == LangBash || l == LangBats -} - -// StopAt configures the lexer to stop at an arbitrary word, treating it -// as if it were the end of the input. It can contain any characters -// except whitespace, and cannot be over four bytes in size. -// -// This can be useful to embed shell code within another language, as -// one can use a special word to mark the delimiters between the two. -// -// As a word, it will only apply when following whitespace or a -// separating token. For example, StopAt("$$") will act on the inputs -// "foo $$" and "foo;$$", but not on "foo '$$'". -// -// The match is done by prefix, so the example above will also act on -// "foo $$bar". 
-func StopAt(word string) ParserOption { - if len(word) > 4 { - panic("stop word can't be over four bytes in size") - } - if strings.ContainsAny(word, " \t\n\r") { - panic("stop word can't contain whitespace characters") - } - return func(p *Parser) { p.stopAt = []byte(word) } -} - -// NewParser allocates a new [Parser] and applies any number of options. -func NewParser(options ...ParserOption) *Parser { - p := &Parser{} - for _, opt := range options { - opt(p) - } - return p -} - -// Parse reads and parses a shell program with an optional name. It -// returns the parsed program if no issues were encountered. Otherwise, -// an error is returned. Reads from r are buffered. -// -// Parse can be called more than once, but not concurrently. That is, a -// Parser can be reused once it is done working. -func (p *Parser) Parse(r io.Reader, name string) (*File, error) { - p.reset() - p.f = &File{Name: name} - p.src = r - p.rune() - p.next() - p.f.Stmts, p.f.Last = p.stmtList() - if p.err == nil { - // EOF immediately after heredoc word so no newline to - // trigger it - p.doHeredocs() - } - return p.f, p.err -} - -// Stmts reads and parses statements one at a time, calling a function -// each time one is parsed. If the function returns false, parsing is -// stopped and the function is not called again. -func (p *Parser) Stmts(r io.Reader, fn func(*Stmt) bool) error { - p.reset() - p.f = &File{} - p.src = r - p.rune() - p.next() - p.stmts(fn) - if p.err == nil { - // EOF immediately after heredoc word so no newline to - // trigger it - p.doHeredocs() - } - return p.err -} - -type wrappedReader struct { - *Parser - io.Reader - - lastLine int64 - accumulated []*Stmt - fn func([]*Stmt) bool -} - -func (w *wrappedReader) Read(p []byte) (n int, err error) { - // If we lexed a newline for the first time, we just finished a line, so - // we may need to give a callback for the edge cases below not covered - // by Parser.Stmts. - if (w.r == '\n' || w.r == escNewl) && w.line > w.lastLine { - if w.Incomplete() { - // Incomplete statement; call back to print "> ". - if !w.fn(w.accumulated) { - return 0, io.EOF - } - } else if len(w.accumulated) == 0 { - // Nothing was parsed; call back to print another "$ ". - if !w.fn(nil) { - return 0, io.EOF - } - } - w.lastLine = w.line - } - return w.Reader.Read(p) -} - -// Interactive implements what is necessary to parse statements in an -// interactive shell. The parser will call the given function under two -// circumstances outlined below. -// -// If a line containing any number of statements is parsed, the function will be -// called with said statements. -// -// If a line ending in an incomplete statement is parsed, the function will be -// called with any fully parsed statements, and [Parser.Incomplete] will return true. -// -// One can imagine a simple interactive shell implementation as follows: -// -// fmt.Fprintf(os.Stdout, "$ ") -// parser.Interactive(os.Stdin, func(stmts []*syntax.Stmt) bool { -// if parser.Incomplete() { -// fmt.Fprintf(os.Stdout, "> ") -// return true -// } -// run(stmts) -// fmt.Fprintf(os.Stdout, "$ ") -// return true -// } -// -// If the callback function returns false, parsing is stopped and the function -// is not called again. 
-func (p *Parser) Interactive(r io.Reader, fn func([]*Stmt) bool) error { - w := wrappedReader{Parser: p, Reader: r, fn: fn} - return p.Stmts(&w, func(stmt *Stmt) bool { - w.accumulated = append(w.accumulated, stmt) - // We finished parsing a statement and we're at a newline token, - // so we finished fully parsing a number of statements. Call - // back to run the statements and print "$ ". - if p.tok == _Newl { - if !fn(w.accumulated) { - return false - } - w.accumulated = w.accumulated[:0] - // The callback above would already print "$ ", so we - // don't want the subsequent wrappedReader.Read to cause - // another "$ " print thinking that nothing was parsed. - w.lastLine = w.line + 1 - } - return true - }) -} - -// Words reads and parses words one at a time, calling a function each time one -// is parsed. If the function returns false, parsing is stopped and the function -// is not called again. -// -// Newlines are skipped, meaning that multi-line input will work fine. If the -// parser encounters a token that isn't a word, such as a semicolon, an error -// will be returned. -// -// Note that the lexer doesn't currently tokenize spaces, so it may need to read -// a non-space byte such as a newline or a letter before finishing the parsing -// of a word. This will be fixed in the future. -func (p *Parser) Words(r io.Reader, fn func(*Word) bool) error { - p.reset() - p.f = &File{} - p.src = r - p.rune() - p.next() - for { - p.got(_Newl) - w := p.getWord() - if w == nil { - if p.tok != _EOF { - p.curErr("%s is not a valid word", p.tok) - } - return p.err - } - if !fn(w) { - return nil - } - } -} - -// Document parses a single here-document word. That is, it parses the input as -// if they were lines following a < 0 || p.litBs != nil -} - -const bufSize = 1 << 10 - -func (p *Parser) reset() { - p.tok, p.val = illegalTok, "" - p.eqlOffs = 0 - p.bs, p.bsp = nil, 0 - p.offs, p.line, p.col = 0, 1, 1 - p.r, p.w = 0, 0 - p.err, p.readErr = nil, nil - p.quote, p.forbidNested = noState, false - p.openStmts = 0 - p.heredocs, p.buriedHdocs = p.heredocs[:0], 0 - p.hdocStops = nil - p.parsingDoc = false - p.openBquotes = 0 - p.accComs = nil - p.accComs, p.curComs = nil, &p.accComs - p.litBatch = nil - p.wordBatch = nil - p.litBs = nil -} - -func (p *Parser) nextPos() Pos { - // Basic protection against offset overflow; - // note that an offset of 0 is valid, so we leave the maximum. 
- offset := min(p.offs+int64(p.bsp)-int64(p.w), offsetMax) - var line, col uint - if p.line <= lineMax { - line = uint(p.line) - } - if p.col <= colMax { - col = uint(p.col) - } - return NewPos(uint(offset), line, col) -} - -func (p *Parser) lit(pos Pos, val string) *Lit { - if len(p.litBatch) == 0 { - p.litBatch = make([]Lit, 32) - } - l := &p.litBatch[0] - p.litBatch = p.litBatch[1:] - l.ValuePos = pos - l.ValueEnd = p.nextPos() - l.Value = val - return l -} - -type wordAlloc struct { - word Word - parts [1]WordPart -} - -func (p *Parser) wordAnyNumber() *Word { - if len(p.wordBatch) == 0 { - p.wordBatch = make([]wordAlloc, 32) - } - alloc := &p.wordBatch[0] - p.wordBatch = p.wordBatch[1:] - w := &alloc.word - w.Parts = p.wordParts(alloc.parts[:0]) - return w -} - -func (p *Parser) wordOne(part WordPart) *Word { - if len(p.wordBatch) == 0 { - p.wordBatch = make([]wordAlloc, 32) - } - alloc := &p.wordBatch[0] - p.wordBatch = p.wordBatch[1:] - w := &alloc.word - w.Parts = alloc.parts[:1] - w.Parts[0] = part - return w -} - -func (p *Parser) call(w *Word) *CallExpr { - var alloc struct { - ce CallExpr - ws [4]*Word - } - ce := &alloc.ce - ce.Args = alloc.ws[:1] - ce.Args[0] = w - return ce -} - -//go:generate stringer -type=quoteState - -type quoteState uint32 - -const ( - noState quoteState = 1 << iota - subCmd - subCmdBckquo - dblQuotes - hdocWord - hdocBody - hdocBodyTabs - arithmExpr - arithmExprLet - arithmExprCmd - arithmExprBrack - testExpr - testExprRegexp - switchCase - paramExpName - paramExpSlice - paramExpRepl - paramExpExp - arrayElems - - allKeepSpaces = paramExpRepl | dblQuotes | hdocBody | - hdocBodyTabs | paramExpExp - allRegTokens = noState | subCmd | subCmdBckquo | hdocWord | - switchCase | arrayElems | testExpr - allArithmExpr = arithmExpr | arithmExprLet | arithmExprCmd | - arithmExprBrack | paramExpSlice - allParamReg = paramExpName | paramExpSlice - allParamExp = allParamReg | paramExpRepl | paramExpExp | arithmExprBrack -) - -type saveState struct { - quote quoteState - buriedHdocs int -} - -func (p *Parser) preNested(quote quoteState) (s saveState) { - s.quote, s.buriedHdocs = p.quote, p.buriedHdocs - p.buriedHdocs, p.quote = len(p.heredocs), quote - return -} - -func (p *Parser) postNested(s saveState) { - p.quote, p.buriedHdocs = s.quote, s.buriedHdocs -} - -func (p *Parser) unquotedWordBytes(w *Word) ([]byte, bool) { - buf := make([]byte, 0, 4) - didUnquote := false - for _, wp := range w.Parts { - buf, didUnquote = p.unquotedWordPart(buf, wp, false) - } - return buf, didUnquote -} - -func (p *Parser) unquotedWordPart(buf []byte, wp WordPart, quotes bool) (_ []byte, quoted bool) { - switch wp := wp.(type) { - case *Lit: - for i := 0; i < len(wp.Value); i++ { - if b := wp.Value[i]; b == '\\' && !quotes { - if i++; i < len(wp.Value) { - buf = append(buf, wp.Value[i]) - } - quoted = true - } else { - buf = append(buf, b) - } - } - case *SglQuoted: - buf = append(buf, []byte(wp.Value)...) - quoted = true - case *DblQuoted: - for _, wp2 := range wp.Parts { - buf, _ = p.unquotedWordPart(buf, wp2, true) - } - quoted = true - } - return buf, quoted -} - -func (p *Parser) doHeredocs() { - hdocs := p.heredocs[p.buriedHdocs:] - if len(hdocs) == 0 { - // Nothing do do; don't even issue a read. 
- return - } - p.rune() // consume '\n', since we know p.tok == _Newl - old := p.quote - p.heredocs = p.heredocs[:p.buriedHdocs] - for i, r := range hdocs { - if p.err != nil { - break - } - p.quote = hdocBody - if r.Op == DashHdoc { - p.quote = hdocBodyTabs - } - stop, quoted := p.unquotedWordBytes(r.Word) - p.hdocStops = append(p.hdocStops, stop) - if i > 0 && p.r == '\n' { - p.rune() - } - lastLine := p.line - if quoted { - r.Hdoc = p.quotedHdocWord() - } else { - p.next() - r.Hdoc = p.getWord() - } - if r.Hdoc != nil { - lastLine = int64(r.Hdoc.End().Line()) - } - if lastLine < p.line { - // TODO: It seems like this triggers more often than it - // should. Look into it. - l := p.lit(p.nextPos(), "") - if r.Hdoc == nil { - r.Hdoc = p.wordOne(l) - } else { - r.Hdoc.Parts = append(r.Hdoc.Parts, l) - } - } - if stop := p.hdocStops[len(p.hdocStops)-1]; stop != nil { - p.posErr(r.Pos(), "unclosed here-document '%s'", stop) - } - p.hdocStops = p.hdocStops[:len(p.hdocStops)-1] - } - p.quote = old -} - -func (p *Parser) got(tok token) bool { - if p.tok == tok { - p.next() - return true - } - return false -} - -func (p *Parser) gotRsrv(val string) (Pos, bool) { - pos := p.pos - if p.tok == _LitWord && p.val == val { - p.next() - return pos, true - } - return pos, false -} - -func readableStr(s string) string { - // don't quote tokens like & or } - if s != "" && s[0] >= 'a' && s[0] <= 'z' { - return strconv.Quote(s) - } - return s -} - -func (p *Parser) followErr(pos Pos, left, right string) { - leftStr := readableStr(left) - p.posErr(pos, "%s must be followed by %s", leftStr, right) -} - -func (p *Parser) followErrExp(pos Pos, left string) { - p.followErr(pos, left, "an expression") -} - -func (p *Parser) follow(lpos Pos, left string, tok token) { - if !p.got(tok) { - p.followErr(lpos, left, tok.String()) - } -} - -func (p *Parser) followRsrv(lpos Pos, left, val string) Pos { - pos, ok := p.gotRsrv(val) - if !ok { - p.followErr(lpos, left, fmt.Sprintf("%q", val)) - } - return pos -} - -func (p *Parser) followStmts(left string, lpos Pos, stops ...string) ([]*Stmt, []Comment) { - if p.got(semicolon) { - return nil, nil - } - newLine := p.got(_Newl) - stmts, last := p.stmtList(stops...) - if len(stmts) < 1 && !newLine { - p.followErr(lpos, left, "a statement list") - } - return stmts, last -} - -func (p *Parser) followWordTok(tok token, pos Pos) *Word { - w := p.getWord() - if w == nil { - p.followErr(pos, tok.String(), "a word") - } - return w -} - -func (p *Parser) stmtEnd(n Node, start, end string) Pos { - pos, ok := p.gotRsrv(end) - if !ok { - p.posErr(n.Pos(), "%s statement must end with %q", start, end) - } - return pos -} - -func (p *Parser) quoteErr(lpos Pos, quote token) { - p.posErr(lpos, "reached %s without closing quote %s", - p.tok.String(), quote) -} - -func (p *Parser) matchingErr(lpos Pos, left, right any) { - p.posErr(lpos, "reached %s without matching %s with %s", - p.tok.String(), left, right) -} - -func (p *Parser) matched(lpos Pos, left, right token) Pos { - pos := p.pos - if !p.got(right) { - p.matchingErr(lpos, left, right) - } - return pos -} - -func (p *Parser) errPass(err error) { - if p.err == nil { - p.err = err - p.bsp = uint(len(p.bs)) + 1 - p.r = utf8.RuneSelf - p.w = 1 - p.tok = _EOF - } -} - -// IsIncomplete reports whether a Parser error could have been avoided with -// extra input bytes. For example, if an [io.EOF] was encountered while there was -// an unclosed quote or parenthesis. 
-func IsIncomplete(err error) bool { - perr, ok := err.(ParseError) - return ok && perr.Incomplete -} - // IsKeyword returns true if the given word is part of the language keywords. func IsKeyword(word string) bool { // This list has been copied from the bash 5.1 source code, file y.tab.c +4460 @@ -774,1691 +93,3 @@ func IsKeyword(word string) bool { } return false } - -// ParseError represents an error found when parsing a source file, from which -// the parser cannot recover. -type ParseError struct { - Filename string - Pos Pos - Text string - - Incomplete bool -} - -func (e ParseError) Error() string { - if e.Filename == "" { - return fmt.Sprintf("%s: %s", e.Pos.String(), e.Text) - } - return fmt.Sprintf("%s:%s: %s", e.Filename, e.Pos.String(), e.Text) -} - -// LangError is returned when the parser encounters code that is only valid in -// other shell language variants. The error includes what feature is not present -// in the current language variant, and what languages support it. -type LangError struct { - Filename string - Pos Pos - Feature string - Langs []LangVariant -} - -func (e LangError) Error() string { - var sb strings.Builder - if e.Filename != "" { - sb.WriteString(e.Filename + ":") - } - sb.WriteString(e.Pos.String() + ": ") - sb.WriteString(e.Feature) - if strings.HasSuffix(e.Feature, "s") { - sb.WriteString(" are a ") - } else { - sb.WriteString(" is a ") - } - for i, lang := range e.Langs { - if i > 0 { - sb.WriteString("/") - } - sb.WriteString(lang.String()) - } - sb.WriteString(" feature") - return sb.String() -} - -func (p *Parser) posErr(pos Pos, format string, a ...any) { - p.errPass(ParseError{ - Filename: p.f.Name, - Pos: pos, - Text: fmt.Sprintf(format, a...), - Incomplete: p.tok == _EOF && p.Incomplete(), - }) -} - -func (p *Parser) curErr(format string, a ...any) { - p.posErr(p.pos, format, a...) -} - -func (p *Parser) langErr(pos Pos, feature string, langs ...LangVariant) { - p.errPass(LangError{ - Filename: p.f.Name, - Pos: pos, - Feature: feature, - Langs: langs, - }) -} - -func (p *Parser) stmts(fn func(*Stmt) bool, stops ...string) { - gotEnd := true -loop: - for p.tok != _EOF { - newLine := p.got(_Newl) - switch p.tok { - case _LitWord: - for _, stop := range stops { - if p.val == stop { - break loop - } - } - case rightParen: - if p.quote == subCmd { - break loop - } - case bckQuote: - if p.backquoteEnd() { - break loop - } - case dblSemicolon, semiAnd, dblSemiAnd, semiOr: - if p.quote == switchCase { - break loop - } - p.curErr("%s can only be used in a case clause", p.tok) - } - if !newLine && !gotEnd { - p.curErr("statements must be separated by &, ; or a newline") - } - if p.tok == _EOF { - break - } - p.openStmts++ - s := p.getStmt(true, false, false) - p.openStmts-- - if s == nil { - p.invalidStmtStart() - break - } - gotEnd = s.Semicolon.IsValid() - if !fn(s) { - break - } - } -} - -func (p *Parser) stmtList(stops ...string) ([]*Stmt, []Comment) { - var stmts []*Stmt - var last []Comment - fn := func(s *Stmt) bool { - stmts = append(stmts, s) - return true - } - p.stmts(fn, stops...) - split := len(p.accComs) - if p.tok == _LitWord && (p.val == "elif" || p.val == "else" || p.val == "fi") { - // Split the comments, so that any aligned with an opening token - // get attached to it. For example: - // - // if foo; then - // # inside the body - // # document the else - // else - // fi - // TODO(mvdan): look into deduplicating this with similar logic - // in caseItems. 
- for i := len(p.accComs) - 1; i >= 0; i-- { - c := p.accComs[i] - if c.Pos().Col() != p.pos.Col() { - break - } - split = i - } - } - if split > 0 { // keep last nil if empty - last = p.accComs[:split] - } - p.accComs = p.accComs[split:] - return stmts, last -} - -func (p *Parser) invalidStmtStart() { - switch p.tok { - case semicolon, and, or, andAnd, orOr: - p.curErr("%s can only immediately follow a statement", p.tok) - case rightParen: - p.curErr("%s can only be used to close a subshell", p.tok) - default: - p.curErr("%s is not a valid start for a statement", p.tok) - } -} - -func (p *Parser) getWord() *Word { - if w := p.wordAnyNumber(); len(w.Parts) > 0 && p.err == nil { - return w - } - return nil -} - -func (p *Parser) getLit() *Lit { - switch p.tok { - case _Lit, _LitWord, _LitRedir: - l := p.lit(p.pos, p.val) - p.next() - return l - } - return nil -} - -func (p *Parser) wordParts(wps []WordPart) []WordPart { - for { - n := p.wordPart() - if n == nil { - if len(wps) == 0 { - return nil // normalize empty lists into nil - } - return wps - } - wps = append(wps, n) - if p.spaced { - return wps - } - } -} - -func (p *Parser) ensureNoNested() { - if p.forbidNested { - p.curErr("expansions not allowed in heredoc words") - } -} - -func (p *Parser) wordPart() WordPart { - switch p.tok { - case _Lit, _LitWord, _LitRedir: - l := p.lit(p.pos, p.val) - p.next() - return l - case dollBrace: - p.ensureNoNested() - switch p.r { - case '|': - if p.lang != LangMirBSDKorn { - p.curErr(`"${|stmts;}" is a mksh feature`) - } - fallthrough - case ' ', '\t', '\n': - if p.lang != LangMirBSDKorn { - p.curErr(`"${ stmts;}" is a mksh feature`) - } - cs := &CmdSubst{ - Left: p.pos, - TempFile: p.r != '|', - ReplyVar: p.r == '|', - } - old := p.preNested(subCmd) - p.rune() // don't tokenize '|' - p.next() - cs.Stmts, cs.Last = p.stmtList("}") - p.postNested(old) - pos, ok := p.gotRsrv("}") - if !ok { - p.matchingErr(cs.Left, "${", "}") - } - cs.Right = pos - return cs - default: - return p.paramExp() - } - case dollDblParen, dollBrack: - p.ensureNoNested() - left := p.tok - ar := &ArithmExp{Left: p.pos, Bracket: left == dollBrack} - var old saveState - if ar.Bracket { - old = p.preNested(arithmExprBrack) - } else { - old = p.preNested(arithmExpr) - } - p.next() - if p.got(hash) { - if p.lang != LangMirBSDKorn { - p.langErr(ar.Pos(), "unsigned expressions", LangMirBSDKorn) - } - ar.Unsigned = true - } - ar.X = p.followArithm(left, ar.Left) - if ar.Bracket { - if p.tok != rightBrack { - p.arithmMatchingErr(ar.Left, dollBrack, rightBrack) - } - p.postNested(old) - ar.Right = p.pos - p.next() - } else { - ar.Right = p.arithmEnd(dollDblParen, ar.Left, old) - } - return ar - case dollParen: - p.ensureNoNested() - cs := &CmdSubst{Left: p.pos} - old := p.preNested(subCmd) - p.next() - cs.Stmts, cs.Last = p.stmtList() - p.postNested(old) - cs.Right = p.matched(cs.Left, leftParen, rightParen) - return cs - case dollar: - r := p.r - switch { - case singleRuneParam(r): - p.tok, p.val = _LitWord, string(r) - p.rune() - case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', - '0' <= r && r <= '9', r == '_', r == '\\': - p.advanceNameCont(r) - default: - l := p.lit(p.pos, "$") - p.next() - return l - } - p.ensureNoNested() - pe := &ParamExp{Dollar: p.pos, Short: true} - p.pos = posAddCol(p.pos, 1) - pe.Param = p.getLit() - if pe.Param != nil && pe.Param.Value == "" { - l := p.lit(pe.Dollar, "$") - // e.g. "$\\\"" within double quotes, so we must - // keep the rest of the literal characters. 
- l.ValueEnd = posAddCol(l.ValuePos, 1) - return l - } - return pe - case cmdIn, cmdOut: - p.ensureNoNested() - ps := &ProcSubst{Op: ProcOperator(p.tok), OpPos: p.pos} - old := p.preNested(subCmd) - p.next() - ps.Stmts, ps.Last = p.stmtList() - p.postNested(old) - ps.Rparen = p.matched(ps.OpPos, token(ps.Op), rightParen) - return ps - case sglQuote, dollSglQuote: - sq := &SglQuoted{Left: p.pos, Dollar: p.tok == dollSglQuote} - r := p.r - for p.newLit(r); ; r = p.rune() { - switch r { - case '\\': - if sq.Dollar { - p.rune() - } - case '\'': - sq.Right = p.nextPos() - sq.Value = p.endLit() - - p.rune() - p.next() - return sq - case escNewl: - p.litBs = append(p.litBs, '\\', '\n') - case utf8.RuneSelf: - p.tok = _EOF - p.quoteErr(sq.Pos(), sglQuote) - return nil - } - } - case dblQuote, dollDblQuote: - if p.quote == dblQuotes { - // p.tok == dblQuote, as "foo$" puts $ in the lit - return nil - } - return p.dblQuoted() - case bckQuote: - if p.backquoteEnd() { - return nil - } - p.ensureNoNested() - cs := &CmdSubst{Left: p.pos, Backquotes: true} - old := p.preNested(subCmdBckquo) - p.openBquotes++ - - // The lexer didn't call p.rune for us, so that it could have - // the right p.openBquotes to properly handle backslashes. - p.rune() - - p.next() - cs.Stmts, cs.Last = p.stmtList() - if p.tok == bckQuote && p.lastBquoteEsc < p.openBquotes-1 { - // e.g. found ` before the nested backquote \` was closed. - p.tok = _EOF - p.quoteErr(cs.Pos(), bckQuote) - } - p.postNested(old) - p.openBquotes-- - cs.Right = p.pos - - // Like above, the lexer didn't call p.rune for us. - p.rune() - if !p.got(bckQuote) { - p.quoteErr(cs.Pos(), bckQuote) - } - return cs - case globQuest, globStar, globPlus, globAt, globExcl: - if p.lang == LangPOSIX { - p.langErr(p.pos, "extended globs", LangBash, LangMirBSDKorn) - } - eg := &ExtGlob{Op: GlobOperator(p.tok), OpPos: p.pos} - lparens := 1 - r := p.r - globLoop: - for p.newLit(r); ; r = p.rune() { - switch r { - case utf8.RuneSelf: - break globLoop - case '(': - lparens++ - case ')': - if lparens--; lparens == 0 { - break globLoop - } - } - } - eg.Pattern = p.lit(posAddCol(eg.OpPos, 2), p.endLit()) - p.rune() - p.next() - if lparens != 0 { - p.matchingErr(eg.OpPos, eg.Op, rightParen) - } - return eg - default: - return nil - } -} - -func (p *Parser) dblQuoted() *DblQuoted { - alloc := &struct { - quoted DblQuoted - parts [1]WordPart - }{ - quoted: DblQuoted{Left: p.pos, Dollar: p.tok == dollDblQuote}, - } - q := &alloc.quoted - old := p.quote - p.quote = dblQuotes - p.next() - q.Parts = p.wordParts(alloc.parts[:0]) - p.quote = old - q.Right = p.pos - if !p.got(dblQuote) { - p.quoteErr(q.Pos(), dblQuote) - } - return q -} - -func singleRuneParam(r rune) bool { - switch r { - case '@', '*', '#', '$', '?', '!', '-', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return true - } - return false -} - -func (p *Parser) paramExp() *ParamExp { - pe := &ParamExp{Dollar: p.pos} - old := p.quote - p.quote = paramExpName - if p.r == '#' { - p.tok = hash - p.pos = p.nextPos() - p.rune() - } else { - p.next() - } - switch p.tok { - case hash: - if paramNameOp(p.r) { - pe.Length = true - p.next() - } - case perc: - if p.lang != LangMirBSDKorn { - p.posErr(pe.Pos(), `"${%%foo}" is a mksh feature`) - } - if paramNameOp(p.r) { - pe.Width = true - p.next() - } - case exclMark: - if paramNameOp(p.r) { - pe.Excl = true - p.next() - } - } - op := p.tok - switch p.tok { - case _Lit, _LitWord: - if !numberLiteral(p.val) && !ValidName(p.val) { - p.curErr("invalid parameter name") - } - 
pe.Param = p.lit(p.pos, p.val) - p.next() - case quest, minus: - if pe.Length && p.r != '}' { - // actually ${#-default}, not ${#-}; fix the ambiguity - pe.Length = false - pe.Param = p.lit(posAddCol(p.pos, -1), "#") - pe.Param.ValueEnd = p.pos - break - } - fallthrough - case at, star, hash, exclMark, dollar: - pe.Param = p.lit(p.pos, p.tok.String()) - p.next() - default: - p.curErr("parameter expansion requires a literal") - } - switch p.tok { - case _Lit, _LitWord: - p.curErr("%s cannot be followed by a word", op) - case rightBrace: - if pe.Excl && p.lang == LangPOSIX { - p.posErr(pe.Pos(), `"${!foo}" is a bash/mksh feature`) - } - pe.Rbrace = p.pos - p.quote = old - p.next() - return pe - case leftBrack: - if p.lang == LangPOSIX { - p.langErr(p.pos, "arrays", LangBash, LangMirBSDKorn) - } - if !ValidName(pe.Param.Value) { - p.curErr("cannot index a special parameter name") - } - pe.Index = p.eitherIndex() - } - if p.tok == rightBrace { - pe.Rbrace = p.pos - p.quote = old - p.next() - return pe - } - if p.tok != _EOF && (pe.Length || pe.Width) { - p.curErr("cannot combine multiple parameter expansion operators") - } - switch p.tok { - case slash, dblSlash: - // pattern search and replace - if p.lang == LangPOSIX { - p.langErr(p.pos, "search and replace", LangBash, LangMirBSDKorn) - } - pe.Repl = &Replace{All: p.tok == dblSlash} - p.quote = paramExpRepl - p.next() - pe.Repl.Orig = p.getWord() - p.quote = paramExpExp - if p.got(slash) { - pe.Repl.With = p.getWord() - } - case colon: - // slicing - if p.lang == LangPOSIX { - p.langErr(p.pos, "slicing", LangBash, LangMirBSDKorn) - } - pe.Slice = &Slice{} - colonPos := p.pos - p.quote = paramExpSlice - if p.next(); p.tok != colon { - pe.Slice.Offset = p.followArithm(colon, colonPos) - } - colonPos = p.pos - if p.got(colon) { - pe.Slice.Length = p.followArithm(colon, colonPos) - } - // Need to use a different matched style so arithm errors - // get reported correctly - p.quote = old - pe.Rbrace = p.pos - p.matchedArithm(pe.Dollar, dollBrace, rightBrace) - return pe - case caret, dblCaret, comma, dblComma: - // upper/lower case - if !p.lang.isBash() { - p.langErr(p.pos, "this expansion operator", LangBash) - } - pe.Exp = p.paramExpExp() - case at, star: - switch { - case p.tok == at && p.lang == LangPOSIX: - p.langErr(p.pos, "this expansion operator", LangBash, LangMirBSDKorn) - case p.tok == star && !pe.Excl: - p.curErr("not a valid parameter expansion operator: %v", p.tok) - case pe.Excl && p.r == '}': - if !p.lang.isBash() { - p.posErr(pe.Pos(), `"${!foo%s}" is a bash feature`, p.tok) - } - pe.Names = ParNamesOperator(p.tok) - p.next() - default: - pe.Exp = p.paramExpExp() - } - case plus, colPlus, minus, colMinus, quest, colQuest, assgn, colAssgn, - perc, dblPerc, hash, dblHash: - pe.Exp = p.paramExpExp() - case _EOF: - default: - p.curErr("not a valid parameter expansion operator: %v", p.tok) - } - p.quote = old - pe.Rbrace = p.matched(pe.Dollar, dollBrace, rightBrace) - return pe -} - -func (p *Parser) paramExpExp() *Expansion { - op := ParExpOperator(p.tok) - p.quote = paramExpExp - p.next() - if op == OtherParamOps { - switch p.tok { - case _Lit, _LitWord: - default: - p.curErr("@ expansion operator requires a literal") - } - switch p.val { - case "a", "k", "u", "A", "E", "K", "L", "P", "U": - if !p.lang.isBash() { - p.langErr(p.pos, "this expansion operator", LangBash) - } - case "#": - if p.lang != LangMirBSDKorn { - p.langErr(p.pos, "this expansion operator", LangMirBSDKorn) - } - case "Q": - default: - p.curErr("invalid @ expansion 
operator %q", p.val) - } - } - return &Expansion{Op: op, Word: p.getWord()} -} - -func (p *Parser) eitherIndex() ArithmExpr { - old := p.quote - lpos := p.pos - p.quote = arithmExprBrack - p.next() - if p.tok == star || p.tok == at { - p.tok, p.val = _LitWord, p.tok.String() - } - expr := p.followArithm(leftBrack, lpos) - p.quote = old - p.matchedArithm(lpos, leftBrack, rightBrack) - return expr -} - -func (p *Parser) stopToken() bool { - switch p.tok { - case _EOF, _Newl, semicolon, and, or, andAnd, orOr, orAnd, dblSemicolon, - semiAnd, dblSemiAnd, semiOr, rightParen: - return true - case bckQuote: - return p.backquoteEnd() - } - return false -} - -func (p *Parser) backquoteEnd() bool { - return p.lastBquoteEsc < p.openBquotes -} - -// ValidName returns whether val is a valid name as per the POSIX spec. -func ValidName(val string) bool { - if val == "" { - return false - } - for i, r := range val { - switch { - case 'a' <= r && r <= 'z': - case 'A' <= r && r <= 'Z': - case r == '_': - case i > 0 && '0' <= r && r <= '9': - default: - return false - } - } - return true -} - -func numberLiteral(val string) bool { - for _, r := range val { - if '0' > r || r > '9' { - return false - } - } - return true -} - -func (p *Parser) hasValidIdent() bool { - if p.tok != _Lit && p.tok != _LitWord { - return false - } - if end := p.eqlOffs; end > 0 { - if p.val[end-1] == '+' && p.lang != LangPOSIX { - end-- // a+=x - } - if ValidName(p.val[:end]) { - return true - } - } else if !ValidName(p.val) { - return false // *[i]=x - } - return p.r == '[' // a[i]=x -} - -func (p *Parser) getAssign(needEqual bool) *Assign { - as := &Assign{} - if p.eqlOffs > 0 { // foo=bar - nameEnd := p.eqlOffs - if p.lang != LangPOSIX && p.val[p.eqlOffs-1] == '+' { - // a+=b - as.Append = true - nameEnd-- - } - as.Name = p.lit(p.pos, p.val[:nameEnd]) - // since we're not using the entire p.val - as.Name.ValueEnd = posAddCol(as.Name.ValuePos, nameEnd) - left := p.lit(posAddCol(p.pos, 1), p.val[p.eqlOffs+1:]) - if left.Value != "" { - left.ValuePos = posAddCol(left.ValuePos, p.eqlOffs) - as.Value = p.wordOne(left) - } - p.next() - } else { // foo[x]=bar - as.Name = p.lit(p.pos, p.val) - // hasValidIdent already checks p.r is '[' - p.rune() - p.pos = posAddCol(p.pos, 1) - as.Index = p.eitherIndex() - if p.spaced || p.stopToken() { - if needEqual { - p.followErr(as.Pos(), "a[b]", "=") - } else { - as.Naked = true - return as - } - } - if len(p.val) > 0 && p.val[0] == '+' { - as.Append = true - p.val = p.val[1:] - p.pos = posAddCol(p.pos, 1) - } - if len(p.val) < 1 || p.val[0] != '=' { - if as.Append { - p.followErr(as.Pos(), "a[b]+", "=") - } else { - p.followErr(as.Pos(), "a[b]", "=") - } - return nil - } - p.pos = posAddCol(p.pos, 1) - p.val = p.val[1:] - if p.val == "" { - p.next() - } - } - if p.spaced || p.stopToken() { - return as - } - if as.Value == nil && p.tok == leftParen { - if p.lang == LangPOSIX { - p.langErr(p.pos, "arrays", LangBash, LangMirBSDKorn) - } - if as.Index != nil { - p.curErr("arrays cannot be nested") - } - as.Array = &ArrayExpr{Lparen: p.pos} - newQuote := p.quote - if p.lang.isBash() { - newQuote = arrayElems - } - old := p.preNested(newQuote) - p.next() - p.got(_Newl) - for p.tok != _EOF && p.tok != rightParen { - ae := &ArrayElem{} - ae.Comments, p.accComs = p.accComs, nil - if p.tok == leftBrack { - left := p.pos - ae.Index = p.eitherIndex() - p.follow(left, `"[x]"`, assgn) - } - if ae.Value = p.getWord(); ae.Value == nil { - switch p.tok { - case leftParen: - p.curErr("arrays cannot be nested") - 
return nil - case _Newl, rightParen, leftBrack: - // TODO: support [index]=[ - default: - p.curErr("array element values must be words") - return nil - } - } - if len(p.accComs) > 0 { - c := p.accComs[0] - if c.Pos().Line() == ae.End().Line() { - ae.Comments = append(ae.Comments, c) - p.accComs = p.accComs[1:] - } - } - as.Array.Elems = append(as.Array.Elems, ae) - p.got(_Newl) - } - as.Array.Last, p.accComs = p.accComs, nil - p.postNested(old) - as.Array.Rparen = p.matched(as.Array.Lparen, leftParen, rightParen) - } else if w := p.getWord(); w != nil { - if as.Value == nil { - as.Value = w - } else { - as.Value.Parts = append(as.Value.Parts, w.Parts...) - } - } - return as -} - -func (p *Parser) peekRedir() bool { - switch p.tok { - case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut, - hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir: - return true - } - return false -} - -func (p *Parser) doRedirect(s *Stmt) { - var r *Redirect - if s.Redirs == nil { - var alloc struct { - redirs [4]*Redirect - redir Redirect - } - s.Redirs = alloc.redirs[:0] - r = &alloc.redir - s.Redirs = append(s.Redirs, r) - } else { - r = &Redirect{} - s.Redirs = append(s.Redirs, r) - } - r.N = p.getLit() - if !p.lang.isBash() && r.N != nil && r.N.Value[0] == '{' { - p.langErr(r.N.Pos(), "{varname} redirects", LangBash) - } - if p.lang == LangPOSIX && (p.tok == rdrAll || p.tok == appAll) { - p.langErr(p.pos, "&> redirects", LangBash, LangMirBSDKorn) - } - r.Op, r.OpPos = RedirOperator(p.tok), p.pos - p.next() - switch r.Op { - case Hdoc, DashHdoc: - old := p.quote - p.quote, p.forbidNested = hdocWord, true - p.heredocs = append(p.heredocs, r) - r.Word = p.followWordTok(token(r.Op), r.OpPos) - p.quote, p.forbidNested = old, false - if p.tok == _Newl { - if len(p.accComs) > 0 { - c := p.accComs[0] - if c.Pos().Line() == s.End().Line() { - s.Comments = append(s.Comments, c) - p.accComs = p.accComs[1:] - } - } - p.doHeredocs() - } - case WordHdoc: - if p.lang == LangPOSIX { - p.langErr(r.OpPos, "herestrings", LangBash, LangMirBSDKorn) - } - fallthrough - default: - r.Word = p.followWordTok(token(r.Op), r.OpPos) - } -} - -func (p *Parser) getStmt(readEnd, binCmd, fnBody bool) *Stmt { - pos, ok := p.gotRsrv("!") - s := &Stmt{Position: pos} - if ok { - s.Negated = true - if p.stopToken() { - p.posErr(s.Pos(), `"!" 
cannot form a statement alone`) - } - if _, ok := p.gotRsrv("!"); ok { - p.posErr(s.Pos(), `cannot negate a command multiple times`) - } - } - if s = p.gotStmtPipe(s, false); s == nil || p.err != nil { - return nil - } - // instead of using recursion, iterate manually - for p.tok == andAnd || p.tok == orOr { - if binCmd { - // left associativity: in a list of BinaryCmds, the - // right recursion should only read a single element - return s - } - b := &BinaryCmd{ - OpPos: p.pos, - Op: BinCmdOperator(p.tok), - X: s, - } - p.next() - p.got(_Newl) - b.Y = p.getStmt(false, true, false) - if b.Y == nil || p.err != nil { - p.followErr(b.OpPos, b.Op.String(), "a statement") - return nil - } - s = &Stmt{Position: s.Position} - s.Cmd = b - s.Comments, b.X.Comments = b.X.Comments, nil - } - if readEnd { - switch p.tok { - case semicolon: - s.Semicolon = p.pos - p.next() - case and: - s.Semicolon = p.pos - p.next() - s.Background = true - case orAnd: - s.Semicolon = p.pos - p.next() - s.Coprocess = true - } - } - if len(p.accComs) > 0 && !binCmd && !fnBody { - c := p.accComs[0] - if c.Pos().Line() == s.End().Line() { - s.Comments = append(s.Comments, c) - p.accComs = p.accComs[1:] - } - } - return s -} - -func (p *Parser) gotStmtPipe(s *Stmt, binCmd bool) *Stmt { - s.Comments, p.accComs = p.accComs, nil - switch p.tok { - case _LitWord: - switch p.val { - case "{": - p.block(s) - case "if": - p.ifClause(s) - case "while", "until": - p.whileClause(s, p.val == "until") - case "for": - p.forClause(s) - case "case": - p.caseClause(s) - case "}": - p.curErr(`%q can only be used to close a block`, p.val) - case "then": - p.curErr(`%q can only be used in an if`, p.val) - case "elif": - p.curErr(`%q can only be used in an if`, p.val) - case "fi": - p.curErr(`%q can only be used to end an if`, p.val) - case "do": - p.curErr(`%q can only be used in a loop`, p.val) - case "done": - p.curErr(`%q can only be used to end a loop`, p.val) - case "esac": - p.curErr(`%q can only be used to end a case`, p.val) - case "!": - if !s.Negated { - p.curErr(`"!" can only be used in full statements`) - break - } - case "[[": - if p.lang != LangPOSIX { - p.testClause(s) - } - case "]]": - if p.lang != LangPOSIX { - p.curErr(`%q can only be used to close a test`, p.val) - } - case "let": - if p.lang != LangPOSIX { - p.letClause(s) - } - case "function": - if p.lang != LangPOSIX { - p.bashFuncDecl(s) - } - case "declare": - if p.lang.isBash() { // Note that mksh lacks this one. - p.declClause(s) - } - case "local", "export", "readonly", "typeset", "nameref": - if p.lang != LangPOSIX { - p.declClause(s) - } - case "time": - if p.lang != LangPOSIX { - p.timeClause(s) - } - case "coproc": - if p.lang.isBash() { // Note that mksh lacks this one. 
- p.coprocClause(s) - } - case "select": - if p.lang != LangPOSIX { - p.selectClause(s) - } - case "@test": - if p.lang == LangBats { - p.testDecl(s) - } - } - if s.Cmd != nil { - break - } - if p.hasValidIdent() { - p.callExpr(s, nil, true) - break - } - name := p.lit(p.pos, p.val) - if p.next(); p.got(leftParen) { - p.follow(name.ValuePos, "foo(", rightParen) - if p.lang == LangPOSIX && !ValidName(name.Value) { - p.posErr(name.Pos(), "invalid func name") - } - p.funcDecl(s, name, name.ValuePos, true) - } else { - p.callExpr(s, p.wordOne(name), false) - } - case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut, - hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir: - p.doRedirect(s) - p.callExpr(s, nil, false) - case bckQuote: - if p.backquoteEnd() { - return nil - } - fallthrough - case _Lit, dollBrace, dollDblParen, dollParen, dollar, cmdIn, cmdOut, - sglQuote, dollSglQuote, dblQuote, dollDblQuote, dollBrack, - globQuest, globStar, globPlus, globAt, globExcl: - if p.hasValidIdent() { - p.callExpr(s, nil, true) - break - } - w := p.wordAnyNumber() - if p.got(leftParen) { - p.posErr(w.Pos(), "invalid func name") - } - p.callExpr(s, w, false) - case leftParen: - p.subshell(s) - case dblLeftParen: - p.arithmExpCmd(s) - default: - if len(s.Redirs) == 0 { - return nil - } - } - for p.peekRedir() { - p.doRedirect(s) - } - // instead of using recursion, iterate manually - for p.tok == or || p.tok == orAnd { - if binCmd { - // left associativity: in a list of BinaryCmds, the - // right recursion should only read a single element - return s - } - if p.tok == orAnd && p.lang == LangMirBSDKorn { - // No need to check for LangPOSIX, as on that language - // we parse |& as two tokens. - break - } - b := &BinaryCmd{OpPos: p.pos, Op: BinCmdOperator(p.tok), X: s} - p.next() - p.got(_Newl) - if b.Y = p.gotStmtPipe(&Stmt{Position: p.pos}, true); b.Y == nil || p.err != nil { - p.followErr(b.OpPos, b.Op.String(), "a statement") - break - } - s = &Stmt{Position: s.Position} - s.Cmd = b - s.Comments, b.X.Comments = b.X.Comments, nil - // in "! 
x | y", the bang applies to the entire pipeline - s.Negated = b.X.Negated - b.X.Negated = false - } - return s -} - -func (p *Parser) subshell(s *Stmt) { - sub := &Subshell{Lparen: p.pos} - old := p.preNested(subCmd) - p.next() - sub.Stmts, sub.Last = p.stmtList() - p.postNested(old) - sub.Rparen = p.matched(sub.Lparen, leftParen, rightParen) - s.Cmd = sub -} - -func (p *Parser) arithmExpCmd(s *Stmt) { - ar := &ArithmCmd{Left: p.pos} - old := p.preNested(arithmExprCmd) - p.next() - if p.got(hash) { - if p.lang != LangMirBSDKorn { - p.langErr(ar.Pos(), "unsigned expressions", LangMirBSDKorn) - } - ar.Unsigned = true - } - ar.X = p.followArithm(dblLeftParen, ar.Left) - ar.Right = p.arithmEnd(dblLeftParen, ar.Left, old) - s.Cmd = ar -} - -func (p *Parser) block(s *Stmt) { - b := &Block{Lbrace: p.pos} - p.next() - b.Stmts, b.Last = p.stmtList("}") - pos, ok := p.gotRsrv("}") - b.Rbrace = pos - if !ok { - p.matchingErr(b.Lbrace, "{", "}") - } - s.Cmd = b -} - -func (p *Parser) ifClause(s *Stmt) { - rootIf := &IfClause{Position: p.pos} - p.next() - rootIf.Cond, rootIf.CondLast = p.followStmts("if", rootIf.Position, "then") - rootIf.ThenPos = p.followRsrv(rootIf.Position, "if ", "then") - rootIf.Then, rootIf.ThenLast = p.followStmts("then", rootIf.ThenPos, "fi", "elif", "else") - curIf := rootIf - for p.tok == _LitWord && p.val == "elif" { - elf := &IfClause{Position: p.pos} - curIf.Last = p.accComs - p.accComs = nil - p.next() - elf.Cond, elf.CondLast = p.followStmts("elif", elf.Position, "then") - elf.ThenPos = p.followRsrv(elf.Position, "elif ", "then") - elf.Then, elf.ThenLast = p.followStmts("then", elf.ThenPos, "fi", "elif", "else") - curIf.Else = elf - curIf = elf - } - if elsePos, ok := p.gotRsrv("else"); ok { - curIf.Last = p.accComs - p.accComs = nil - els := &IfClause{Position: elsePos} - els.Then, els.ThenLast = p.followStmts("else", els.Position, "fi") - curIf.Else = els - curIf = els - } - curIf.Last = p.accComs - p.accComs = nil - rootIf.FiPos = p.stmtEnd(rootIf, "if", "fi") - for els := rootIf.Else; els != nil; els = els.Else { - // All the nested IfClauses share the same FiPos. - els.FiPos = rootIf.FiPos - } - s.Cmd = rootIf -} - -func (p *Parser) whileClause(s *Stmt, until bool) { - wc := &WhileClause{WhilePos: p.pos, Until: until} - rsrv := "while" - rsrvCond := "while " - if wc.Until { - rsrv = "until" - rsrvCond = "until " - } - p.next() - wc.Cond, wc.CondLast = p.followStmts(rsrv, wc.WhilePos, "do") - wc.DoPos = p.followRsrv(wc.WhilePos, rsrvCond, "do") - wc.Do, wc.DoLast = p.followStmts("do", wc.DoPos, "done") - wc.DonePos = p.stmtEnd(wc, rsrv, "done") - s.Cmd = wc -} - -func (p *Parser) forClause(s *Stmt) { - fc := &ForClause{ForPos: p.pos} - p.next() - fc.Loop = p.loop(fc.ForPos) - - start, end := "do", "done" - if pos, ok := p.gotRsrv("{"); ok { - if p.lang == LangPOSIX { - p.langErr(pos, "for loops with braces", LangBash, LangMirBSDKorn) - } - fc.DoPos = pos - fc.Braces = true - start, end = "{", "}" - } else { - fc.DoPos = p.followRsrv(fc.ForPos, "for foo [in words]", start) - } - - s.Comments = append(s.Comments, p.accComs...) 
- p.accComs = nil - fc.Do, fc.DoLast = p.followStmts(start, fc.DoPos, end) - fc.DonePos = p.stmtEnd(fc, "for", end) - s.Cmd = fc -} - -func (p *Parser) loop(fpos Pos) Loop { - if !p.lang.isBash() { - switch p.tok { - case leftParen, dblLeftParen: - p.langErr(p.pos, "c-style fors", LangBash) - } - } - if p.tok == dblLeftParen { - cl := &CStyleLoop{Lparen: p.pos} - old := p.preNested(arithmExprCmd) - p.next() - cl.Init = p.arithmExpr(false) - if !p.got(dblSemicolon) { - p.follow(p.pos, "expr", semicolon) - cl.Cond = p.arithmExpr(false) - p.follow(p.pos, "expr", semicolon) - } - cl.Post = p.arithmExpr(false) - cl.Rparen = p.arithmEnd(dblLeftParen, cl.Lparen, old) - p.got(semicolon) - p.got(_Newl) - return cl - } - return p.wordIter("for", fpos) -} - -func (p *Parser) wordIter(ftok string, fpos Pos) *WordIter { - wi := &WordIter{} - if wi.Name = p.getLit(); wi.Name == nil { - p.followErr(fpos, ftok, "a literal") - } - if p.got(semicolon) { - p.got(_Newl) - return wi - } - p.got(_Newl) - if pos, ok := p.gotRsrv("in"); ok { - wi.InPos = pos - for !p.stopToken() { - if w := p.getWord(); w == nil { - p.curErr("word list can only contain words") - } else { - wi.Items = append(wi.Items, w) - } - } - p.got(semicolon) - p.got(_Newl) - } else if p.tok == _LitWord && p.val == "do" { - } else { - p.followErr(fpos, ftok+" foo", `"in", "do", ;, or a newline`) - } - return wi -} - -func (p *Parser) selectClause(s *Stmt) { - fc := &ForClause{ForPos: p.pos, Select: true} - p.next() - fc.Loop = p.wordIter("select", fc.ForPos) - fc.DoPos = p.followRsrv(fc.ForPos, "select foo [in words]", "do") - fc.Do, fc.DoLast = p.followStmts("do", fc.DoPos, "done") - fc.DonePos = p.stmtEnd(fc, "select", "done") - s.Cmd = fc -} - -func (p *Parser) caseClause(s *Stmt) { - cc := &CaseClause{Case: p.pos} - p.next() - cc.Word = p.getWord() - if cc.Word == nil { - p.followErr(cc.Case, "case", "a word") - } - end := "esac" - p.got(_Newl) - if pos, ok := p.gotRsrv("{"); ok { - cc.In = pos - cc.Braces = true - if p.lang != LangMirBSDKorn { - p.posErr(cc.Pos(), `"case i {" is a mksh feature`) - } - end = "}" - } else { - cc.In = p.followRsrv(cc.Case, "case x", "in") - } - cc.Items = p.caseItems(end) - cc.Last, p.accComs = p.accComs, nil - cc.Esac = p.stmtEnd(cc, "case", end) - s.Cmd = cc -} - -func (p *Parser) caseItems(stop string) (items []*CaseItem) { - p.got(_Newl) - for p.tok != _EOF && (p.tok != _LitWord || p.val != stop) { - ci := &CaseItem{} - ci.Comments, p.accComs = p.accComs, nil - p.got(leftParen) - for p.tok != _EOF { - if w := p.getWord(); w == nil { - p.curErr("case patterns must consist of words") - } else { - ci.Patterns = append(ci.Patterns, w) - } - if p.tok == rightParen { - break - } - if !p.got(or) { - p.curErr("case patterns must be separated with |") - } - } - old := p.preNested(switchCase) - p.next() - ci.Stmts, ci.Last = p.stmtList(stop) - p.postNested(old) - switch p.tok { - case dblSemicolon, semiAnd, dblSemiAnd, semiOr: - default: - ci.Op = Break - items = append(items, ci) - return - } - ci.Last = append(ci.Last, p.accComs...) - p.accComs = nil - ci.OpPos = p.pos - ci.Op = CaseOperator(p.tok) - p.next() - p.got(_Newl) - - // Split the comments: - // - // case x in - // a) - // foo - // ;; - // # comment for a - // # comment for b - // b) - // [...] - split := len(p.accComs) - for i := len(p.accComs) - 1; i >= 0; i-- { - c := p.accComs[i] - if c.Pos().Col() != p.pos.Col() { - break - } - split = i - } - ci.Comments = append(ci.Comments, p.accComs[:split]...) 
- p.accComs = p.accComs[split:] - - items = append(items, ci) - } - return -} - -func (p *Parser) testClause(s *Stmt) { - tc := &TestClause{Left: p.pos} - old := p.preNested(testExpr) - p.next() - if _, ok := p.gotRsrv("]]"); ok || p.tok == _EOF { - p.posErr(tc.Left, "test clause requires at least one expression") - } - tc.X = p.testExpr(false) - if tc.X == nil { - p.followErrExp(tc.Left, "[[") - } - tc.Right = p.pos - if _, ok := p.gotRsrv("]]"); !ok { - p.matchingErr(tc.Left, "[[", "]]") - } - p.postNested(old) - s.Cmd = tc -} - -func (p *Parser) testExpr(pastAndOr bool) TestExpr { - p.got(_Newl) - var left TestExpr - if pastAndOr { - left = p.testExprBase() - } else { - left = p.testExpr(true) - } - if left == nil { - return left - } - p.got(_Newl) - switch p.tok { - case andAnd, orOr: - case _LitWord: - if p.val == "]]" { - return left - } - if p.tok = token(testBinaryOp(p.val)); p.tok == illegalTok { - p.curErr("not a valid test operator: %s", p.val) - } - case rdrIn, rdrOut: - case _EOF, rightParen: - return left - case _Lit: - p.curErr("test operator words must consist of a single literal") - default: - p.curErr("not a valid test operator: %v", p.tok) - } - b := &BinaryTest{ - OpPos: p.pos, - Op: BinTestOperator(p.tok), - X: left, - } - // Save the previous quoteState, since we change it in TsReMatch. - oldQuote := p.quote - - switch b.Op { - case AndTest, OrTest: - p.next() - if b.Y = p.testExpr(false); b.Y == nil { - p.followErrExp(b.OpPos, b.Op.String()) - } - case TsReMatch: - if !p.lang.isBash() { - p.langErr(p.pos, "regex tests", LangBash) - } - p.rxOpenParens = 0 - p.rxFirstPart = true - // TODO(mvdan): Using nested states within a regex will break in - // all sorts of ways. The better fix is likely to use a stop - // token, like we do with heredocs. 
- p.quote = testExprRegexp - fallthrough - default: - if _, ok := b.X.(*Word); !ok { - p.posErr(b.OpPos, "expected %s, %s or %s after complex expr", - AndTest, OrTest, "]]") - } - p.next() - b.Y = p.followWordTok(token(b.Op), b.OpPos) - } - p.quote = oldQuote - return b -} - -func (p *Parser) testExprBase() TestExpr { - switch p.tok { - case _EOF, rightParen: - return nil - case _LitWord: - op := token(testUnaryOp(p.val)) - switch op { - case illegalTok: - case tsRefVar, tsModif: // not available in mksh - if p.lang.isBash() { - p.tok = op - } - default: - p.tok = op - } - } - switch p.tok { - case exclMark: - u := &UnaryTest{OpPos: p.pos, Op: TsNot} - p.next() - if u.X = p.testExpr(false); u.X == nil { - p.followErrExp(u.OpPos, u.Op.String()) - } - return u - case tsExists, tsRegFile, tsDirect, tsCharSp, tsBlckSp, tsNmPipe, - tsSocket, tsSmbLink, tsSticky, tsGIDSet, tsUIDSet, tsGrpOwn, - tsUsrOwn, tsModif, tsRead, tsWrite, tsExec, tsNoEmpty, - tsFdTerm, tsEmpStr, tsNempStr, tsOptSet, tsVarSet, tsRefVar: - u := &UnaryTest{OpPos: p.pos, Op: UnTestOperator(p.tok)} - p.next() - u.X = p.followWordTok(token(u.Op), u.OpPos) - return u - case leftParen: - pe := &ParenTest{Lparen: p.pos} - p.next() - if pe.X = p.testExpr(false); pe.X == nil { - p.followErrExp(pe.Lparen, "(") - } - pe.Rparen = p.matched(pe.Lparen, leftParen, rightParen) - return pe - case _LitWord: - if p.val == "]]" { - return nil - } - fallthrough - default: - if w := p.getWord(); w != nil { - return w - } - // otherwise we'd return a typed nil above - return nil - } -} - -func (p *Parser) declClause(s *Stmt) { - ds := &DeclClause{Variant: p.lit(p.pos, p.val)} - p.next() - for !p.stopToken() && !p.peekRedir() { - if p.hasValidIdent() { - ds.Args = append(ds.Args, p.getAssign(false)) - } else if p.eqlOffs > 0 { - p.curErr("invalid var name") - } else if p.tok == _LitWord && ValidName(p.val) { - ds.Args = append(ds.Args, &Assign{ - Naked: true, - Name: p.getLit(), - }) - } else if w := p.getWord(); w != nil { - ds.Args = append(ds.Args, &Assign{ - Naked: true, - Value: w, - }) - } else { - p.followErr(p.pos, ds.Variant.Value, "names or assignments") - } - } - s.Cmd = ds -} - -func isBashCompoundCommand(tok token, val string) bool { - switch tok { - case leftParen, dblLeftParen: - return true - case _LitWord: - switch val { - case "{", "if", "while", "until", "for", "case", "[[", - "coproc", "let", "function", "declare", "local", - "export", "readonly", "typeset", "nameref": - return true - } - } - return false -} - -func (p *Parser) timeClause(s *Stmt) { - tc := &TimeClause{Time: p.pos} - p.next() - if _, ok := p.gotRsrv("-p"); ok { - tc.PosixFormat = true - } - tc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false) - s.Cmd = tc -} - -func (p *Parser) coprocClause(s *Stmt) { - cc := &CoprocClause{Coproc: p.pos} - if p.next(); isBashCompoundCommand(p.tok, p.val) { - // has no name - cc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false) - s.Cmd = cc - return - } - cc.Name = p.getWord() - cc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false) - if cc.Stmt == nil { - if cc.Name == nil { - p.posErr(cc.Coproc, "coproc clause requires a command") - return - } - // name was in fact the stmt - cc.Stmt = &Stmt{Position: cc.Name.Pos()} - cc.Stmt.Cmd = p.call(cc.Name) - cc.Name = nil - } else if cc.Name != nil { - if call, ok := cc.Stmt.Cmd.(*CallExpr); ok { - // name was in fact the start of a call - call.Args = append([]*Word{cc.Name}, call.Args...) 
- cc.Name = nil - } - } - s.Cmd = cc -} - -func (p *Parser) letClause(s *Stmt) { - lc := &LetClause{Let: p.pos} - old := p.preNested(arithmExprLet) - p.next() - for !p.stopToken() && !p.peekRedir() { - x := p.arithmExpr(true) - if x == nil { - break - } - lc.Exprs = append(lc.Exprs, x) - } - if len(lc.Exprs) == 0 { - p.followErrExp(lc.Let, "let") - } - p.postNested(old) - s.Cmd = lc -} - -func (p *Parser) bashFuncDecl(s *Stmt) { - fpos := p.pos - if p.next(); p.tok != _LitWord { - p.followErr(fpos, "function", "a name") - } - name := p.lit(p.pos, p.val) - hasParens := false - if p.next(); p.got(leftParen) { - hasParens = true - p.follow(name.ValuePos, "foo(", rightParen) - } - p.funcDecl(s, name, fpos, hasParens) -} - -func (p *Parser) testDecl(s *Stmt) { - td := &TestDecl{Position: p.pos} - p.next() - if td.Description = p.getWord(); td.Description == nil { - p.followErr(td.Position, "@test", "a description word") - } - if td.Body = p.getStmt(false, false, true); td.Body == nil { - p.followErr(td.Position, `@test "desc"`, "a statement") - } - s.Cmd = td -} - -func (p *Parser) callExpr(s *Stmt, w *Word, assign bool) { - ce := p.call(w) - if w == nil { - ce.Args = ce.Args[:0] - } - if assign { - ce.Assigns = append(ce.Assigns, p.getAssign(true)) - } -loop: - for { - switch p.tok { - case _EOF, _Newl, semicolon, and, or, andAnd, orOr, orAnd, - dblSemicolon, semiAnd, dblSemiAnd, semiOr: - break loop - case _LitWord: - if len(ce.Args) == 0 && p.hasValidIdent() { - ce.Assigns = append(ce.Assigns, p.getAssign(true)) - break - } - // Avoid failing later with the confusing "} can only be used to close a block". - if p.lang == LangPOSIX && p.val == "{" && w != nil && w.Lit() == "function" { - p.curErr("the %q builtin is a bash feature; tried parsing as posix", "function") - } - ce.Args = append(ce.Args, p.wordOne(p.lit(p.pos, p.val))) - p.next() - case _Lit: - if len(ce.Args) == 0 && p.hasValidIdent() { - ce.Assigns = append(ce.Assigns, p.getAssign(true)) - break - } - ce.Args = append(ce.Args, p.wordAnyNumber()) - case bckQuote: - if p.backquoteEnd() { - break loop - } - fallthrough - case dollBrace, dollDblParen, dollParen, dollar, cmdIn, cmdOut, - sglQuote, dollSglQuote, dblQuote, dollDblQuote, dollBrack, - globQuest, globStar, globPlus, globAt, globExcl: - ce.Args = append(ce.Args, p.wordAnyNumber()) - case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut, - hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir: - p.doRedirect(s) - case dblLeftParen: - p.curErr("%s can only be used to open an arithmetic cmd", p.tok) - case rightParen: - if p.quote == subCmd { - break loop - } - fallthrough - default: - // Note that we'll only keep the first error that happens. 
- if len(ce.Args) > 0 { - if cmd := ce.Args[0].Lit(); p.lang == LangPOSIX && isBashCompoundCommand(_LitWord, cmd) { - p.curErr("the %q builtin is a bash feature; tried parsing as posix", cmd) - } - } - p.curErr("a command can only contain words and redirects; encountered %s", p.tok) - } - } - if len(ce.Assigns) == 0 && len(ce.Args) == 0 { - return - } - if len(ce.Args) == 0 { - ce.Args = nil - } else { - for _, asgn := range ce.Assigns { - if asgn.Index != nil || asgn.Array != nil { - p.posErr(asgn.Pos(), "inline variables cannot be arrays") - } - } - } - s.Cmd = ce -} - -func (p *Parser) funcDecl(s *Stmt, name *Lit, pos Pos, withParens bool) { - fd := &FuncDecl{ - Position: pos, - RsrvWord: pos != name.ValuePos, - Parens: withParens, - Name: name, - } - p.got(_Newl) - if fd.Body = p.getStmt(false, false, true); fd.Body == nil { - p.followErr(fd.Pos(), "foo()", "a statement") - } - s.Cmd = fd -} From 82eda48066fdf16a55795ec4d9bcd3f5efc95910 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Tue, 24 Jun 2025 16:19:42 +0200 Subject: [PATCH 3/4] cli/connhelper/internal/syntax: fix linting issues cli/connhelper/internal/syntax/parser.go:31:2: Duplicate words (the) found (dupword) // Note that it shares some features with Bash, due to the the shared ^ cli/connhelper/internal/syntax/quote.go:48:1: cyclomatic complexity 35 of func `Quote` is high (> 16) (gocyclo) func Quote(s string, lang LangVariant) (string, error) { ^ cli/connhelper/internal/syntax/quote.go:103:3: shadow: declaration of "offs" shadows declaration at line 56 (govet) offs := 0 ^ Signed-off-by: Sebastiaan van Stijn --- cli/connhelper/internal/syntax/parser.go | 2 +- cli/connhelper/internal/syntax/quote.go | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cli/connhelper/internal/syntax/parser.go b/cli/connhelper/internal/syntax/parser.go index d0382299fb..06b1222f43 100644 --- a/cli/connhelper/internal/syntax/parser.go +++ b/cli/connhelper/internal/syntax/parser.go @@ -24,7 +24,7 @@ const ( // LangMirBSDKorn corresponds to the MirBSD Korn Shell, also known as // mksh, as described at http://www.mirbsd.org/htman/i386/man1/mksh.htm. - // Note that it shares some features with Bash, due to the the shared + // Note that it shares some features with Bash, due to the shared // ancestry that is ksh. // // We currently follow mksh version 59. diff --git a/cli/connhelper/internal/syntax/quote.go b/cli/connhelper/internal/syntax/quote.go index 6f27eba12d..628fa4891c 100644 --- a/cli/connhelper/internal/syntax/quote.go +++ b/cli/connhelper/internal/syntax/quote.go @@ -45,6 +45,8 @@ const ( // // Some strings do not require any quoting and are returned unchanged. // Those strings can be directly surrounded in single quotes as well. 
+//
+//nolint:gocyclo // ignore "cyclomatic complexity 35 of func `Quote` is high (> 16) (gocyclo)"
 func Quote(s string, lang LangVariant) (string, error) {
 	if s == "" {
 		// Special case; an empty string must always be quoted,
@@ -100,7 +102,7 @@ func Quote(s string, lang LangVariant) (string, error) {
 	if nonPrintable {
 		b.WriteString("$'")
 		lastRequoteIfHex := false
-		offs := 0
+		offs = 0
 		for rem := s; len(rem) > 0; {
 			nextRequoteIfHex := false
 			r, size := utf8.DecodeRuneInString(rem)

From 88d113322499c3e7c0709781c608056ed1bc228c Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn
Date: Thu, 6 Feb 2025 01:03:01 +0100
Subject: [PATCH 4/4] cli/connhelper: quote ssh arguments to prevent shell injection

When connecting to a remote daemon through an ssh:// connection, the CLI
connects with the remote host using ssh, executing the
`docker system dial-stdio` command on the remote host to connect to the
daemon API's unix socket. By default, the `docker system dial-stdio`
command connects with the daemon using the default location
(/var/run/docker.sock), or the location as configured on the remote host.

Commit 25ebf0ec9c1cc0d0468e060072e6ab2ff7b87166 (included in docker CLI
v24.0.0-rc.2 and higher) introduced a feature to allow the location of the
socket to be specified through the host connection string, for example:

    DOCKER_HOST='ssh://example.test/run/custom-docker.sock'

The custom path is included as part of the ssh command executed from the
client machine to connect with the remote host. The example above would
execute the following command from the client machine;

    ssh -o ConnectTimeout=30 -T -- example.test docker --host unix:///run/custom-docker.sock system dial-stdio

ssh executes remote commands in a shell environment, and no quoting was in
place, which allowed a connection string to include additional content,
which would be expanded / executed on the remote machine. For example, the
following would execute `echo hello > /hello.txt` on the remote machine;

    export DOCKER_HOST='ssh://example.test/var/run/docker.sock $(echo hello > /hello.txt)'
    docker info
    # (output of docker info from the remote machine)

While this doesn't allow the user to do anything they're not already able
to do (by directly using the same SSH connection), the behavior is not
expected, so this patch adds quoting to prevent such URLs from resulting
in expansion.

This patch updates the cli/connhelper and cli/connhelper/ssh package to
quote parameters used in the ssh command to prevent code execution and
expansion of variables on the remote machine. Quoting is also applied to
other parameters that are obtained from the DOCKER_HOST URL, such as
username and hostname.

- The existing `Spec.Args()` method in the cli/connhelper/ssh package now
  quotes arguments, and returns a nil slice when failing to quote. Users
  of this package should therefore check the returned arguments before
  consuming them. This method did not provide an error-return, and adding
  one would be a breaking change.
- A new `Spec.Command` method is introduced, which (unlike the
  `Spec.Args()` method) provides an error return. Users are recommended to
  use this new method instead of the `Spec.Args()` method.

Some minor additional changes in behavior are included in this patch;

- Connection URLs with a trailing slash (e.g. `ssh://example.test/`) would
  previously result in `unix:///` being used as custom socket path. After
  this patch, the trailing slash is ignored, and no custom socket path is
  used.
- Specifying a remote command is now required.
  When passing an empty remote command, `Spec.Args()` now results in a
  `nil` value being returned (or a `no remote command specified` error
  when using `Spec.Command()`).

Signed-off-by: Sebastiaan van Stijn
---
 cli/connhelper/connhelper.go   |  17 ++--
 cli/connhelper/ssh/ssh.go      | 106 +++++++++++++++++--
 cli/connhelper/ssh/ssh_test.go | 180 ++++++++++++++++++++++++++++++++-
 3 files changed, 288 insertions(+), 15 deletions(-)

diff --git a/cli/connhelper/connhelper.go b/cli/connhelper/connhelper.go
index f4b9388599..25ce7aef02 100644
--- a/cli/connhelper/connhelper.go
+++ b/cli/connhelper/connhelper.go
@@ -47,14 +47,19 @@ func getConnectionHelper(daemonURL string, sshFlags []string) (*ConnectionHelper
 	}
 	sshFlags = addSSHTimeout(sshFlags)
 	sshFlags = disablePseudoTerminalAllocation(sshFlags)
+
+	remoteCommand := []string{"docker", "system", "dial-stdio"}
+	socketPath := sp.Path
+	if strings.Trim(sp.Path, "/") != "" {
+		remoteCommand = []string{"docker", "--host=unix://" + socketPath, "system", "dial-stdio"}
+	}
+	sshArgs, err := sp.Command(sshFlags, remoteCommand...)
+	if err != nil {
+		return nil, err
+	}
 	return &ConnectionHelper{
 		Dialer: func(ctx context.Context, network, addr string) (net.Conn, error) {
-			args := []string{"docker"}
-			if sp.Path != "" {
-				args = append(args, "--host", "unix://"+sp.Path)
-			}
-			args = append(args, "system", "dial-stdio")
-			return commandconn.New(ctx, "ssh", append(sshFlags, sp.Args(args...)...)...)
+			return commandconn.New(ctx, "ssh", sshArgs...)
 		},
 		Host: "http://docker.example.com",
 	}, nil
diff --git a/cli/connhelper/ssh/ssh.go b/cli/connhelper/ssh/ssh.go
index 2c9d0d61b1..2fcb54a98f 100644
--- a/cli/connhelper/ssh/ssh.go
+++ b/cli/connhelper/ssh/ssh.go
@@ -5,6 +5,8 @@ import (
 	"errors"
 	"fmt"
 	"net/url"
+
+	"github.com/docker/cli/cli/connhelper/internal/syntax"
 )
 
 // ParseURL creates a [Spec] from the given ssh URL. It returns an error if
@@ -76,16 +78,106 @@ type Spec struct {
 	Path string
 }
 
-// Args returns args except "ssh" itself combined with optional additional command args
-func (sp *Spec) Args(add ...string) []string {
+// Args returns args except "ssh" itself combined with optional additional
+// command and args to be executed on the remote host. It attempts to quote
+// the given arguments to account for ssh executing the remote command in a
+// shell. It returns nil when unable to quote the remote command.
+func (sp *Spec) Args(remoteCommandAndArgs ...string) []string {
+	// Format the remote command to run using the ssh connection, quoting
+	// values where needed because ssh executes these in a POSIX shell.
+	remoteCommand, err := quoteCommand(remoteCommandAndArgs...)
+	if err != nil {
+		return nil
+	}
+
+	sshArgs, err := sp.args()
+	if err != nil {
+		return nil
+	}
+	if remoteCommand != "" {
+		sshArgs = append(sshArgs, remoteCommand)
+	}
+	return sshArgs
+}
+
+func (sp *Spec) args(sshFlags ...string) ([]string, error) {
 	var args []string
+	if sp.Host == "" {
+		return nil, errors.New("no host specified")
+	}
 	if sp.User != "" {
-		args = append(args, "-l", sp.User)
+		// Quote user, as it's obtained from the URL.
+		usr, err := syntax.Quote(sp.User, syntax.LangPOSIX)
+		if err != nil {
+			return nil, fmt.Errorf("invalid user: %w", err)
+		}
+		args = append(args, "-l", usr)
 	}
 	if sp.Port != "" {
-		args = append(args, "-p", sp.Port)
+		// Quote port, as it's obtained from the URL.
+ port, err := syntax.Quote(sp.Port, syntax.LangPOSIX) + if err != nil { + return nil, fmt.Errorf("invalid port: %w", err) + } + args = append(args, "-p", port) } - args = append(args, "--", sp.Host) - args = append(args, add...) - return args + + // We consider "sshFlags" to be "trusted", and set from code only, + // as they are not parsed from the DOCKER_HOST URL. + args = append(args, sshFlags...) + + host, err := syntax.Quote(sp.Host, syntax.LangPOSIX) + if err != nil { + return nil, fmt.Errorf("invalid host: %w", err) + } + + return append(args, "--", host), nil +} + +// Command returns the ssh flags and arguments to execute a command +// (remoteCommandAndArgs) on the remote host. Where needed, it quotes +// values passed in remoteCommandAndArgs to account for ssh executing +// the remote command in a shell. It returns an error if no remote command +// is passed, or when unable to quote the remote command. +// +// Important: to preserve backward-compatibility, Command does not currently +// perform sanitization or quoting on the sshFlags and callers are expected +// to sanitize this argument. +func (sp *Spec) Command(sshFlags []string, remoteCommandAndArgs ...string) ([]string, error) { + if len(remoteCommandAndArgs) == 0 { + return nil, errors.New("no remote command specified") + } + sshArgs, err := sp.args(sshFlags...) + if err != nil { + return nil, err + } + remoteCommand, err := quoteCommand(remoteCommandAndArgs...) + if err != nil { + return nil, err + } + if remoteCommand != "" { + sshArgs = append(sshArgs, remoteCommand) + } + return sshArgs, nil +} + +// quoteCommand returns the remote command to run using the ssh connection +// as a single string, quoting values where needed because ssh executes +// these in a POSIX shell. +func quoteCommand(commandAndArgs ...string) (string, error) { + var quotedCmd string + for i, arg := range commandAndArgs { + a, err := syntax.Quote(arg, syntax.LangPOSIX) + if err != nil { + return "", fmt.Errorf("invalid argument: %w", err) + } + if i == 0 { + quotedCmd = a + continue + } + quotedCmd += " " + a + } + // each part is quoted appropriately, so now we'll have a full + // shell command to pass off to "ssh" + return quotedCmd, nil } diff --git a/cli/connhelper/ssh/ssh_test.go b/cli/connhelper/ssh/ssh_test.go index 492408f116..6de0da0d2d 100644 --- a/cli/connhelper/ssh/ssh_test.go +++ b/cli/connhelper/ssh/ssh_test.go @@ -1,6 +1,7 @@ package ssh import ( + "strings" "testing" "gotest.tools/v3/assert" @@ -26,6 +27,28 @@ func TestParseURL(t *testing.T) { Host: "example.com", }, }, + { + doc: "bare ssh URL with trailing slash", + url: "ssh://example.com/", + expectedArgs: []string{ + "--", "example.com", + }, + expectedSpec: Spec{ + Host: "example.com", + Path: "/", + }, + }, + { + doc: "bare ssh URL with trailing slashes", + url: "ssh://example.com//", + expectedArgs: []string{ + "--", "example.com", + }, + expectedSpec: Spec{ + Host: "example.com", + Path: "//", + }, + }, { doc: "bare ssh URL and remote command", url: "ssh://example.com", @@ -34,7 +57,7 @@ func TestParseURL(t *testing.T) { }, expectedArgs: []string{ "--", "example.com", - "docker", "system", "dial-stdio", + `docker system dial-stdio`, }, expectedSpec: Spec{ Host: "example.com", @@ -48,7 +71,7 @@ func TestParseURL(t *testing.T) { }, expectedArgs: []string{ "--", "example.com", - "docker", "--host", "unix:///var/run/docker.sock", "system", "dial-stdio", + `docker --host unix:///var/run/docker.sock system dial-stdio`, }, expectedSpec: Spec{ Host: "example.com", @@ -84,6 +107,25 @@ 
func TestParseURL(t *testing.T) { Path: "/var/run/docker.sock", }, }, + { + // This test is only to verify the behavior of ParseURL to + // pass through the Path as-is. Neither Spec.Args, nor + // Spec.Command use the Path field directly, and it should + // likely be deprecated. + doc: "bad path", + url: `ssh://example.com/var/run/docker.sock '$(echo hello > /hello.txt)'`, + remoteCommand: []string{ + "docker", "--host", `unix:///var/run/docker.sock '$(echo hello > /hello.txt)'`, "system", "dial-stdio", + }, + expectedArgs: []string{ + "--", "example.com", + `docker --host "unix:///var/run/docker.sock '\$(echo hello > /hello.txt)'" system dial-stdio`, + }, + expectedSpec: Spec{ + Host: "example.com", + Path: `/var/run/docker.sock '$(echo hello > /hello.txt)'`, + }, + }, { doc: "malformed URL", url: "malformed %%url", @@ -123,6 +165,21 @@ func TestParseURL(t *testing.T) { url: "https://example.com", expectedError: `invalid SSH URL: incorrect scheme: https`, }, + { + doc: "invalid URL with NUL character", + url: "ssh://example.com/var/run/\x00docker.sock", + expectedError: `invalid SSH URL: net/url: invalid control character in URL`, + }, + { + doc: "invalid URL with newline character", + url: "ssh://example.com/var/run/docker.sock\n", + expectedError: `invalid SSH URL: net/url: invalid control character in URL`, + }, + { + doc: "invalid URL with control character", + url: "ssh://example.com/var/run/\x1bdocker.sock", + expectedError: `invalid SSH URL: net/url: invalid control character in URL`, + }, } for _, tc := range testCases { t.Run(tc.doc, func(t *testing.T) { @@ -139,3 +196,122 @@ func TestParseURL(t *testing.T) { }) } } + +func TestCommand(t *testing.T) { + testCases := []struct { + doc string + url string + sshFlags []string + customCmd []string + expectedCmd []string + expectedError string + }{ + { + doc: "bare ssh URL", + url: "ssh://example.com", + expectedCmd: []string{ + "--", "example.com", + "docker system dial-stdio", + }, + }, + { + doc: "bare ssh URL with trailing slash", + url: "ssh://example.com/", + expectedCmd: []string{ + "--", "example.com", + "docker system dial-stdio", + }, + }, + { + doc: "bare ssh URL with custom ssh flags", + url: "ssh://example.com", + sshFlags: []string{"-T", "-o", "ConnectTimeout=30", "-oStrictHostKeyChecking=no"}, + expectedCmd: []string{ + "-T", + "-o", "ConnectTimeout=30", + "-oStrictHostKeyChecking=no", + "--", "example.com", + "docker system dial-stdio", + }, + }, + { + doc: "ssh URL with all options", + url: "ssh://me@example.com:10022/var/run/docker.sock", + sshFlags: []string{"-T", "-o ConnectTimeout=30"}, + expectedCmd: []string{ + "-l", "me", + "-p", "10022", + "-T", + "-o ConnectTimeout=30", + "--", "example.com", + "docker '--host=unix:///var/run/docker.sock' system dial-stdio", + }, + }, + { + doc: "bad ssh flags", + url: "ssh://example.com", + sshFlags: []string{"-T", "-o", `ConnectTimeout=30 $(echo hi > /hi.txt)`}, + expectedCmd: []string{ + "-T", + "-o", `ConnectTimeout=30 $(echo hi > /hi.txt)`, + "--", "example.com", + "docker system dial-stdio", + }, + }, + { + doc: "bad username", + url: `ssh://$(shutdown)me@example.com`, + expectedCmd: []string{ + "-l", `'$(shutdown)me'`, + "--", "example.com", + "docker system dial-stdio", + }, + }, + { + doc: "bad hostname", + url: `ssh://$(shutdown)example.com`, + expectedCmd: []string{ + "--", `'$(shutdown)example.com'`, + "docker system dial-stdio", + }, + }, + { + doc: "bad path", + url: `ssh://example.com/var/run/docker.sock '$(echo hello > /hello.txt)'`, + expectedCmd: []string{ + 
"--", "example.com", + `docker "--host=unix:///var/run/docker.sock '\$(echo hello > /hello.txt)'" system dial-stdio`, + }, + }, + { + doc: "missing command", + url: "ssh://example.com", + customCmd: []string{}, + expectedError: "no remote command specified", + }, + } + for _, tc := range testCases { + t.Run(tc.doc, func(t *testing.T) { + sp, err := ParseURL(tc.url) + assert.NilError(t, err) + + var commandAndArgs []string + if tc.customCmd == nil { + socketPath := sp.Path + commandAndArgs = []string{"docker", "system", "dial-stdio"} + if strings.Trim(socketPath, "/") != "" { + commandAndArgs = []string{"docker", "--host=unix://" + socketPath, "system", "dial-stdio"} + } + } + + actualCmd, err := sp.Command(tc.sshFlags, commandAndArgs...) + if tc.expectedError == "" { + assert.NilError(t, err) + assert.Check(t, is.DeepEqual(actualCmd, tc.expectedCmd), "%+#v", actualCmd) + } else { + assert.Check(t, is.Error(err, tc.expectedError)) + assert.Check(t, is.Nil(actualCmd)) + } + }) + } +}