nim-confutils/confutils/shell_completion.nim
Etan Kissling d0d6fb45b2
catch exceptions in splitCompletionLine (#77)
We currently return `@[]` in `splitCompletionLine` when there is
unexpected or unsupported input via environment variables, but let
exceptions during parsing propagate to the caller. Catching those
exceptions allows the caller to use the same behaviour regardless of
the nature of unexpected input (either through env, or parsing).
Currently, callers don't seem to be aware of the exceptions, so
going with the behaviour used for environment errors of returning `@[]`.
2023-06-13 10:36:33 +02:00

261 lines
6.7 KiB
Nim

## A simple lexer meant to tokenize an input string as a shell would do.
import lexbase
import options
import streams
import os
import strutils
type
  ShellLexer = object of BaseLexer
    ## Tokenizer state for shell-like input, layered on `lexbase.BaseLexer`.
    # preserveTrailingWs: when set, `getTok` emits one final empty token if
    #   characters were skipped right before the end of input.
    # mergeWordBreaks: internal flag toggled by `getTok`; when set, the next
    #   run of word-break characters is returned as a single token.
    preserveTrailingWs, mergeWordBreaks: bool
    # Characters that end the current word when met outside quotes/escapes.
    wordBreakChars: string
const
  # Default set of word-breaking characters used by `open`.
  WORDBREAKS = "\"'@><=;|&(:"
  # Characters that `shellQuote` / `shellPathEscape` emit without any
  # quoting or escaping: ASCII letters, digits and a few punctuation marks.
  SAFE_CHARS = Letters + Digits +
    {'@', '%', '+', '=', ':', ',', '.', '/', '-'}
proc open(l: var ShellLexer,
          input: Stream,
          wordBreakChars: string = WORDBREAKS,
          preserveTrailingWs = true) =
  ## Attaches the lexer `l` to `input` and initialises its shell-specific
  ## state.
  ##
  ## `wordBreakChars` lists the characters that split words (defaults to
  ## `WORDBREAKS`); `preserveTrailingWs` is stored as-is on the lexer. The
  ## word-break merging flag always starts out cleared.
  lexbase.open(l, input)
  l.wordBreakChars = wordBreakChars
  l.preserveTrailingWs = preserveTrailingWs
  l.mergeWordBreaks = false
proc parseQuoted(l: var ShellLexer,
                 pos: int,
                 isSingle: bool,
                 output: var string): int =
  ## Scans the body of a quoted string and appends its content to `output`.
  ##
  ## `pos` is the buffer position right after the opening quote; `isSingle`
  ## selects single-quote (`'`) or double-quote (`"`) rules. Scanning stops
  ## after the matching closing quote or at end of input, whichever comes
  ## first. Returns the buffer position where the caller should resume.
  ##
  ## Escape handling:
  ## * single-quoted: only `\'` is unescaped to `'`;
  ## * double-quoted: a backslash before `$`, a backtick, `\` or `"` is
  ##   dropped and the following character is kept;
  ## * any other backslash sequence is copied verbatim (backslash included).
  ##
  ## CR/LF characters are handed to `lexbase` and are not copied to `output`.
  var pos = pos
  while true:
    case l.buf[pos]:
    of '\c': pos = lexbase.handleCR(l, pos)
    of '\L': pos = lexbase.handleLF(l, pos)
    of lexbase.EndOfFile: break
    of '\\':
      # Consume the backslash and look at the following character
      inc(pos)
      let escaped = l.buf[pos]
      if escaped == lexbase.EndOfFile:
        # Dangling backslash at the very end of the input: keep the backslash
        # but neither copy the end-of-file sentinel into the token nor step
        # past it (mirrors the end-of-input guard for escapes in `getTok`).
        output.add('\\')
        break
      if (isSingle and escaped == '\'') or
         (not isSingle and escaped in {'$', '`', '\\', '"'}):
        # Escape the character
        output.add(escaped)
      else:
        # Rewrite the escape sequence as-is
        output.add('\\')
        output.add(escaped)
      inc(pos)
    of '\"':
      inc(pos)
      # A double quote is literal inside single quotes, a terminator otherwise
      if isSingle: output.add('\"')
      else: break
    of '\'':
      inc(pos)
      # A single quote is a terminator inside single quotes, literal otherwise
      if isSingle: break
      else: output.add('\'')
    else:
      output.add(l.buf[pos])
      inc(pos)
  return pos
proc getTok(l: var ShellLexer): Option[string] =
  ## Reads the next token from the lexer buffer, starting at `l.bufpos`.
  ##
  ## Returns `none(string)` when the end of input is reached. If
  ## `l.preserveTrailingWs` is set and some characters (blanks, line breaks or
  ## a `#` comment) were skipped right before the end of input, `some("")` is
  ## returned once first. Otherwise returns `some(token)` and moves `l.bufpos`
  ## to the first character that was not consumed.
  var pos = l.bufpos
  # Skip the initial whitespace
  while true:
    case l.buf[pos]:
    of '\c': pos = lexbase.handleCR(l, pos)
    of '\L': pos = lexbase.handleLF(l, pos)
    of '#':
      # Skip everything until EOF/EOL
      while l.buf[pos] notin {'\c', '\L', lexbase.EndOfFile}:
        inc(pos)
    of lexbase.EndOfFile:
      # If we did eat up some whitespace return an empty token, this is needed
      # to find out if the string ends with whitespace.
      if l.preserveTrailingWs and l.bufpos != pos:
        l.bufpos = pos
        return some("")
      return none(string)
    of ' ', '\t':
      inc(pos)
    else:
      break
  var tokLit = ""
  # Parse the next token
  while true:
    case l.buf[pos]:
    of '\c': pos = lexbase.handleCR(l, pos)
    of '\L': pos = lexbase.handleLF(l, pos)
    of '\'':
      # Single-quoted string
      inc(pos)
      pos = parseQuoted(l, pos, true, tokLit)
    of '"':
      # Double-quoted string
      inc(pos)
      pos = parseQuoted(l, pos, false, tokLit)
    of '\\':
      # Escape sequence: the backslash is dropped and the next character is
      # taken literally; a backslash right before end of input is discarded.
      inc(pos)
      if l.buf[pos] != lexbase.EndOfFile:
        tokLit.add(l.buf[pos])
        inc(pos)
    of '#', ' ', '\t', lexbase.EndOfFile:
      # Unquoted blank, comment start or end of input terminates the token
      break
    else:
      let ch = l.buf[pos]
      if ch notin l.wordBreakChars:
        tokLit.add(l.buf[pos])
        inc(pos)
      # Merge together runs of adjacent word-breaking characters if requested
      elif l.mergeWordBreaks:
        while l.buf[pos] in l.wordBreakChars:
          tokLit.add(l.buf[pos])
          inc(pos)
        l.mergeWordBreaks = false
        break
      else:
        # First word-break character seen: end the current token (possibly
        # empty) without consuming it, and flag the next call to return the
        # whole run of word-break characters as its own token.
        l.mergeWordBreaks = true
        break
  l.bufpos = pos
  return some(tokLit)
proc splitCompletionLine*(): seq[string] =
  ## Splits the command line being completed into shell-style tokens.
  ##
  ## Reads the `COMP_LINE` and `COMP_POINT` environment variables, keeps the
  ## part of `COMP_LINE` up to and including index `COMP_POINT` (a value equal
  ## to the line length is clamped to the last character) and tokenizes it
  ## with `ShellLexer` using its default settings.
  ##
  ## Returns `@[]` for every kind of unusable input instead of raising:
  ## a `COMP_POINT` that is not an integer, a `COMP_POINT` outside the line,
  ## an empty `COMP_LINE`, or an `IOError`/`OSError` while lexing.
  let compLine = os.getEnv("COMP_LINE")
  var compPoint =
    try:
      parseInt(os.getEnv("COMP_POINT", "0"))
    except ValueError:
      # A malformed COMP_POINT is treated like any other bad environment
      # input rather than letting the exception escape to the caller.
      return @[]
  if compPoint == len(compLine):
    compPoint -= 1
  if compPoint < 0 or compPoint > len(compLine):
    return @[]
  # Take the useful part only
  var strm = newStringStream(compLine[0..compPoint])
  # Split the resulting string
  var l: ShellLexer
  try:
    l.open(strm)
    while true:
      let token = l.getTok()
      if token.isNone():
        break
      result.add(token.get())
  except IOError, OSError:
    # Discard any partial result; callers only ever see a full split or @[]
    return @[]
proc shellQuote*(word: string): string =
  ## Returns `word` quoted for use as a single shell word.
  ##
  ## * An empty word becomes `''`.
  ## * A word made only of `SAFE_CHARS` is returned unchanged.
  ## * Anything else is wrapped in single quotes, with every embedded single
  ##   quote preceded by a backslash.
  if word.len == 0:
    result = "''"
  elif word.allCharsInSet(SAFE_CHARS):
    result = word
  else:
    result = newStringOfCap(word.len + 2)
    result.add('\'')
    for c in word:
      if c == '\'':
        result.add("\\'")
      else:
        result.add(c)
    result.add('\'')
proc shellPathEscape*(path: string): string =
  ## Returns `path` with a backslash inserted before every character that is
  ## not in `SAFE_CHARS`. A path consisting only of safe characters is
  ## returned unchanged.
  if path.allCharsInSet(SAFE_CHARS):
    return path
  for c in path:
    if c in SAFE_CHARS:
      result.add(c)
    else:
      result.add('\\')
      result.add(c)
when isMainModule:
  # Self-test, run only when this module is compiled as the main program.
  #
  # Test data lifted from python's shlex unit-tests.
  # Each line has the form `input|token1|token2|...|`: the first field is the
  # string handed to the lexer, the following fields are the expected tokens.
  # The trailing `|` yields a final empty field, which is dropped below.
  const data = """
foo bar|foo|bar|
foo bar|foo|bar|
foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""
  var corpus = newStringStream(data)
  var line = ""
  while corpus.readLine(line):
    let chunks = line.split('|')
    let expr = chunks[0]
    let expected = chunks[1..^2]
    var l: ShellLexer
    var strm = newStringStream(expr)
    var got: seq[string]
    # No word-break characters and no trailing-whitespace token, so the
    # lexer only splits on blanks, quotes and escapes for this comparison.
    l.open(strm, wordBreakChars="", preserveTrailingWs=false)
    while true:
      let x = l.getTok()
      if x.isNone():
        break
      got.add(x.get())
    if got != expected:
      echo "got ", got
      echo "expected ", expected
      doAssert(false)
  # Quoting checks. These exercise `shellQuote`, the quoting proc this module
  # actually defines (there is no `quoteWord` in this file).
  doAssert(shellQuote("") == "''")
  doAssert(shellQuote("\\\"") == "'\\\"'")
  doAssert(shellQuote("foobar") == "foobar")
  doAssert(shellQuote("foo$bar") == "'foo$bar'")
  doAssert(shellQuote("foo bar") == "'foo bar'")
  doAssert(shellQuote("foo'bar") == "'foo\\'bar'")