# Listing metadata (from the file viewer): 456 lines, 19 KiB, Python
import copy
|
|
from typing import TYPE_CHECKING, Any, Iterable, List, Optional
|
|
|
|
try:
|
|
from ..vendor.lexicon import Lexicon
|
|
from ..vendor.fluidity import StateMachine, state, transition
|
|
except ImportError:
|
|
from lexicon import Lexicon # type: ignore[no-redef]
|
|
from fluidity import ( # type: ignore[no-redef]
|
|
StateMachine,
|
|
state,
|
|
transition,
|
|
)
|
|
|
|
from ..exceptions import ParseError
|
|
from ..util import debug
|
|
|
|
if TYPE_CHECKING:
|
|
from .context import ParserContext
|
|
|
|
|
|
def is_flag(value: str) -> bool:
    """
    Return ``True`` if ``value`` looks like a flag token.

    Any string beginning with a single dash qualifies, which means long
    (``--foo``) flags and a lone ``-`` also match.
    """
    return value.startswith("-")
|
|
|
|
|
|
def is_long_flag(value: str) -> bool:
    """
    Return ``True`` if ``value`` begins with a double dash (``--``).

    Note that the bare ``--`` sentinel itself also matches.
    """
    return value.startswith("--")
|
|
|
|
|
|
class ParseResult(List["ParserContext"]):
    """
    List-like object with some extra parse-related attributes.

    Specifically, a ``.remainder`` attribute, which is the string found after a
    ``--`` in any parsed argv list; and an ``.unparsed`` attribute, a list of
    tokens that were unable to be parsed.

    .. versionadded:: 1.0
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Build the underlying list and reset the extra attributes.

        All arguments are handed unchanged to ``list.__init__``.
        """
        super().__init__(*args, **kwargs)
        # Everything after a ``--`` sentinel, as one string; empty if none.
        self.remainder = ""
        # Tokens that could not be parsed. A fresh list per instance, so
        # results never share state.
        self.unparsed: List[str] = []
|
|
|
|
|
|
class Parser:
    """
    Create parser conscious of ``contexts`` and optional ``initial`` context.

    ``contexts`` should be an iterable of ``Context`` instances which will be
    searched when new context names are encountered during a parse. These
    Contexts determine what flags may follow them, as well as whether given
    flags take values.

    ``initial`` is optional and will be used to determine validity of "core"
    options/flags at the start of the parse run, if any are encountered.

    ``ignore_unknown`` determines what to do when contexts are found which do
    not map to any members of ``contexts``. By default it is ``False``, meaning
    any unknown contexts result in a parse error exception. If ``True``,
    encountering an unknown context halts parsing and populates the return
    value's ``.unparsed`` attribute with the remaining parse tokens.

    .. versionadded:: 1.0
    """

    def __init__(
        self,
        contexts: Iterable["ParserContext"] = (),
        initial: Optional["ParserContext"] = None,
        ignore_unknown: bool = False,
    ) -> None:
        """
        Register ``contexts`` (and their aliases) for later lookup by name.

        :raises ValueError:
            If a context has no name, or if a name or alias collides with one
            that is already registered.
        """
        self.initial = initial
        self.contexts = Lexicon()
        self.ignore_unknown = ignore_unknown
        for context in contexts:
            debug("Adding {}".format(context))
            if not context.name:
                raise ValueError("Non-initial contexts must have names.")
            exists = "A context named/aliased {!r} is already in this parser!"
            if context.name in self.contexts:
                raise ValueError(exists.format(context.name))
            self.contexts[context.name] = context
            for alias in context.aliases:
                if alias in self.contexts:
                    raise ValueError(exists.format(alias))
                self.contexts.alias(alias, to=context.name)

    def parse_argv(self, argv: List[str]) -> ParseResult:
        """
        Parse an argv-style token list ``argv``.

        Returns a list (actually a subclass, `.ParseResult`) of
        `.ParserContext` objects matching the order they were found in the
        ``argv`` and containing `.Argument` objects with updated values based
        on any flags given.

        Assumes any program name has already been stripped out. Good::

            Parser(...).parse_argv(['--core-opt', 'task', '--task-opt'])

        Bad::

            Parser(...).parse_argv(['invoke', '--core-opt', ...])

        :param argv: List of argument string tokens.
        :returns:
            A `.ParseResult` (a ``list`` subclass containing some number of
            `.ParserContext` objects).

        .. versionadded:: 1.0
        """
        machine = ParseMachine(
            # FIXME: initial should not be none
            initial=self.initial,  # type: ignore[arg-type]
            contexts=self.contexts,
            ignore_unknown=self.ignore_unknown,
        )
        # FIXME: Why isn't there str.partition for lists? There must be a
        # better way to do this. Split argv around the double-dash remainder
        # sentinel.
        debug("Starting argv: {!r}".format(argv))
        try:
            ddash = argv.index("--")
        except ValueError:
            ddash = len(argv)  # No remainder == body gets all
        # NOTE: slicing copies, so the insertions made below never touch the
        # caller's ``argv`` list.
        body = argv[:ddash]
        remainder = argv[ddash:][1:]  # [1:] to strip off remainder itself
        if remainder:
            debug(
                "Remainder: argv[{!r}:][1:] => {!r}".format(ddash, remainder)
            )
        # NOTE: ``body`` is deliberately grown while being iterated; every
        # insertion lands at ``index + 1``, i.e. strictly ahead of the cursor,
        # so the inserted sub-tokens are visited on the following iterations.
        for index, token in enumerate(body):
            # Handle non-space-delimited forms, if not currently expecting a
            # flag value and still in valid parsing territory (i.e. not in
            # "unknown" state which implies store-only)
            # NOTE: we do this in a few steps so we can
            # split-then-check-validity; necessary for things like when the
            # previously seen flag optionally takes a value.
            mutations = []
            orig = token
            if is_flag(token) and not machine.result.unparsed:
                # Equals-sign-delimited flags, eg --foo=bar or -f=bar
                if "=" in token:
                    token, _, value = token.partition("=")
                    msg = "Splitting x=y expr {!r} into tokens {!r} and {!r}"
                    debug(msg.format(orig, token, value))
                    mutations.append((index + 1, value))
                # Contiguous boolean short flags, e.g. -qv
                elif not is_long_flag(token) and len(token) > 2:
                    full_token = token[:]
                    rest, token = token[2:], token[:2]
                    err = "Splitting {!r} into token {!r} and rest {!r}"
                    debug(err.format(full_token, token, rest))
                    # Handle boolean flag block vs short-flag + value. Make
                    # sure not to test the token as a context flag if we've
                    # passed into 'storing unknown stuff' territory (e.g. on a
                    # core-args pass, handling what are going to be task args)
                    # The ``is not None`` guard covers parsers built without
                    # an ``initial`` context: before any context has been
                    # seen there is nothing to look flags up in, and the
                    # token must fall through to normal unknown-token
                    # handling instead of raising AttributeError here.
                    have_flag = (
                        machine.context is not None
                        and token in machine.context.flags
                        and machine.current_state != "unknown"
                    )
                    if have_flag and machine.context.flags[token].takes_value:
                        msg = "{!r} is a flag for current context & it takes a value, giving it {!r}"  # noqa
                        debug(msg.format(token, rest))
                        mutations.append((index + 1, rest))
                    else:
                        _rest = ["-{}".format(x) for x in rest]
                        msg = "Splitting multi-flag glob {!r} into {!r} and {!r}"  # noqa
                        debug(msg.format(orig, token, _rest))
                        # Reversed, because each insert targets the same
                        # position and so pushes earlier inserts rightwards.
                        for item in reversed(_rest):
                            mutations.append((index + 1, item))
            # Here, we've got some possible mutations queued up, and 'token'
            # may have been overwritten as well. Whether we apply those and
            # continue as-is, or roll it back, depends:
            # - If the parser wasn't waiting for a flag value, we're already on
            #   the right track, so apply mutations and move along to the
            #   handle() step.
            # - If we ARE waiting for a value, and the flag expecting it ALWAYS
            #   wants a value (it's not optional), we go back to using the
            #   original token. (TODO: could reorganize this to avoid the
            #   sub-parsing in this case, but optimizing for human-facing
            #   execution isn't critical.)
            # - Finally, if we are waiting for a value AND it's optional, we
            #   inspect the first sub-token/mutation to see if it would
            #   otherwise have been a valid flag, and let that determine what
            #   we do (if valid, we apply the mutations; if invalid, we
            #   reinstate the original token.)
            if machine.waiting_for_flag_value:
                optional = machine.flag and machine.flag.optional
                subtoken_is_valid_flag = token in machine.context.flags
                if not (optional and subtoken_is_valid_flag):
                    token = orig
                    mutations = []
            # Distinct name so the enumerate() loop variable is not shadowed.
            for position, value in mutations:
                body.insert(position, value)
            machine.handle(token)
        machine.finish()
        result = machine.result
        result.remainder = " ".join(remainder)
        return result
|
|
|
|
|
|
class ParseMachine(StateMachine):
    """
    Token-at-a-time state machine that accumulates a `.ParseResult`.

    Three states are declared: ``context`` (normal parsing), ``unknown``
    (store-only; every further token is appended to ``result.unparsed``) and
    ``end``. Entering any of them runs `complete_flag` followed by
    `complete_context`.
    """

    initial_state = "context"

    state("context", enter=["complete_flag", "complete_context"])
    state("unknown", enter=["complete_flag", "complete_context"])
    state("end", enter=["complete_flag", "complete_context"])

    transition(from_=("context", "unknown"), event="finish", to="end")
    transition(
        from_="context",
        event="see_context",
        action="switch_to_context",
        to="context",
    )
    transition(
        from_=("context", "unknown"),
        event="see_unknown",
        action="store_only",
        to="unknown",
    )

    def changing_state(self, from_: str, to: str) -> None:
        # Debug-logs a state change. NOTE(review): presumably a hook that
        # StateMachine calls on every transition -- confirm against fluidity.
        debug("ParseMachine: {!r} => {!r}".format(from_, to))

    def __init__(
        self,
        initial: "ParserContext",
        contexts: Lexicon,
        ignore_unknown: bool,
    ) -> None:
        """
        Set up parse state from deep copies of ``initial`` and ``contexts``.

        Copies are used so that values stored during the parse land on the
        machine's own objects rather than on the ones handed in.
        """
        # Initialize
        self.ignore_unknown = ignore_unknown
        self.initial = self.context = copy.deepcopy(initial)
        debug("Initialized with context: {!r}".format(self.context))
        # Flag currently being processed, if any.
        self.flag = None
        # Whether the current flag has been handed a value since it was
        # switched to (needed for list-type flags; see
        # waiting_for_flag_value).
        self.flag_got_value = False
        self.result = ParseResult()
        self.contexts = copy.deepcopy(contexts)
        debug("Available contexts: {!r}".format(self.contexts))
        # In case StateMachine does anything in __init__
        super().__init__()

    @property
    def waiting_for_flag_value(self) -> bool:
        """
        Whether the current flag (if any) still wants a value token.
        """
        # Do we have a current flag, and does it expect a value (vs being a
        # bool/toggle)?
        takes_value = self.flag and self.flag.takes_value
        if not takes_value:
            return False
        # OK, this flag is one that takes values.
        # Is it a list type (which has only just been switched to)? Then it'll
        # always accept more values.
        # TODO: how to handle somebody wanting it to be some other iterable
        # like tuple or custom class? Or do we just say unsupported?
        if self.flag.kind is list and not self.flag_got_value:
            return True
        # Not a list, okay. Does it already have a value?
        has_value = self.flag.raw_value is not None
        # If it doesn't have one, we're waiting for one (which tells the parser
        # how to proceed and typically to store the next token.)
        # TODO: in the negative case here, we should do something else instead:
        # - Except, "hey you screwed up, you already gave that flag!"
        # - Overwrite, "oh you changed your mind?" - which requires more work
        # elsewhere too, unfortunately. (Perhaps additional properties on
        # Argument that can be queried, e.g. "arg.is_iterable"?)
        return not has_value

    def handle(self, token: str) -> None:
        """
        Classify a single ``token`` and dispatch to the matching handler.

        Branch order is significant: flag, inverse flag, pending flag value,
        missing positional arg, context name, initial-context flag, unknown.

        :raises ParseError:
            Via `error`, when the token is unknown and ``ignore_unknown`` is
            false.
        """
        debug("Handling token: {!r}".format(token))
        # Handle unknown state at the top: we don't care about even
        # possibly-valid input if we've encountered unknown input.
        if self.current_state == "unknown":
            debug("Top-of-handle() see_unknown({!r})".format(token))
            self.see_unknown(token)
            return
        # Flag
        if self.context and token in self.context.flags:
            debug("Saw flag {!r}".format(token))
            self.switch_to_flag(token)
        elif self.context and token in self.context.inverse_flags:
            debug("Saw inverse flag {!r}".format(token))
            self.switch_to_flag(token, inverse=True)
        # Value for current flag
        elif self.waiting_for_flag_value:
            debug(
                "We're waiting for a flag value so {!r} must be it?".format(
                    token
                )
            )  # noqa
            self.see_value(token)
        # Positional args (must come above context-name check in case we still
        # need a posarg and the user legitimately wants to give it a value that
        # just happens to be a valid context name.)
        elif self.context and self.context.missing_positional_args:
            msg = "Context {!r} requires positional args, eating {!r}"
            debug(msg.format(self.context, token))
            self.see_positional_arg(token)
        # New context
        elif token in self.contexts:
            self.see_context(token)
        # Initial-context flag being given as per-task flag (e.g. --help)
        elif self.initial and token in self.initial.flags:
            debug("Saw (initial-context) flag {!r}".format(token))
            flag = self.initial.flags[token]
            # Special-case for core --help flag: context name is used as value.
            if flag.name == "help":
                flag.value = self.context.name
                msg = "Saw --help in a per-task context, setting task name ({!r}) as its value"  # noqa
                debug(msg.format(flag.value))
            # All others: just enter the 'switch to flag' parser state
            else:
                # TODO: handle inverse core flags too? There are none at the
                # moment (e.g. --no-dedupe is actually 'no_dedupe', not a
                # default-False 'dedupe') and it's up to us whether we actually
                # put any in place.
                self.switch_to_flag(token)
        # Unknown
        else:
            if not self.ignore_unknown:
                debug("Can't find context named {!r}, erroring".format(token))
                self.error("No idea what {!r} is!".format(token))
            else:
                debug("Bottom-of-handle() see_unknown({!r})".format(token))
                self.see_unknown(token)

    def store_only(self, token: str) -> None:
        # Action for the ``see_unknown`` transition: record the token as-is.
        # Start off the unparsed list
        debug("Storing unknown token {!r}".format(token))
        self.result.unparsed.append(token)

    def complete_context(self) -> None:
        """
        Validate the current context and add it to the result if absent.

        :raises ParseError:
            Via `error`, if the context still has unfilled positional args.
        """
        debug(
            "Wrapping up context {!r}".format(
                self.context.name if self.context else self.context
            )
        )
        # Ensure all of context's positional args have been given.
        if self.context and self.context.missing_positional_args:
            err = "'{}' did not receive required positional arguments: {}"
            names = ", ".join(
                "'{}'".format(x.name)
                for x in self.context.missing_positional_args
            )
            self.error(err.format(self.context.name, names))
        if self.context and self.context not in self.result:
            self.result.append(self.context)

    def switch_to_context(self, name: str) -> None:
        # Action for the ``see_context`` transition. A fresh deep copy is
        # made each time so values never land on the registered template.
        self.context = copy.deepcopy(self.contexts[name])
        debug("Moving to context {!r}".format(name))
        debug("Context args: {!r}".format(self.context.args))
        debug("Context flags: {!r}".format(self.context.flags))
        debug("Context inverse_flags: {!r}".format(self.context.inverse_flags))

    def complete_flag(self) -> None:
        """
        Tie off the current flag, if there is one.

        :raises ParseError:
            Via `error`, if a flag with a mandatory value never received one.
        """
        if self.flag:
            msg = "Completing current flag {} before moving on"
            debug(msg.format(self.flag))
        # Barf if we needed a value and didn't get one
        if (
            self.flag
            and self.flag.takes_value
            and self.flag.raw_value is None
            and not self.flag.optional
        ):
            err = "Flag {!r} needed value and was not given one!"
            self.error(err.format(self.flag))
        # Handle optional-value flags; at this point they were not given an
        # explicit value, but they were seen, ergo they should get treated like
        # bools.
        if self.flag and self.flag.raw_value is None and self.flag.optional:
            msg = "Saw optional flag {!r} go by w/ no value; setting to True"
            debug(msg.format(self.flag.name))
            # Skip casting so the bool gets preserved
            self.flag.set_value(True, cast=False)

    def check_ambiguity(self, value: Any) -> bool:
        """
        Guard against ambiguity when current flag takes an optional value.

        :returns: ``False`` whenever no ambiguity was detected.
        :raises ParseError: If ``value`` is ambiguous.

        .. versionadded:: 1.0
        """
        # No flag is currently being examined, or one is but it doesn't take an
        # optional value? Ambiguity isn't possible.
        if not (self.flag and self.flag.optional):
            return False
        # We *are* dealing with an optional-value flag, but it's already
        # received a value? There can't be ambiguity here either.
        if self.flag.raw_value is not None:
            return False
        # Otherwise, there *may* be ambiguity if 1 or more of the below tests
        # fail.
        tests = []
        # Unfilled posargs still exist?
        tests.append(self.context and self.context.missing_positional_args)
        # Value matches another valid task/context name?
        tests.append(value in self.contexts)
        if any(tests):
            msg = "{!r} is ambiguous when given after an optional-value flag"
            raise ParseError(msg.format(value))
        # Explicit, so the declared ``bool`` return type holds on every path
        # (this path used to fall off the end and yield ``None``).
        return False

    def switch_to_flag(self, flag: str, inverse: bool = False) -> None:
        """
        Make ``flag`` the current flag, first completing any previous one.

        When ``inverse`` is true, ``flag`` is first translated through the
        context's ``inverse_flags`` mapping and, if it is a non-value flag,
        it ends up set to ``False`` rather than ``True``.

        :raises KeyError:
            If the flag is in neither the current nor the initial context.
        """
        # Sanity check for ambiguity w/ prior optional-value flag
        self.check_ambiguity(flag)
        # Also tie it off, in case prior had optional value or etc. Seems to be
        # harmless for other kinds of flags. (TODO: this is a serious indicator
        # that we need to move some of this flag-by-flag bookkeeping into the
        # state machine bits, if possible - as-is it was REAL confusing re: why
        # this was manually required!)
        self.complete_flag()
        # Set flag/arg obj
        flag = self.context.inverse_flags[flag] if inverse else flag
        # Update state
        try:
            self.flag = self.context.flags[flag]
        except KeyError as e:
            # Try fallback to initial/core flag
            try:
                self.flag = self.initial.flags[flag]
            except KeyError:
                # If it wasn't in either, raise the original context's
                # exception, as that's more useful / correct.
                raise e
        debug("Moving to flag {!r}".format(self.flag))
        # Bookkeeping for iterable-type flags (where the typical 'value
        # non-empty/nondefault -> clearly it got its value already' test is
        # insufficient)
        self.flag_got_value = False
        # Handle boolean flags (which can immediately be updated)
        if self.flag and not self.flag.takes_value:
            val = not inverse
            debug("Marking seen flag {!r} as {}".format(self.flag, val))
            self.flag.value = val

    def see_value(self, value: Any) -> None:
        """
        Hand ``value`` to the current flag.

        :raises ParseError:
            If ``value`` is ambiguous (see `check_ambiguity`), or if there is
            no current flag that takes a value.
        """
        self.check_ambiguity(value)
        if self.flag and self.flag.takes_value:
            debug("Setting flag {!r} to value {!r}".format(self.flag, value))
            self.flag.value = value
            self.flag_got_value = True
        else:
            self.error("Flag {!r} doesn't take any value!".format(self.flag))

    def see_positional_arg(self, value: Any) -> None:
        # Fill the first positional arg that has no value yet; if all of them
        # already have one, ``value`` is silently dropped.
        for arg in self.context.positional_args:
            if arg.value is None:
                arg.value = value
                break

    def error(self, msg: str) -> None:
        # Always raises; the current context is attached to the exception.
        raise ParseError(msg, self.context)
|