// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors // Licensed under the MIT License: // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. // Parser combinator framework! // // This file declares several functions which construct parsers, usually taking other parsers as // input, thus making them parser combinators. // // A valid parser is any functor which takes a reference to an input cursor (defined below) as its // input and returns a Maybe. The parser returns null on parse failure, or returns the parsed // result on success. // // An "input cursor" is any type which implements the same interface as IteratorInput, below. Such // a type acts as a pointer to the current input location. When a parser returns successfully, it // will have updated the input cursor to point to the position just past the end of what was parsed. // On failure, the cursor position is unspecified. #ifndef KJ_PARSE_COMMON_H_ #define KJ_PARSE_COMMON_H_ #if defined(__GNUC__) && !KJ_HEADER_WARNINGS #pragma GCC system_header #endif #include "../common.h" #include "../memory.h" #include "../array.h" #include "../tuple.h" #include "../vector.h" #if _MSC_VER #include // result_of_t #endif namespace kj { namespace parse { template class IteratorInput { // A parser input implementation based on an iterator range. public: IteratorInput(Iterator begin, Iterator end) : parent(nullptr), pos(begin), end(end), best(begin) {} explicit IteratorInput(IteratorInput& parent) : parent(&parent), pos(parent.pos), end(parent.end), best(parent.pos) {} ~IteratorInput() { if (parent != nullptr) { parent->best = kj::max(kj::max(pos, best), parent->best); } } KJ_DISALLOW_COPY(IteratorInput); void advanceParent() { parent->pos = pos; } void forgetParent() { parent = nullptr; } bool atEnd() { return pos == end; } auto current() -> decltype(*instance()) { KJ_IREQUIRE(!atEnd()); return *pos; } auto consume() -> decltype(*instance()) { KJ_IREQUIRE(!atEnd()); return *pos++; } void next() { KJ_IREQUIRE(!atEnd()); ++pos; } Iterator getBest() { return kj::max(pos, best); } Iterator getPosition() { return pos; } private: IteratorInput* parent; Iterator pos; Iterator end; Iterator best; // furthest we got with any sub-input }; template struct OutputType_; template struct OutputType_> { typedef T Type; }; template using OutputType = typename OutputType_< #if _MSC_VER std::result_of_t // The instance() based version below results in: // C2064: term does not evaluate to a function taking 1 arguments #else decltype(instance()(instance())) #endif >::Type; // Synonym for the output type of a parser, given the parser type and the input type. // ======================================================================================= template class ParserRef { // Acts as a reference to some other parser, with simplified type. The referenced parser // is polymorphic by virtual call rather than templates. For grammars of non-trivial size, // it is important to inject refs into the grammar here and there to prevent the parser types // from becoming ridiculous. Using too many of them can hurt performance, though. public: ParserRef(): parser(nullptr), wrapper(nullptr) {} ParserRef(const ParserRef&) = default; ParserRef(ParserRef&&) = default; ParserRef& operator=(const ParserRef& other) = default; ParserRef& operator=(ParserRef&& other) = default; template constexpr ParserRef(Other&& other) : parser(&other), wrapper(&WrapperImplInstance>::instance) { static_assert(kj::isReference(), "ParserRef should not be assigned to a temporary."); } template inline ParserRef& operator=(Other&& other) { static_assert(kj::isReference(), "ParserRef should not be assigned to a temporary."); parser = &other; wrapper = &WrapperImplInstance>::instance; return *this; } KJ_ALWAYS_INLINE(Maybe operator()(Input& input) const) { // Always inline in the hopes that this allows branch prediction to kick in so the virtual call // doesn't hurt so much. return wrapper->parse(parser, input); } private: struct Wrapper { virtual Maybe parse(const void* parser, Input& input) const = 0; }; template struct WrapperImpl: public Wrapper { Maybe parse(const void* parser, Input& input) const override { return (*reinterpret_cast(parser))(input); } }; template struct WrapperImplInstance { #if _MSC_VER // TODO(msvc): MSVC currently fails to initialize vtable pointers for constexpr values so // we have to make this just const instead. static const WrapperImpl instance; #else static constexpr WrapperImpl instance = WrapperImpl(); #endif }; const void* parser; const Wrapper* wrapper; }; template template #if _MSC_VER const typename ParserRef::template WrapperImpl ParserRef::WrapperImplInstance::instance = WrapperImpl(); #else constexpr typename ParserRef::template WrapperImpl ParserRef::WrapperImplInstance::instance; #endif template constexpr ParserRef> ref(ParserImpl& impl) { // Constructs a ParserRef. You must specify the input type explicitly, e.g. // `ref(myParser)`. return ParserRef>(impl); } // ------------------------------------------------------------------- // any // Output = one token class Any_ { public: template Maybe().consume())>> operator()(Input& input) const { if (input.atEnd()) { return nullptr; } else { return input.consume(); } } }; constexpr Any_ any = Any_(); // A parser which matches any token and simply returns it. // ------------------------------------------------------------------- // exactly() // Output = Tuple<> template class Exactly_ { public: explicit constexpr Exactly_(T&& expected): expected(expected) {} template Maybe> operator()(Input& input) const { if (input.atEnd() || input.current() != expected) { return nullptr; } else { input.next(); return Tuple<>(); } } private: T expected; }; template constexpr Exactly_ exactly(T&& expected) { // Constructs a parser which succeeds when the input is exactly the token specified. The // result is always the empty tuple. return Exactly_(kj::fwd(expected)); } // ------------------------------------------------------------------- // exactlyConst() // Output = Tuple<> template class ExactlyConst_ { public: explicit constexpr ExactlyConst_() {} template Maybe> operator()(Input& input) const { if (input.atEnd() || input.current() != expected) { return nullptr; } else { input.next(); return Tuple<>(); } } }; template constexpr ExactlyConst_ exactlyConst() { // Constructs a parser which succeeds when the input is exactly the token specified. The // result is always the empty tuple. This parser is templated on the token value which may cause // it to perform better -- or worse. Be sure to measure. return ExactlyConst_(); } // ------------------------------------------------------------------- // constResult() template class ConstResult_ { public: explicit constexpr ConstResult_(SubParser&& subParser, Result&& result) : subParser(kj::fwd(subParser)), result(kj::fwd(result)) {} template Maybe operator()(Input& input) const { if (subParser(input) == nullptr) { return nullptr; } else { return result; } } private: SubParser subParser; Result result; }; template constexpr ConstResult_ constResult(SubParser&& subParser, Result&& result) { // Constructs a parser which returns exactly `result` if `subParser` is successful. return ConstResult_(kj::fwd(subParser), kj::fwd(result)); } template constexpr ConstResult_> discard(SubParser&& subParser) { // Constructs a parser which wraps `subParser` but discards the result. return constResult(kj::fwd(subParser), Tuple<>()); } // ------------------------------------------------------------------- // sequence() // Output = Flattened Tuple of outputs of sub-parsers. template class Sequence_; template class Sequence_ { public: template explicit constexpr Sequence_(T&& firstSubParser, U&&... rest) : first(kj::fwd(firstSubParser)), rest(kj::fwd(rest)...) {} // TODO(msvc): The trailing return types on `operator()` and `parseNext()` expose at least two // bugs in MSVC: // // 1. An ICE. // 2. 'error C2672: 'operator __surrogate_func': no matching overloaded function found)', // which crops up in numerous places when trying to build the capnp command line tools. // // The only workaround I found for both bugs is to omit the trailing return types and instead // rely on C++14's return type deduction. template auto operator()(Input& input) const #ifndef _MSC_VER -> Maybe>(), instance>()...))> #endif { return parseNext(input); } template auto parseNext(Input& input, InitialParams&&... initialParams) const #ifndef _MSC_VER -> Maybe(initialParams)..., instance>(), instance>()...))> #endif { KJ_IF_MAYBE(firstResult, first(input)) { return rest.parseNext(input, kj::fwd(initialParams)..., kj::mv(*firstResult)); } else { // TODO(msvc): MSVC depends on return type deduction to compile this function, so we need to // help it deduce the right type on this code path. return Maybe(initialParams)..., instance>(), instance>()...))>{nullptr}; } } private: FirstSubParser first; Sequence_ rest; }; template <> class Sequence_<> { public: template Maybe> operator()(Input& input) const { return parseNext(input); } template auto parseNext(Input& input, Params&&... params) const -> Maybe(params)...))> { return tuple(kj::fwd(params)...); } }; template constexpr Sequence_ sequence(SubParsers&&... subParsers) { // Constructs a parser that executes each of the parameter parsers in sequence and returns a // tuple of their results. return Sequence_(kj::fwd(subParsers)...); } // ------------------------------------------------------------------- // many() // Output = Array of output of sub-parser, or just a uint count if the sub-parser returns Tuple<>. template class Many_ { template > struct Impl; public: explicit constexpr Many_(SubParser&& subParser) : subParser(kj::fwd(subParser)) {} template auto operator()(Input& input) const -> decltype(Impl::apply(instance(), input)); private: SubParser subParser; }; template template struct Many_::Impl { static Maybe> apply(const SubParser& subParser, Input& input) { typedef Vector> Results; Results results; while (!input.atEnd()) { Input subInput(input); KJ_IF_MAYBE(subResult, subParser(subInput)) { subInput.advanceParent(); results.add(kj::mv(*subResult)); } else { break; } } if (atLeastOne && results.empty()) { return nullptr; } return results.releaseAsArray(); } }; template template struct Many_::Impl> { // If the sub-parser output is Tuple<>, just return a count. static Maybe apply(const SubParser& subParser, Input& input) { uint count = 0; while (!input.atEnd()) { Input subInput(input); KJ_IF_MAYBE(subResult, subParser(subInput)) { subInput.advanceParent(); ++count; } else { break; } } if (atLeastOne && count == 0) { return nullptr; } return count; } }; template template auto Many_::operator()(Input& input) const -> decltype(Impl::apply(instance(), input)) { return Impl>::apply(subParser, input); } template constexpr Many_ many(SubParser&& subParser) { // Constructs a parser that repeatedly executes the given parser until it fails, returning an // Array of the results (or a uint count if `subParser` returns an empty tuple). return Many_(kj::fwd(subParser)); } template constexpr Many_ oneOrMore(SubParser&& subParser) { // Like `many()` but the parser must parse at least one item to be successful. return Many_(kj::fwd(subParser)); } // ------------------------------------------------------------------- // times() // Output = Array of output of sub-parser, or Tuple<> if sub-parser returns Tuple<>. template class Times_ { template > struct Impl; public: explicit constexpr Times_(SubParser&& subParser, uint count) : subParser(kj::fwd(subParser)), count(count) {} template auto operator()(Input& input) const -> decltype(Impl::apply(instance(), instance(), input)); private: SubParser subParser; uint count; }; template template struct Times_::Impl { static Maybe> apply(const SubParser& subParser, uint count, Input& input) { auto results = heapArrayBuilder>(count); while (results.size() < count) { if (input.atEnd()) { return nullptr; } else KJ_IF_MAYBE(subResult, subParser(input)) { results.add(kj::mv(*subResult)); } else { return nullptr; } } return results.finish(); } }; template template struct Times_::Impl> { // If the sub-parser output is Tuple<>, just return a count. static Maybe> apply(const SubParser& subParser, uint count, Input& input) { uint actualCount = 0; while (actualCount < count) { if (input.atEnd()) { return nullptr; } else KJ_IF_MAYBE(subResult, subParser(input)) { ++actualCount; } else { return nullptr; } } return tuple(); } }; template template auto Times_::operator()(Input& input) const -> decltype(Impl::apply(instance(), instance(), input)) { return Impl>::apply(subParser, count, input); } template constexpr Times_ times(SubParser&& subParser, uint count) { // Constructs a parser that repeats the subParser exactly `count` times. return Times_(kj::fwd(subParser), count); } // ------------------------------------------------------------------- // optional() // Output = Maybe template class Optional_ { public: explicit constexpr Optional_(SubParser&& subParser) : subParser(kj::fwd(subParser)) {} template Maybe>> operator()(Input& input) const { typedef Maybe> Result; Input subInput(input); KJ_IF_MAYBE(subResult, subParser(subInput)) { subInput.advanceParent(); return Result(kj::mv(*subResult)); } else { return Result(nullptr); } } private: SubParser subParser; }; template constexpr Optional_ optional(SubParser&& subParser) { // Constructs a parser that accepts zero or one of the given sub-parser, returning a Maybe // of the sub-parser's result. return Optional_(kj::fwd(subParser)); } // ------------------------------------------------------------------- // oneOf() // All SubParsers must have same output type, which becomes the output type of the // OneOfParser. template class OneOf_; template class OneOf_ { public: explicit constexpr OneOf_(FirstSubParser&& firstSubParser, SubParsers&&... rest) : first(kj::fwd(firstSubParser)), rest(kj::fwd(rest)...) {} template Maybe> operator()(Input& input) const { { Input subInput(input); Maybe> firstResult = first(subInput); if (firstResult != nullptr) { subInput.advanceParent(); return kj::mv(firstResult); } } // Hoping for some tail recursion here... return rest(input); } private: FirstSubParser first; OneOf_ rest; }; template <> class OneOf_<> { public: template decltype(nullptr) operator()(Input& input) const { return nullptr; } }; template constexpr OneOf_ oneOf(SubParsers&&... parsers) { // Constructs a parser that accepts one of a set of options. The parser behaves as the first // sub-parser in the list which returns successfully. All of the sub-parsers must return the // same type. return OneOf_(kj::fwd(parsers)...); } // ------------------------------------------------------------------- // transform() // Output = Result of applying transform functor to input value. If input is a tuple, it is // unpacked to form the transformation parameters. template struct Span { public: inline const Position& begin() const { return begin_; } inline const Position& end() const { return end_; } Span() = default; inline constexpr Span(Position&& begin, Position&& end): begin_(mv(begin)), end_(mv(end)) {} private: Position begin_; Position end_; }; template constexpr Span> span(Position&& start, Position&& end) { return Span>(kj::fwd(start), kj::fwd(end)); } template class Transform_ { public: explicit constexpr Transform_(SubParser&& subParser, TransformFunc&& transform) : subParser(kj::fwd(subParser)), transform(kj::fwd(transform)) {} template Maybe(), instance&&>()))> operator()(Input& input) const { KJ_IF_MAYBE(subResult, subParser(input)) { return kj::apply(transform, kj::mv(*subResult)); } else { return nullptr; } } private: SubParser subParser; TransformFunc transform; }; template class TransformOrReject_ { public: explicit constexpr TransformOrReject_(SubParser&& subParser, TransformFunc&& transform) : subParser(kj::fwd(subParser)), transform(kj::fwd(transform)) {} template decltype(kj::apply(instance(), instance&&>())) operator()(Input& input) const { KJ_IF_MAYBE(subResult, subParser(input)) { return kj::apply(transform, kj::mv(*subResult)); } else { return nullptr; } } private: SubParser subParser; TransformFunc transform; }; template class TransformWithLocation_ { public: explicit constexpr TransformWithLocation_(SubParser&& subParser, TransformFunc&& transform) : subParser(kj::fwd(subParser)), transform(kj::fwd(transform)) {} template Maybe(), instance().getPosition())>>>(), instance&&>()))> operator()(Input& input) const { auto start = input.getPosition(); KJ_IF_MAYBE(subResult, subParser(input)) { return kj::apply(transform, Span(kj::mv(start), input.getPosition()), kj::mv(*subResult)); } else { return nullptr; } } private: SubParser subParser; TransformFunc transform; }; template constexpr Transform_ transform( SubParser&& subParser, TransformFunc&& functor) { // Constructs a parser which executes some other parser and then transforms the result by invoking // `functor` on it. Typically `functor` is a lambda. It is invoked using `kj::apply`, // meaning tuples will be unpacked as arguments. return Transform_( kj::fwd(subParser), kj::fwd(functor)); } template constexpr TransformOrReject_ transformOrReject( SubParser&& subParser, TransformFunc&& functor) { // Like `transform()` except that `functor` returns a `Maybe`. If it returns null, parsing fails, // otherwise the parser's result is the content of the `Maybe`. return TransformOrReject_( kj::fwd(subParser), kj::fwd(functor)); } template constexpr TransformWithLocation_ transformWithLocation( SubParser&& subParser, TransformFunc&& functor) { // Like `transform` except that `functor` also takes a `Span` as its first parameter specifying // the location of the parsed content. The span's position type is whatever the parser input's // getPosition() returns. return TransformWithLocation_( kj::fwd(subParser), kj::fwd(functor)); } // ------------------------------------------------------------------- // notLookingAt() // Fails if the given parser succeeds at the current location. template class NotLookingAt_ { public: explicit constexpr NotLookingAt_(SubParser&& subParser) : subParser(kj::fwd(subParser)) {} template Maybe> operator()(Input& input) const { Input subInput(input); subInput.forgetParent(); if (subParser(subInput) == nullptr) { return Tuple<>(); } else { return nullptr; } } private: SubParser subParser; }; template constexpr NotLookingAt_ notLookingAt(SubParser&& subParser) { // Constructs a parser which fails at any position where the given parser succeeds. Otherwise, // it succeeds without consuming any input and returns an empty tuple. return NotLookingAt_(kj::fwd(subParser)); } // ------------------------------------------------------------------- // endOfInput() // Output = Tuple<>, only succeeds if at end-of-input class EndOfInput_ { public: template Maybe> operator()(Input& input) const { if (input.atEnd()) { return Tuple<>(); } else { return nullptr; } } }; constexpr EndOfInput_ endOfInput = EndOfInput_(); // A parser that succeeds only if it is called with no input. } // namespace parse } // namespace kj #endif // KJ_PARSE_COMMON_H_