From 939ccb2af470ec479b08509e29bd77f1a4b26692 Mon Sep 17 00:00:00 2001 From: yhirose Date: Wed, 27 Mar 2024 12:49:54 -0400 Subject: [PATCH] Fix #291 --- README.md | 8 ++++++++ peglib.h | 25 ++++++++++++++++++++----- test/test2.cc | 14 ++++++++++++++ 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6c59069..b1b4f21 100644 --- a/README.md +++ b/README.md @@ -347,6 +347,14 @@ START <- 'This month is ' MONTH '.' MONTH <- 'Jan' | 'January' | 'Feb' | 'February' | '...' ``` +You can find out which item was matched by calling `choice()`, which returns the item's zero-based index. + +```cpp +parser["MONTH"] = [](const SemanticValues &vs) { + auto id = vs.choice(); +}; +``` + It supports the case insensitive mode. ```peg diff --git a/peglib.h b/peglib.h index 4f320e8..d315921 100644 --- a/peglib.h +++ b/peglib.h @@ -379,6 +379,7 @@ class Trie { public: Trie(const std::vector &items, bool ignore_case) : ignore_case_(ignore_case) { + size_t id = 0; for (const auto &item : items) { for (size_t len = 1; len <= item.size(); len++) { auto last = len == item.size(); @@ -386,17 +387,18 @@ class Trie { std::string_view sv(s.data(), len); auto it = dic_.find(sv); if (it == dic_.end()) { - dic_.emplace(sv, Info{last, last}); + dic_.emplace(sv, Info{last, last, id}); } else if (last) { it->second.match = true; } else { it->second.done = false; } } + id++; } } - size_t match(const char *text, size_t text_len) const { + size_t match(const char *text, size_t text_len, size_t &id) const { size_t match_len = 0; auto done = false; size_t len = 1; @@ -407,7 +409,10 @@ class Trie { if (it == dic_.end()) { done = true; } else { - if (it->second.match) { match_len = len; } + if (it->second.match) { + match_len = len; + id = it->second.id; + } if (it->second.done) { done = true; } } len += 1; @@ -415,6 +420,8 @@ class Trie { return match_len; } + size_t size() const { return dic_.size(); } + private: std::string to_lower(std::string s) const { for (char &c : s) { @@ -426,6 +433,7 @@ class Trie { struct 
Info { bool done; bool match; + size_t id; }; // TODO: Use unordered_map when heterogeneous lookup is supported in C++20 @@ -580,6 +588,7 @@ struct SemanticValues : protected std::vector { private: friend class Context; + friend class Dictionary; friend class Sequence; friend class PrioritizedChoice; friend class Repetition; @@ -2673,12 +2682,17 @@ inline size_t Ope::parse(const char *s, size_t n, SemanticValues &vs, inline size_t Dictionary::parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const { - auto i = trie_.match(s, n); + size_t id; + auto i = trie_.match(s, n, id); + if (i == 0) { c.set_error_pos(s); return static_cast(-1); } + vs.choice_count_ = trie_.size(); + vs.choice_ = id; + // Word check if (c.wordOpe) { auto save_ignore_trace_state = c.ignore_trace_state; @@ -2792,7 +2806,8 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs, auto tok_ptr = dynamic_cast(ope_ptr); if (tok_ptr) { ope_ptr = tok_ptr->ope_.get(); } } - if (!dynamic_cast(ope_ptr)) { + if (!dynamic_cast(ope_ptr) && + !dynamic_cast(ope_ptr)) { chvs.choice_count_ = 0; chvs.choice_ = 0; } diff --git a/test/test2.cc b/test/test2.cc index 04bf6e7..02ad145 100644 --- a/test/test2.cc +++ b/test/test2.cc @@ -1429,6 +1429,20 @@ TEST(DicTest, Dictionary_invalid) { EXPECT_FALSE(ret); } +TEST(DicTest, Dictionary_index) { + parser parser(R"( + START <- 'This month is ' MONTH '.' + MONTH <- 'Jan' | 'January' | 'Feb' | 'February' + )"); + + parser["MONTH"] = [](const SemanticValues &vs) { + EXPECT_EQ("Feb", vs.token()); + EXPECT_EQ(2, vs.choice()); + }; + + EXPECT_TRUE(parser.parse("This month is Feb.")); +} + TEST(ErrorTest, Default_error_handling_1) { parser pg(R"( S <- '@' A B