Skip to content

Commit

Permalink
Fix simple value is not recognized in indents #8
Browse files Browse the repository at this point in the history
  • Loading branch information
paveldedik committed Jun 17, 2022
1 parent b9a3870 commit 3986908
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 39 deletions.
13 changes: 9 additions & 4 deletions neon/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,15 +105,20 @@ def advance(self, allowed=None, skip=None):
:param allowed: Optional list of allowed tokens. Default is
any token. If the found token is not allowed, the function
raises syntax error.
raises syntax error.
:type allowed: :class:`Token` or iterable of tokens
:param skip: If :obj:`True`, a sequence of given token types
:param skip: If specified, a sequence of given token types
is skipped first. Default is :obj:`None` (nothing is skipped).
:type skip: boolean
:type skip: Token | list[Token]
"""
tok = self.next()
if skip is not None:
while tok.id == skip.id:
# Build the skip IDs eagerly as a set. A lazy ``map`` object is a one-shot
# iterator: the first ``in`` test in the loop below would consume it, so
# later iterations could no longer match token types that had already been
# iterated past (e.g. an Indent followed by a NewLine).
skips = (
    {token.id for token in skip}
    if isinstance(skip, (list, tuple))
    else {skip.id}
)
while tok.id in skips:
tok = self.next()
if allowed is None:
return tok
Expand Down
86 changes: 52 additions & 34 deletions neon/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ class Token(object):
#: Regular expression for tokenization.
re = None

@classproperty
def id(cls):
return cls
#: Unique ID of the token.
id = None

@classproperty
def name(cls):
Expand Down Expand Up @@ -76,6 +75,7 @@ class String(Primitive):
re = r"""
(?: "(?:\\.|[^"\\])*" | '(?:\\.|[^'\\])*' )
"""
id = "str"

@classmethod
def do(cls, scanner, string):
Expand All @@ -95,6 +95,7 @@ class Integer(Primitive):
"""Represents integer token."""

re = None
id = "int"

@classmethod
def convert(cls, string):
Expand All @@ -109,6 +110,7 @@ class Float(Primitive):
"""Represents float token."""

re = None
id = "float"

@classmethod
def convert(cls, string):
Expand All @@ -123,6 +125,7 @@ class Boolean(Primitive):
"""Represents boolean token."""

re = None
id = "bool"

_mapping = {
True: variants("true", "yes", "on"),
Expand All @@ -141,6 +144,7 @@ class NoneValue(Primitive):
"""Represents :obj:`None` token."""

re = None
id = "none"

_variants = variants("null")

Expand All @@ -150,6 +154,7 @@ class DateTime(Primitive):
"""Represents datetime token."""

re = None
id = "datetime"

@classmethod
@functools.lru_cache(maxsize=None)
Expand All @@ -169,6 +174,7 @@ class Literal(Token):
(?: [^,:=\]})(\x00-\x20]+ | :(?! [\s,\]})] | $ ) |
[\ \t]+ [^#,:=\]})(\x00-\x20] )*
"""
id = "literal"

@classmethod
def do(cls, scanner, string):
Expand All @@ -184,6 +190,8 @@ def do(cls, scanner, string):
class Symbol(Token):
"""Represents symbol token."""

id = "symbol"

@classproperty
def name(cls):
return "'{}'".format(str(cls.re).replace("\\", ""))
Expand All @@ -198,34 +206,39 @@ class Comma(Symbol):
"""Represents comma token."""

re = r","
id = "comma"


@token
class Colon(Symbol):
"""Represents colon token."""

#: Regular expression for tokenization; matches a single ``:`` character.
re = r":"
#: Unique ID of the token; the indent parser compares against ``Colon.id``
#: to detect a ``key: value`` pair.
id = "colon"


@token
class EqualSign(Symbol):
"""Represents equal sign."""

#: Regular expression for tokenization; matches a single ``=`` character.
re = r"="
#: Unique ID of the token.
id = "eq"


@token
class Hyphen(Symbol):
"""Represents hyphen token."""

#: Regular expression for tokenization; matches a single ``-`` character.
re = r"-"
#: Unique ID of the token; the indent parser compares against ``Hyphen.id``
#: to recognize list items.
id = "hyphen"


@token
class LeftRound(Symbol):
"""Represents left round bracket."""

re = r"\("
id = "leftround"

def parse(self, tokens):
data = {}
Expand Down Expand Up @@ -259,13 +272,15 @@ class RightRound(Symbol):
"""Represents right round bracket."""

re = r"\)"
id = "rightround"


@token
class LeftSquare(Symbol):
"""Represents left square bracket."""

re = r"\["
id = "leftsquare"

def parse(self, tokens):
data = []
Expand All @@ -287,13 +302,15 @@ class RightSquare(Symbol):
"""Represents right square bracket."""

re = r"\]"
id = "rightsquare"


@token
class LeftBrace(Symbol):
"""Represents left brace."""

re = r"{"
id = "leftbrace"

def parse(self, tokens):
data = {}
Expand All @@ -316,13 +333,15 @@ class RightBrace(Symbol):
"""Represents right brace."""

re = r"}"
id = "rightbrace"


@token
class Comment(Token):
"""Represents comment token."""

re = r"\s*\#.*"
id = "comment"
do = None # ignore comments


Expand All @@ -331,33 +350,27 @@ class Indent(Token):
"""Represents indent token."""

re = r"^[\t\ ]+"
id = "indent"

def _parse_list(self, tokens):
def _parse_list(self, tokens, tok):
data = []
tok = tokens.advance()

while tok.id not in [Dedent.id, End.id]:
if tokens.peek().id == NewLine.id:
if tokens[1].id == Indent.id:
# account for the list format of
# -
# value1
# -
# value2
tokens.advance()
tok = tokens.advance()
value = tok.parse(tokens)
else:
value = None
while tok.id == Hyphen.id:
old_tok = tok
tok = tokens.advance(skip=(NewLine, Indent))
# in this case, the list looks like this:
# -
# - a
if old_tok.id == tok.id == Hyphen.id:
data.append(None)
if tokens.peek().id == Colon.id:
tokens.advance()
key = tok.parse(tokens)
tok = tokens.advance(skip=NewLine)
value = {key: tok.parse(tokens)}
else:
tok = tokens.advance()
if tokens.peek().id == Colon.id:
tokens.advance()
key = tok.parse(tokens)
tok = tokens.advance(skip=NewLine)
value = {key: tok.parse(tokens)}
else:
value = tok.parse(tokens)
value = tok.parse(tokens)
data.append(value)

tok = tokens.advance((End, NewLine, Dedent))
Expand All @@ -366,9 +379,8 @@ def _parse_list(self, tokens):

return data

def _parse_dict(self, tokens, tok=None):
def _parse_dict(self, tokens, tok):
data = {}
tok = tok or tokens.advance()

while tok.id not in [Dedent.id, End.id]:
key = tok.parse(tokens)
Expand All @@ -389,16 +401,17 @@ def _parse_dict(self, tokens, tok=None):
return data

def parse(self, tokens):
peek = tokens.peek()
tok = tokens.advance()

while peek.id == NewLine.id:
tokens.advance()
peek = tokens.peek()
while tok.id == NewLine.id:
tok = tokens.advance()

if peek.id == Hyphen.id:
return self._parse_list(tokens)
if tok.id == Hyphen.id:
return self._parse_list(tokens, tok)
elif tokens.peek().id == End.id:
return tok.parse(tokens)
else:
return self._parse_dict(tokens)
return self._parse_dict(tokens, tok)

@classmethod
def do(cls, scanner, string):
Expand All @@ -410,13 +423,15 @@ class Dedent(Token):
"""Represents dedent token."""

re = None # this token is generated after the scanning procedure
id = "dedent"


@token
class NewLine(Token):
"""Represents new line token."""

re = r"[\n]+"
id = "newline"

@classmethod
def do(cls, scanner, string):
Expand All @@ -428,6 +443,7 @@ class WhiteSpace(Token):
"""Represents white-space token."""

re = r"[\t\ ]+"
id = "whitespace"
do = None # ignore white-spaces


Expand All @@ -436,6 +452,7 @@ class Unknown(Token):
"""Represents unknown character sequence match."""

re = r".*"
id = "unknown"

@classmethod
def do(cls, scanner, token):
Expand All @@ -448,4 +465,5 @@ class End(Token):
"""Represents end-of-file token."""

re = None
id = "end"
name = "end of file"
2 changes: 1 addition & 1 deletion tests/test_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_simple_list():
"""


def test_simple():
def test_simple_none():
expected = {"a": [None, "d"], "b": {"e": None, "g": "h"}}
assert neon.decode(NEON_SIMPLE) == expected

Expand Down
28 changes: 28 additions & 0 deletions tests/test_edge_cases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import neon

NEON_INDENTED_LIST_VALUE = """
-
aaa
"""


def test_indented_list_value():
    """A hyphen whose value sits on the following line yields a one-item list."""
    decoded = neon.decode(NEON_INDENTED_LIST_VALUE)
    assert decoded == ["aaa"]


NEON_SIMPLE_VALUE = """
hello
"""


def test_simple_value():
    """A document holding only a bare literal decodes to that literal."""
    decoded = neon.decode(NEON_SIMPLE_VALUE)
    assert decoded == "hello"


NEON_SIMPLE_LIST_VALUE = """
-
"""


def test_simple_list_value():
    """A lone hyphen with no value decodes to a list containing ``None``."""
    decoded = neon.decode(NEON_SIMPLE_LIST_VALUE)
    assert decoded == [None]

0 comments on commit 3986908

Please sign in to comment.