diff --git a/norminette/lexer/lexer.py b/norminette/lexer/lexer.py index 10158a3..84239f4 100644 --- a/norminette/lexer/lexer.py +++ b/norminette/lexer/lexer.py @@ -35,6 +35,13 @@ def c(a: str, b: str): *c('u', "wb"), *c('u', "i64"), ) +float_suffixes = ( + '', + *"lLfFdD", + "dd", "DD", + "df", "DF", + "dl", "DL", +) INT_LITERAL_PATTERN = re.compile(r""" ^ @@ -324,21 +331,23 @@ def parse_float_literal(self): pos = lineno, column = self.line_pos() src = self.file.source[self.__pos:] if match := FLOAT_EXPONENT_LITERAL_PATTERN.match(src): - suffix = len(match["Suffix"]) - column += len(match["Constant"]) - error = None - if re.match(r"[eE][-+]?\d+", match["Exponent"]) is None: - error = Error.from_name("BAD_EXPONENT") - error.add_highlight(lineno, column, length=len(match["Exponent"]) + suffix) - elif match["Suffix"] not in ('', *"lLfF"): - error = Error.from_name("BAD_FLOAT_SUFFIX") - error.add_highlight(lineno, column + suffix, length=suffix) - if error: - self.file.errors.add(error) - return Token("CONSTANT", pos, self.pop(times=match.end())) - if match := FLOAT_FRACTIONAL_LITERAL_PATTERN.match(src): - # TODO Continue here lol - return Token("CONSTANT", pos, self.pop(times=match.end())) + type = "exponent" + elif match := FLOAT_FRACTIONAL_LITERAL_PATTERN.match(src): + type = "fractional" + else: + return + error = None + suffix = len(match["Suffix"]) + column += len(match["Constant"]) + if type == "exponent" and not re.match(r"[eE][-+]?\d+", match["Exponent"]): + error = Error.from_name("BAD_EXPONENT") + error.add_highlight(lineno, column, length=len(match["Exponent"]) + suffix) + elif match["Suffix"] not in float_suffixes: + error = Error.from_name("BAD_FLOAT_SUFFIX") + error.add_highlight(lineno, column + len(match["Exponent"]), length=suffix) + if error: + self.file.errors.add(error) + return Token("CONSTANT", pos, self.pop(times=match.end())) def parse_multi_line_comment(self) -> Optional[Token]: if self.raw_peek(collect=2) != "/*": diff --git a/tests/test_lexer.py b/tests/test_lexer.py index 17b5b1c..3a01f0f 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -49,7 +49,7 @@ def test_lexer_raw_peek(source: str, parameters: Dict[str, Any], expected: Optio "Escaped newline": ["\\\n", {}, ('\\', 1)], "Times with exact chars": ["abc", {"times": 3}, ("abc", 3)], "Times with trigraphs": [r"??", [ + E.from_name("INVALID_SUFFIX", highlights=[H(lineno=1, column=7, length=1)]), + ]], "Octal integer": ["01234567123", "", []], "Octal integer with U as suffix": ["0123u", "", []], "Octal integer with bad digits": ["00072189", "", [ @@ -289,6 +292,9 @@ def test_lexer_parse_multi_line_comment(source: str, str_expected: str, errors: H(lineno=1, column=len("000123") + 1, length=len("u.23")), ]), ]], + "Hexadecimal with bad suffix": ["0x1uLl;", "", [ + E.from_name("INVALID_SUFFIX", highlights=[H(lineno=1, column=4, length=3)]), + ]], "Integer with u suffix": ["123u", "", []], "Integer with U suffix": ["123U", "", []], "Integer with uz suffix": ["123uz", "", []], @@ -302,9 +308,7 @@ def test_lexer_parse_multi_line_comment(source: str, str_expected: str, errors: "Integer with ll suffix": ["9000000000ll", "", []], "Integer with LL suffix": ["9000000000LL", "", []], "Integer with bad suffix": ["10Uu", "", [ - E.from_name("INVALID_SUFFIX", highlights=[ - H(lineno=1, column=1, length=len("10")), - ]), + E.from_name("INVALID_SUFFIX", highlights=[H(lineno=1, column=3, length=len("10"))]), ]], })) def test_lexer_parse_integer_literal(source: str, str_expected: str, errors: List[E]): @@ -323,32 +327,42 @@ def test_lexer_parse_integer_literal(source: str, str_expected: str, errors: Lis E.from_name("BAD_EXPONENT", highlights=[H(lineno=1, column=2, length=7)]), ]], "Exponent with sign": ["1e+3", "", []], - "": ["45e++ai", "None", []], - "": ["e42", "None", []], - "": ["0x1uLl;", "None", []], - "": [".0e4x;", "None", []], - "": ["10ul;", "None", []], - "": ["10lul;", "None", []], - "": ["0x1uLl;", "None", []], - "": ["0x1ULl;", "None", []], - "": ["0x1lL;", "None", []], - "": ["0x1Ll;", "None", []], - "": ["0x1UlL;", "None", []], + "Bad float followed by an unary expression": ["45e++ai", "", [ + E.from_name("BAD_EXPONENT", highlights=[H(lineno=1, column=3, length=2)]), + ]], + "Identifier with numbers": ["e42", "None", []], + "Fractional exponent with bad suffix": [".0e4x;", "", [ + E.from_name("BAD_FLOAT_SUFFIX", highlights=[H(lineno=1, column=5, length=1)]), + ]], "Integer with bad suffix": ["10uu", "None", []], - "": ["10Uu", "None", []], - "": ["10UU", "None", []], - "": ["0b0101e", "None", []], - "": ["0b0101f", "None", []], - "": ["0b0X101f", "None", []], - "": ["0X101Uf", "None", []], - "": ["0101f", "None", []], - "": ["10.12fe10", "None", []], - "": ["10.fU", "None", []], - "": ["21.3E56E4654", "None", []], - "": ["105e4d", "None", []], - "": ["105flu", "None", []], - "": ["105fu", "None", []], - "": ["105eu", "None", []], + "Bad suffix with all parts": ["10.12fe10", "", [ + E.from_name("BAD_FLOAT_SUFFIX", highlights=[H(lineno=1, column=6, length=len("fe10"))]), + ]], + "Float without fractional part but with suffix": ["10.f", "", []], + "Float without fractional part but bad suffix": ["10.fU", "", [ + E.from_name("BAD_FLOAT_SUFFIX", highlights=[H(lineno=1, column=4, length=2)]), + ]], + "Real bad suffix": ["21.3E56E4654", "", [ + E.from_name("BAD_FLOAT_SUFFIX", highlights=[H(lineno=1, column=8, length=5)]), + ]], + "Exponent with D suffix": ["105e4d", "", []], + "Bad exponent followed by a suffix": ["105eu", "", [ + E.from_name("BAD_EXPONENT", highlights=[H(lineno=1, column=4, length=2)]), + ]], + # TODO Add tests for hexadecimal floats + **{ + # https://www.gnu.org/software/c-intro-and-ref/manual/html_node/Floating-Constants.html + f"Float GNU {number}": [source, f"", []] + for number, source in enumerate(( + "1500.0", "15e2", "15e+2", "15.0e2", "1.5e+3", ".15e4", "15000e-1", + "1.0", "1000.", "3.14159", ".05", ".0005", "1e0", "1.0000e0", "100e1", + "100e+1", "100E+1", "1e3", "10000e-1", "3.14159e0", "5e-2", ".0005e+2", + "5E-2", ".0005E2", ".05e-2", "3.14159f", "3.14159e0f", "1000.f", "100E1F", + ".0005f", ".05e-2f", + # "0xAp2", "0xAp-1", "0x2.0Bp4", "0xE.2p3", "0x123.ABCp0", + # "0x123.ABCp4", "0x100p-8", "0x10p-4", "0x1p+4", "0x1p+8", + )) + } })) def test_lexer_parse_float_literal(source: str, str_expected: str, errors: List[E]): lexer = lexer_from_source(source)