Source code for godot_project_parser._parser

#!/usr/bin/env python3
#
#  _parser.py
"""
Internal tomli-based parser.
"""
#
#  From https://github.com/hukkin/tomli/blob/master/src/tomli/_parser.py
#
#  Copyright © 2021 Taneli Hukkinen
#  Changes Copyright © 2025 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#

# stdlib
from typing import IO, Any, Dict, Final, List, Optional, Tuple

# 3rd party
from tomli._parser import (
		ILLEGAL_BASIC_STR_CHARS,
		ILLEGAL_COMMENT_CHARS,
		ILLEGAL_LITERAL_STR_CHARS,
		ILLEGAL_MULTILINE_BASIC_STR_CHARS,
		MAX_INLINE_NESTING,
		TOML_WS,
		TOML_WS_AND_NEWLINE,
		Flags,
		NestedDict,
		TOMLDecodeError,
		make_safe_parse_float,
		parse_basic_str_escape,
		parse_multiline_str,
		skip_chars,
		skip_until
		)
from tomli._re import RE_DATETIME, RE_LOCALTIME, RE_NUMBER, match_to_datetime, match_to_localtime, match_to_number
from tomli._types import Key, ParseFloat, Pos

# this package
from godot_project_parser.types import GodotObject, PackedStringArray

__all__ = [
		"Output",
		"create_dict_rule",
		"create_list_rule",
		"key_value_rule",
		"load",
		"loads",
		"parse_array",
		"parse_basic_str",
		"parse_basic_str_escape_multiline",
		"parse_inline_table",
		"parse_key",
		"parse_key_part",
		"parse_key_value_pair",
		"parse_literal_str",
		"parse_object",
		"parse_one_line_basic_str",
		"parse_packed_string_array",
		"parse_value",
		"skip_comment",
		"skip_comments_and_array_ws",
		]

BARE_KEY_CHARS: Final = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_/.")
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | frozenset("\"'")


[docs]def load(__fp: IO[bytes], *, parse_float: ParseFloat = float) -> Dict[str, Any]: """ Parse ``project.godot`` from a binary file object. :param __fp: :param parse_float: """ b = __fp.read() try: s = b.decode() except AttributeError: raise TypeError("File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`") from None return loads(s, parse_float=parse_float)
[docs]def loads(__s: str, *, parse_float: ParseFloat = float) -> Dict[str, Any]: """ Parse ``project.godot`` from a string. :param __s: :param parse_float: """ # The spec allows converting "\r\n" to "\n", even in string # literals. Let's do so to simplify parsing. try: src = __s.replace("\r\n", '\n') except (AttributeError, TypeError): raise TypeError(f"Expected str object, not '{type(__s).__qualname__}'") from None pos = 0 out = Output() header: Key = () parse_float = make_safe_parse_float(parse_float) # Parse one statement at a time # (typically means one line in TOML source) while True: # 1. Skip line leading whitespace pos = skip_chars(src, pos, TOML_WS) # 2. Parse rules. Expect one of the following: # - end of file # - end of line # - comment # - key/value pair # - append dict to list (and move to its namespace) # - create dict (and move to its namespace) # Skip trailing whitespace when applicable. try: char = src[pos] except IndexError: break if char == '\n': pos += 1 continue if char in KEY_INITIAL_CHARS: pos = key_value_rule(src, pos, out, header, parse_float) pos = skip_chars(src, pos, TOML_WS) elif char == '[': try: second_char: Optional[str] = src[pos + 1] except IndexError: second_char = None out.flags.finalize_pending() if second_char == '[': pos, header = create_list_rule(src, pos, out) else: pos, header = create_dict_rule(src, pos, out) pos = skip_chars(src, pos, TOML_WS) elif char not in "#;": raise TOMLDecodeError("Invalid statement", src, pos) # 3. Skip comment pos = skip_comment(src, pos) # 4. Expect end of line or end of file try: char = src[pos] except IndexError: break if char != '\n': raise TOMLDecodeError("Expected newline or end of document after a statement", src, pos) pos += 1 return out.data.dict
class Output: def __init__(self) -> None: self.data = NestedDict() self.flags = Flags() def skip_comment(src: str, pos: Pos) -> Pos: try: char: str = src[pos] except IndexError: return pos if char in "#;": return skip_until(src, pos + 1, '\n', error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False) return pos def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos: while True: pos_before_skip = pos pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE) pos = skip_comment(src, pos) if pos == pos_before_skip: return pos def create_dict_rule(src: str, pos: Pos, out: Output) -> Tuple[Pos, Key]: pos += 1 # Skip "[" pos = skip_chars(src, pos, TOML_WS) pos, key = parse_key(src, pos) if out.flags.is_(key, Flags.EXPLICIT_NEST) or out.flags.is_(key, Flags.FROZEN): raise TOMLDecodeError(f"Cannot declare {key} twice", src, pos) out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False) try: out.data.get_or_create_nest(key) except KeyError: raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None if not src.startswith(']', pos): raise TOMLDecodeError("Expected ']' at the end of a table declaration", src, pos) return pos + 1, key def create_list_rule(src: str, pos: Pos, out: Output) -> Tuple[Pos, Key]: pos += 2 # Skip "[[" pos = skip_chars(src, pos, TOML_WS) pos, key = parse_key(src, pos) if out.flags.is_(key, Flags.FROZEN): raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos) # Free the namespace now that it points to another empty list item... out.flags.unset_all(key) # ...but this key precisely is still prohibited from table declaration out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False) try: out.data.append_nest_to_list(key) except KeyError: raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None if not src.startswith("]]", pos): raise TOMLDecodeError("Expected ']]' at the end of an array declaration", src, pos) return pos + 2, key def key_value_rule(src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat) -> Pos: pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0) key_parent, key_stem = key[:-1], key[-1] abs_key_parent = header + key_parent relative_path_cont_keys = (header + key[:i] for i in range(1, len(key))) for cont_key in relative_path_cont_keys: # Check that dotted key syntax does not redefine an existing table if out.flags.is_(cont_key, Flags.EXPLICIT_NEST): raise TOMLDecodeError(f"Cannot redefine namespace {cont_key}", src, pos) # Containers in the relative path can't be opened with the table syntax or # dotted key/value syntax in following table sections. out.flags.add_pending(cont_key, Flags.EXPLICIT_NEST) if out.flags.is_(abs_key_parent, Flags.FROZEN): raise TOMLDecodeError(f"Cannot mutate immutable namespace {abs_key_parent}", src, pos) try: nest = out.data.get_or_create_nest(abs_key_parent) except KeyError: raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None if key_stem in nest: raise TOMLDecodeError("Cannot overwrite a value", src, pos) # Mark inline table and array namespaces recursively immutable if isinstance(value, (dict, list)): out.flags.set(header + key, Flags.FROZEN, recursive=True) nest[key_stem] = value return pos def parse_key_value_pair(src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int) -> Tuple[Pos, Key, Any]: pos, key = parse_key(src, pos) try: char: Optional[str] = src[pos] except IndexError: char = None if char is None or char not in "=:": raise TOMLDecodeError("Expected '=' or ':' after a key in a key/value pair", src, pos) pos += 1 pos = skip_chars(src, pos, TOML_WS) pos, value = parse_value(src, pos, parse_float, nest_lvl) return pos, key, value def parse_key(src: str, pos: Pos) -> Tuple[Pos, Key]: pos, key_part = parse_key_part(src, pos) key: Key = (key_part, ) pos = skip_chars(src, pos, TOML_WS) while True: try: char: Optional[str] = src[pos] except IndexError: char = None if char != '.': return pos, key pos += 1 pos = skip_chars(src, pos, TOML_WS) pos, key_part = parse_key_part(src, pos) key += (key_part, ) pos = skip_chars(src, pos, TOML_WS) def parse_key_part(src: str, pos: Pos) -> Tuple[Pos, str]: try: char: Optional[str] = src[pos] except IndexError: char = None if char in BARE_KEY_CHARS: start_pos = pos pos = skip_chars(src, pos, BARE_KEY_CHARS) return pos, src[start_pos:pos] if char == "'": return parse_literal_str(src, pos) if char == '"': return parse_one_line_basic_str(src, pos) raise TOMLDecodeError("Invalid initial character for a key part", src, pos) def parse_one_line_basic_str(src: str, pos: Pos) -> Tuple[Pos, str]: pos += 1 return parse_basic_str(src, pos, multiline=False) def parse_array(src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int) -> Tuple[Pos, List[Any]]: pos += 1 array: List[Any] = [] pos = skip_comments_and_array_ws(src, pos) if src.startswith(']', pos): return pos + 1, array while True: pos, val = parse_value(src, pos, parse_float, nest_lvl) array.append(val) pos = skip_comments_and_array_ws(src, pos) c = src[pos:pos + 1] if c == ']': return pos + 1, array if c != ',': raise TOMLDecodeError("Unclosed array", src, pos) pos += 1 pos = skip_comments_and_array_ws(src, pos) if src.startswith(']', pos): return pos + 1, array def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int) -> Tuple[Pos, Dict[str, Any]]: pos += 1 nested_dict = NestedDict() flags = Flags() pos = skip_chars(src, pos, TOML_WS) if src.startswith('}', pos): return pos + 1, nested_dict.dict while True: pos = skip_chars(src, pos, '\n') pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl) key_parent, key_stem = key[:-1], key[-1] if flags.is_(key, Flags.FROZEN): raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos) try: nest = nested_dict.get_or_create_nest(key_parent, access_lists=False) except KeyError: raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None if key_stem in nest: raise TOMLDecodeError(f"Duplicate inline table key {key_stem!r}", src, pos) nest[key_stem] = value pos = skip_chars(src, pos, TOML_WS) pos = skip_chars(src, pos, '\n') c = src[pos:pos + 1] if c == '}': return pos + 1, nested_dict.dict if c != ',': raise TOMLDecodeError("Unclosed inline table", src, pos) if isinstance(value, (dict, list)): flags.set(key, Flags.FROZEN, recursive=True) pos += 1 pos = skip_chars(src, pos, TOML_WS) def parse_basic_str_escape_multiline(src: str, pos: Pos) -> Tuple[Pos, str]: return parse_basic_str_escape(src, pos, multiline=True) def parse_literal_str(src: str, pos: Pos) -> Tuple[Pos, str]: pos += 1 # Skip starting apostrophe start_pos = pos pos = skip_until(src, pos, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True) return pos + 1, src[start_pos:pos] # Skip ending apostrophe ## Can there be multiline strings? # def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> Tuple[Pos, str]: # pos += 3 # if src.startswith('\n', pos): # pos += 1 # if literal: # delim = "'" # end_pos = skip_until( # src, # pos, # "'''", # error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS, # error_on_eof=True, # ) # result = src[pos:end_pos] # pos = end_pos + 3 # else: # delim = '"' # pos, result = parse_basic_str(src, pos, multiline=True) # # Add at maximum two extra apostrophes/quotes if the end sequence # # is 4 or 5 chars long instead of just 3. # if not src.startswith(delim, pos): # return pos, result # pos += 1 # if not src.startswith(delim, pos): # return pos, result + delim # pos += 1 # return pos, result + (delim * 2) def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> Tuple[Pos, str]: if multiline: error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS parse_escapes = parse_basic_str_escape_multiline else: error_on = ILLEGAL_BASIC_STR_CHARS parse_escapes = parse_basic_str_escape result = '' start_pos = pos while True: try: char = src[pos] except IndexError: raise TOMLDecodeError("Unterminated string", src, pos) from None if char == '"': if not multiline: return pos + 1, result + src[start_pos:pos] if src.startswith('"""', pos): return pos + 3, result + src[start_pos:pos] pos += 1 continue if char == '\\': result += src[start_pos:pos] pos, parsed_escape = parse_escapes(src, pos) result += parsed_escape start_pos = pos continue if char in error_on: raise TOMLDecodeError(f"Illegal character {char!r}", src, pos) pos += 1 def parse_value(src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int) -> Tuple[Pos, Any]: if nest_lvl > MAX_INLINE_NESTING: # Pure Python should have raised RecursionError already. # This ensures mypyc binaries eventually do the same. raise RecursionError( # pragma: no cover "TOML inline arrays/tables are nested more than the allowed" f" {MAX_INLINE_NESTING} levels" ) try: char: Optional[str] = src[pos] except IndexError: char = None # IMPORTANT: order conditions based on speed of checking and likelihood # Basic strings if char == '"': if src.startswith('"""', pos): return parse_multiline_str(src, pos, literal=False) return parse_one_line_basic_str(src, pos) # Literal strings if char == "'": if src.startswith("'''", pos): return parse_multiline_str(src, pos, literal=True) return parse_literal_str(src, pos) # Booleans if char == 't': if src.startswith("true", pos): return pos + 4, True if char == 'f': if src.startswith("false", pos): return pos + 5, False if char == 'P': if src.startswith("PackedStringArray", pos): return parse_packed_string_array(src, pos, parse_float, nest_lvl=nest_lvl) if char == 'O': if src.startswith("Object", pos): return parse_object(src, pos, parse_float, nest_lvl=nest_lvl) if char == 'n': if src.startswith("null", pos): return pos + 4, None # Arrays if char == '[': return parse_array(src, pos, parse_float, nest_lvl + 1) # Inline tables if char == '{': return parse_inline_table(src, pos, parse_float, nest_lvl + 1) # Dates and times datetime_match = RE_DATETIME.match(src, pos) if datetime_match: try: datetime_obj = match_to_datetime(datetime_match) except ValueError as e: raise TOMLDecodeError("Invalid date or datetime", src, pos) from e return datetime_match.end(), datetime_obj localtime_match = RE_LOCALTIME.match(src, pos) if localtime_match: return localtime_match.end(), match_to_localtime(localtime_match) # Integers and "normal" floats. # The regex will greedily match any type starting with a decimal # char, so needs to be located after handling of dates and times. number_match = RE_NUMBER.match(src, pos) if number_match: return number_match.end(), match_to_number(number_match, parse_float) # Special floats first_three = src[pos:pos + 3] if first_three in {"inf", "nan"}: return pos + 3, parse_float(first_three) first_four = src[pos:pos + 4] if first_four in {"-inf", "+inf", "-nan", "+nan"}: return pos + 4, parse_float(first_four) raise TOMLDecodeError("Invalid value", src, pos) def parse_packed_string_array( src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int, ) -> Tuple[Pos, PackedStringArray]: pos += len("PackedStringArray") # Skip 'PackedStringArray' pos += 1 # Skip '(' array: List[str] = [] while True: pos, val = parse_value(src, pos, parse_float, nest_lvl=nest_lvl) array.append(val) pos = skip_comments_and_array_ws(src, pos) c = src[pos:pos + 1] if c == ')': return pos + 1, PackedStringArray(array) if c != ',': raise TOMLDecodeError("Unclosed PackedStringArray", src, pos) pos += 1 pos = skip_comments_and_array_ws(src, pos) if src.startswith(')', pos): return pos + 1, PackedStringArray(array) def parse_object(src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int) -> Tuple[Pos, GodotObject]: pos += len("Object") # Skip 'Object' pos += 1 # Skip '(' start_pos = pos pos = skip_until(src, pos, ',', error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True) object_name = src[start_pos:pos] pos += 1 # Skip comma nested_dict = NestedDict() flags = Flags() pos = skip_chars(src, pos, TOML_WS) if src.startswith(')', pos): return pos + 1, GodotObject(object_name, nested_dict.dict) while True: pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=nest_lvl) key_parent, key_stem = key[:-1], key[-1] if flags.is_(key, Flags.FROZEN): raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos) try: nest = nested_dict.get_or_create_nest(key_parent, access_lists=False) except KeyError: raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None if key_stem in nest: raise TOMLDecodeError(f"Duplicate Object key {key_stem!r}", src, pos) nest[key_stem] = value pos = skip_chars(src, pos, TOML_WS) c = src[pos:pos + 1] if c == ')': return pos + 1, GodotObject(object_name, nested_dict.dict) if c != ',': raise TOMLDecodeError("Unclosed Object", src, pos) if isinstance(value, (dict, list)): flags.set(key, Flags.FROZEN, recursive=True) pos += 1 pos = skip_chars(src, pos, TOML_WS)