Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JSON] Raise exceptions when "sibling" keywords are unhandled #1063

Merged
merged 8 commits into from
Oct 29, 2024
64 changes: 52 additions & 12 deletions guidance/library/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,17 +165,17 @@ class ObjectKeywords(str, Enum):
JSONType.OBJECT: ObjectKeywords,
}

DEFS_KEYS = {"$defs", "definitions"}

IGNORED_KEYS = {
"$anchor",
"$defs",
"$schema",
"$id",
"id",
"$comment",
"title",
"description",
"default",
"definitions",
"description",
"examples",
}

Expand All @@ -188,7 +188,7 @@ class ObjectKeywords(str, Enum):
IGNORED_KEYS.add("discriminator")

WHITESPACE = {b" ", b"\t", b"\n", b"\r"}
VALID_KEYS = set(Keyword) | IGNORED_KEYS | DEFS_KEYS | set(NumberKeywords) | set(StringKeywords) | set(ArrayKeywords) | set(ObjectKeywords)
VALID_KEYS = set(Keyword) | set(NumberKeywords) | set(StringKeywords) | set(ArrayKeywords) | set(ObjectKeywords) | IGNORED_KEYS

FORMAT_PATTERNS: dict[str, Optional[str]] = {
# https://json-schema.org/understanding-json-schema/reference/string#built-in-formats
Expand Down Expand Up @@ -398,6 +398,11 @@ def validate_json_node_keys(node: Mapping[str, Any]):
)


def get_sibling_keys(node: Mapping[str, Any], key: str) -> set[str]:
    """Return the functional keywords that sit alongside ``key`` in ``node``.

    A keyword counts as functional when it is in ``VALID_KEYS`` and not in
    ``IGNORED_KEYS``. ``key`` itself is never part of the result.

    Args:
        node: The schema node whose keys are inspected.
        key: The keyword whose siblings are wanted.

    Returns:
        The keys of ``node`` that are functional keywords other than ``key``.
    """
    # Build the candidate set first; the grouping is spelled out so the result
    # does not depend on the reader knowing that `-` binds tighter than `&`.
    functional_keys = (VALID_KEYS - IGNORED_KEYS) - {key}
    return set(node.keys()) & functional_keys


class GenJson:
item_separator = ", "
key_separator = ": "
Expand Down Expand Up @@ -723,7 +728,20 @@ def const(
lm,
*,
value: Union[None, bool, int, float, str, Mapping, Sequence],
instance_type: Optional[Union[str, Sequence[str]]] = None,
enum: Optional[Sequence[Union[None, bool, int, float, str, Mapping, Sequence]]] = None,
):
schema_to_validate_against: dict[str, Any] = {}
if instance_type is not None:
schema_to_validate_against["type"] = instance_type
if enum is not None:
schema_to_validate_against["enum"] = enum
if schema_to_validate_against:
# Raise a validation error if the value doesn't match the sibling type and/or enum
jsonschema.validate(
instance=value,
schema=schema_to_validate_against,
)
# Base case
if isinstance(value, (type(None), bool, int, float, str)):
return lm + json_dumps(value)
Expand Down Expand Up @@ -756,14 +774,18 @@ def enum(
self,
lm,
*,
options: Sequence[Mapping[str, Any]]
options: Sequence[Union[None, bool, int, float, str, Mapping, Sequence]],
instance_type: Optional[Union[str, Sequence[str]]] = None,
):
# TODO: can we support a whitespace-flexible version of this?
all_opts: list[GrammarFunction] = []
for opt in options:
all_opts.append(
self.const(value=opt)
)
for instance in options:
try:
grm = self.const(value=instance, instance_type=instance_type)
except jsonschema.ValidationError:
continue
all_opts.append(grm)
if not all_opts:
raise ValueError(f"No valid options found for enum with type {instance_type!r}: {options}")
return lm + select(options=all_opts)


Expand Down Expand Up @@ -811,29 +833,47 @@ def json(
validate_json_node_keys(json_schema)

if Keyword.ANYOF in json_schema:
sibling_keys = get_sibling_keys(json_schema, Keyword.ANYOF)
if sibling_keys:
raise NotImplementedError(f"anyOf with sibling keys is not yet supported. Got {sibling_keys}")
return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF])

if Keyword.ALLOF in json_schema:
sibling_keys = get_sibling_keys(json_schema, Keyword.ALLOF)
if sibling_keys:
raise NotImplementedError(f"allOf with sibling keys is not yet supported. Got {sibling_keys}")
allof_list = json_schema[Keyword.ALLOF]
if len(allof_list) != 1:
raise ValueError("Only support allOf with exactly one item")
return lm + self.json(json_schema=allof_list[0])

if Keyword.ONEOF in json_schema:
sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF)
if sibling_keys:
raise NotImplementedError(f"oneOf with sibling keys is not yet supported. Got {sibling_keys}")
oneof_list = json_schema[Keyword.ONEOF]
if len(oneof_list) == 1:
return lm + self.json(json_schema=oneof_list[0])
warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.")
return lm + self.anyOf(anyof_list=oneof_list)

if Keyword.REF in json_schema:
sibling_keys = get_sibling_keys(json_schema, Keyword.REF)
if sibling_keys:
raise NotImplementedError(f"$ref with sibling keys is not yet supported. Got {sibling_keys}")
return lm + self.ref(reference=json_schema[Keyword.REF])

if Keyword.CONST in json_schema:
return lm + self.const(value=json_schema[Keyword.CONST])
sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM}
if sibling_keys:
raise NotImplementedError(f"const with sibling keys is not yet supported. Got {sibling_keys}")
return lm + self.const(value=json_schema[Keyword.CONST], instance_type=json_schema.get(Keyword.TYPE, None), enum=json_schema.get(Keyword.ENUM, None))

if Keyword.ENUM in json_schema:
return lm + self.enum(options=json_schema[Keyword.ENUM])
sibling_keys = get_sibling_keys(json_schema, Keyword.ENUM) - {Keyword.TYPE}
if sibling_keys:
raise NotImplementedError(f"enum with sibling keys is not yet supported. Got {sibling_keys}")
return lm + self.enum(options=json_schema[Keyword.ENUM], instance_type=json_schema.get(Keyword.TYPE, None))

if Keyword.TYPE in json_schema:
target_types = cast(Union[str, Sequence[str]], json_schema[Keyword.TYPE])
Expand Down
133 changes: 121 additions & 12 deletions tests/unit/library/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1265,14 +1265,12 @@ def test_nested_refs(self, test_object, valid):
# ref valid, maxItems valid
({"foo": []}, True),
# ref valid, maxItems invalid
pytest.param(
*({"foo": [1, 2, 3]}, False),
marks=pytest.mark.xfail(reason="sibling keywords to ref are not yet supported"),
),
({"foo": [1, 2, 3]}, False),
# ref invalid
({"foo": "string"}, False),
],
)
@pytest.mark.xfail(reason="sibling keywords to ref are not yet supported")
def test_ref_applies_alongside_sibling_keywords(self, test_object, valid):
schema = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
Expand Down Expand Up @@ -1559,12 +1557,10 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct(
# invalid on outer field
({"foo": {"bar": "a"}, "bar": 1}, False),
# valid on both fields
pytest.param(
*({"foo": {"bar": "a"}, "bar": "a"}, True),
marks=pytest.mark.xfail(reason="refs with sibling keywords are not yet supported; foo here is being seen as an additionalProperty before bar"),
),
({"foo": {"bar": "a"}, "bar": "a"}, True),
],
)
@pytest.mark.xfail(reason="refs with sibling keywords are not yet supported")
def test_refs_with_relative_uris_and_defs(self, test_object, valid):
schema = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
Expand Down Expand Up @@ -1594,12 +1590,10 @@ def test_refs_with_relative_uris_and_defs(self, test_object, valid):
# invalid on outer field
({"foo": {"bar": "a"}, "bar": 1}, False),
# valid on both fields
pytest.param(
*({"foo": {"bar": "a"}, "bar": "a"}, True),
marks=pytest.mark.xfail(reason="refs with sibling keywords are not yet supported; foo here is being seen as an additionalProperty before bar"),
),
({"foo": {"bar": "a"}, "bar": "a"}, True),
],
)
@pytest.mark.xfail(reason="refs with sibling keywords are not yet supported")
def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid):
schema = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
Expand Down Expand Up @@ -2354,6 +2348,60 @@ def test_bad_prefix_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes)
schema_obj=schema_obj,
)

@pytest.mark.parametrize(
    "obj, valid",
    [
        (1, True),
        (2, False),
        ("2", False),
        ("1", False),
        (True, False),
    ],
)
def test_typed_enum_single_type(self, obj, valid):
    """Check an enum with a single sibling ``type`` against each candidate object.

    Valid objects must pass schema validation and generation; invalid ones
    must fail schema validation and be rejected by the grammar.
    """
    schema_obj = {"enum": [1, "2", True], "type": "integer"}

    if not valid:
        # The schema itself must reject the object ...
        with pytest.raises(ValidationError):
            validate(instance=obj, schema=schema_obj)
        # ... and so must the generated grammar.
        check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj)
        return

    validate(instance=obj, schema=schema_obj)
    generate_and_check(obj, schema_obj)

@pytest.mark.parametrize(
    "obj, valid",
    [
        (1, True),
        (2, False),
        ("2", True),
        ("1", False),
        (True, False),
    ],
)
def test_typed_enum_multiple_types(self, obj, valid):
    """Check an enum whose sibling ``type`` is a list of types.

    Valid objects must pass schema validation and generation; invalid ones
    must fail schema validation and be rejected by the grammar.
    """
    schema_obj = {"enum": [1, "2", True], "type": ["integer", "string"]}

    if not valid:
        # The schema itself must reject the object ...
        with pytest.raises(ValidationError):
            validate(instance=obj, schema=schema_obj)
        # ... and so must the generated grammar.
        check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj)
        return

    validate(instance=obj, schema=schema_obj)
    generate_and_check(obj, schema_obj)

def test_invalid_typed_enum(self):
    """An enum with no option matching the sibling ``type`` raises ValueError."""
    schema_obj = {"enum": [1, "2"], "type": "boolean"}
    expected_message = "No valid options found for enum with type 'boolean': [1, '2']"

    with pytest.raises(ValueError) as exc_info:
        gen_json(schema=schema_obj)

    # Checked after the `with` block so the assertion actually runs.
    assert exc_info.value.args[0] == expected_message

class TestConst:
def test_constant_int(self):
Expand Down Expand Up @@ -2413,6 +2461,67 @@ def test_constant_precedence(self):
schema_obj=schema_obj,
)

def test_valid_typed_const(self):
    """A const that satisfies its sibling ``type`` validates and generates."""
    schema_obj = {"const": 1, "type": "integer"}
    expected = 1

    validate(instance=expected, schema=schema_obj)
    generate_and_check(expected, schema_obj)

def test_invalid_typed_const(self):
    """A const that contradicts its sibling ``type`` is rejected when the grammar is built."""
    # 1 is not a boolean, so the schema can never be satisfied.
    contradictory_schema = {"const": 1, "type": "boolean"}

    with pytest.raises(ValidationError):
        gen_json(schema=contradictory_schema)

def test_valid_enum_const(self):
    """A const that is one of the sibling ``enum`` options validates and generates."""
    schema_obj = {"const": 1, "enum": [1, 2, 3]}
    expected = 1

    validate(instance=expected, schema=schema_obj)
    generate_and_check(expected, schema_obj)

def test_invalid_enum_const(self):
    """A const absent from the sibling ``enum`` is rejected when the grammar is built."""
    # 1 is not among the enum options, so the schema can never be satisfied.
    contradictory_schema = {"const": 1, "enum": [2, 3]}

    with pytest.raises(ValidationError):
        gen_json(schema=contradictory_schema)

def test_valid_typed_enum_const(self):
    """A const that satisfies both sibling ``enum`` and ``type`` validates and generates."""
    schema_obj = {"const": 1, "enum": [1, "2", 3], "type": "integer"}
    expected = 1

    validate(instance=expected, schema=schema_obj)
    generate_and_check(expected, schema_obj)

@pytest.mark.parametrize(
    "const",
    [
        "2",  # right enum, wrong type
        2,  # wrong enum, right type
        "3",  # wrong enum, wrong type
    ],
)
def test_invalid_typed_enum_const(self, const):
    """A const that violates the sibling ``enum`` and/or ``type`` is rejected when the grammar is built."""
    contradictory_schema = {"const": const, "enum": [1, "2", 3], "type": "integer"}

    with pytest.raises(ValidationError):
        gen_json(schema=contradictory_schema)


class TestAdditionalProperties:

Expand Down
Loading