Skip to content

Commit

Permalink
Merge branch 'main' into json_string_schemas
Browse files Browse the repository at this point in the history
  • Loading branch information
hudson-ai authored Oct 29, 2024
2 parents 1fcd3f0 + dc5a080 commit fb4c57c
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 24 deletions.
64 changes: 52 additions & 12 deletions guidance/library/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,17 +165,17 @@ class ObjectKeywords(str, Enum):
JSONType.OBJECT: ObjectKeywords,
}

DEFS_KEYS = {"$defs", "definitions"}

IGNORED_KEYS = {
"$anchor",
"$defs",
"$schema",
"$id",
"id",
"$comment",
"title",
"description",
"default",
"definitions",
"description",
"examples",
}

Expand All @@ -188,7 +188,7 @@ class ObjectKeywords(str, Enum):
IGNORED_KEYS.add("discriminator")

WHITESPACE = {b" ", b"\t", b"\n", b"\r"}
VALID_KEYS = set(Keyword) | IGNORED_KEYS | DEFS_KEYS | set(NumberKeywords) | set(StringKeywords) | set(ArrayKeywords) | set(ObjectKeywords)
VALID_KEYS = set(Keyword) | set(NumberKeywords) | set(StringKeywords) | set(ArrayKeywords) | set(ObjectKeywords) | IGNORED_KEYS

FORMAT_PATTERNS: dict[str, Optional[str]] = {
# https://json-schema.org/understanding-json-schema/reference/string#built-in-formats
Expand Down Expand Up @@ -398,6 +398,11 @@ def validate_json_node_keys(node: Mapping[str, Any]):
)


def get_sibling_keys(node: Mapping[str, Any], key: str) -> set[str]:
    """Return the functional keys of ``node`` that sit alongside ``key``.

    A key counts as functional when it is in ``VALID_KEYS`` but not in
    ``IGNORED_KEYS``. ``key`` itself is never part of the result, and keys
    of ``node`` that are not in ``VALID_KEYS`` are dropped as well.
    """
    # Build the candidate set first so the grouping of the set operations
    # is explicit rather than dependent on operator precedence.
    functional_keys = VALID_KEYS - IGNORED_KEYS - {key}
    return set(node) & functional_keys


class GenJson:
item_separator = ", "
key_separator = ": "
Expand Down Expand Up @@ -723,7 +728,20 @@ def const(
lm,
*,
value: Union[None, bool, int, float, str, Mapping, Sequence],
instance_type: Optional[Union[str, Sequence[str]]] = None,
enum: Optional[Sequence[Union[None, bool, int, float, str, Mapping, Sequence]]] = None,
):
schema_to_validate_against: dict[str, Any] = {}
if instance_type is not None:
schema_to_validate_against["type"] = instance_type
if enum is not None:
schema_to_validate_against["enum"] = enum
if schema_to_validate_against:
# Raise a validation error if the value doesn't match the type and/or enum
jsonschema.validate(
instance=value,
schema=schema_to_validate_against,
)
# Base case
if isinstance(value, (type(None), bool, int, float, str)):
return lm + json_dumps(value)
Expand Down Expand Up @@ -756,14 +774,18 @@ def enum(
self,
lm,
*,
options: Sequence[Mapping[str, Any]]
options: Sequence[Union[None, bool, int, float, str, Mapping, Sequence]],
instance_type: Optional[Union[str, Sequence[str]]] = None,
):
# TODO: can we support a whitespace-flexible version of this?
all_opts: list[GrammarFunction] = []
for opt in options:
all_opts.append(
self.const(value=opt)
)
for instance in options:
try:
grm = self.const(value=instance, instance_type=instance_type)
except jsonschema.ValidationError:
continue
all_opts.append(grm)
if not all_opts:
raise ValueError(f"No valid options found for enum with type {instance_type!r}: {options}")
return lm + select(options=all_opts)


Expand Down Expand Up @@ -811,29 +833,47 @@ def json(
validate_json_node_keys(json_schema)

if Keyword.ANYOF in json_schema:
sibling_keys = get_sibling_keys(json_schema, Keyword.ANYOF)
if sibling_keys:
raise NotImplementedError(f"anyOf with sibling keys is not yet supported. Got {sibling_keys}")
return lm + self.anyOf(anyof_list=json_schema[Keyword.ANYOF])

if Keyword.ALLOF in json_schema:
sibling_keys = get_sibling_keys(json_schema, Keyword.ALLOF)
if sibling_keys:
raise NotImplementedError(f"allOf with sibling keys is not yet supported. Got {sibling_keys}")
allof_list = json_schema[Keyword.ALLOF]
if len(allof_list) != 1:
raise ValueError("Only support allOf with exactly one item")
return lm + self.json(json_schema=allof_list[0])

if Keyword.ONEOF in json_schema:
sibling_keys = get_sibling_keys(json_schema, Keyword.ONEOF)
if sibling_keys:
raise NotImplementedError(f"oneOf with sibling keys is not yet supported. Got {sibling_keys}")
oneof_list = json_schema[Keyword.ONEOF]
if len(oneof_list) == 1:
return lm + self.json(json_schema=oneof_list[0])
warnings.warn("oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases.")
return lm + self.anyOf(anyof_list=oneof_list)

if Keyword.REF in json_schema:
sibling_keys = get_sibling_keys(json_schema, Keyword.REF)
if sibling_keys:
raise NotImplementedError(f"$ref with sibling keys is not yet supported. Got {sibling_keys}")
return lm + self.ref(reference=json_schema[Keyword.REF])

if Keyword.CONST in json_schema:
return lm + self.const(value=json_schema[Keyword.CONST])
sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM}
if sibling_keys:
raise NotImplementedError(f"const with sibling keys is not yet supported. Got {sibling_keys}")
return lm + self.const(value=json_schema[Keyword.CONST], instance_type=json_schema.get(Keyword.TYPE, None), enum=json_schema.get(Keyword.ENUM, None))

if Keyword.ENUM in json_schema:
return lm + self.enum(options=json_schema[Keyword.ENUM])
sibling_keys = get_sibling_keys(json_schema, Keyword.ENUM) - {Keyword.TYPE}
if sibling_keys:
raise NotImplementedError(f"enum with sibling keys is not yet supported. Got {sibling_keys}")
return lm + self.enum(options=json_schema[Keyword.ENUM], instance_type=json_schema.get(Keyword.TYPE, None))

if Keyword.TYPE in json_schema:
target_types = cast(Union[str, Sequence[str]], json_schema[Keyword.TYPE])
Expand Down
133 changes: 121 additions & 12 deletions tests/unit/library/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1268,14 +1268,12 @@ def test_nested_refs(self, test_object, valid):
# ref valid, maxItems valid
({"foo": []}, True),
# ref valid, maxItems invalid
pytest.param(
*({"foo": [1, 2, 3]}, False),
marks=pytest.mark.xfail(reason="sibling keywords to ref are not yet supported"),
),
({"foo": [1, 2, 3]}, False),
# ref invalid
({"foo": "string"}, False),
],
)
@pytest.mark.xfail(reason="sibling keywords to ref are not yet supported")
def test_ref_applies_alongside_sibling_keywords(self, test_object, valid):
schema = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
Expand Down Expand Up @@ -1562,12 +1560,10 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct(
# invalid on outer field
({"foo": {"bar": "a"}, "bar": 1}, False),
# valid on both fields
pytest.param(
*({"foo": {"bar": "a"}, "bar": "a"}, True),
marks=pytest.mark.xfail(reason="refs with sibling keywords are not yet supported; foo here is being seen as an additionalProperty before bar"),
),
({"foo": {"bar": "a"}, "bar": "a"}, True),
],
)
@pytest.mark.xfail(reason="refs with sibling keywords are not yet supported")
def test_refs_with_relative_uris_and_defs(self, test_object, valid):
schema = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
Expand Down Expand Up @@ -1597,12 +1593,10 @@ def test_refs_with_relative_uris_and_defs(self, test_object, valid):
# invalid on outer field
({"foo": {"bar": "a"}, "bar": 1}, False),
# valid on both fields
pytest.param(
*({"foo": {"bar": "a"}, "bar": "a"}, True),
marks=pytest.mark.xfail(reason="refs with sibling keywords are not yet supported; foo here is being seen as an additionalProperty before bar"),
),
({"foo": {"bar": "a"}, "bar": "a"}, True),
],
)
@pytest.mark.xfail(reason="refs with sibling keywords are not yet supported")
def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid):
schema = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
Expand Down Expand Up @@ -2357,6 +2351,60 @@ def test_bad_prefix_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes)
schema_obj=schema_obj,
)

@pytest.mark.parametrize(
    "obj, valid",
    [
        (1, True),
        (2, False),
        ("2", False),
        ("1", False),
        (True, False),
    ],
)
def test_typed_enum_single_type(self, obj, valid):
    """Check an enum that is additionally constrained by a single ``type``."""
    # Per the parameter table, only the integer 1 is expected to be accepted.
    typed_enum_schema = {"enum": [1, "2", True], "type": "integer"}

    if not valid:
        # Invalid instance: jsonschema must reject it, then run the
        # match-failure check against the same schema.
        with pytest.raises(ValidationError):
            validate(instance=obj, schema=typed_enum_schema)
        check_match_failure(bad_string=json_dumps(obj), schema_obj=typed_enum_schema)
        return

    # Valid instance: jsonschema accepts it, then run the generation check.
    validate(instance=obj, schema=typed_enum_schema)
    generate_and_check(obj, typed_enum_schema)

@pytest.mark.parametrize(
    "obj, valid",
    [
        (1, True),
        (2, False),
        ("2", True),
        ("1", False),
        (True, False),
    ],
)
def test_typed_enum_multiple_types(self, obj, valid):
    """Check an enum that is additionally constrained by a list of types."""
    # Per the parameter table, the integer 1 and the string "2" are the
    # only instances expected to be accepted.
    typed_enum_schema = {"enum": [1, "2", True], "type": ["integer", "string"]}

    if not valid:
        # Invalid instance: jsonschema must reject it, then run the
        # match-failure check against the same schema.
        with pytest.raises(ValidationError):
            validate(instance=obj, schema=typed_enum_schema)
        check_match_failure(bad_string=json_dumps(obj), schema_obj=typed_enum_schema)
        return

    # Valid instance: jsonschema accepts it, then run the generation check.
    validate(instance=obj, schema=typed_enum_schema)
    generate_and_check(obj, typed_enum_schema)

def test_invalid_typed_enum(self):
    """Expect a ValueError when no enum option matches the declared type."""
    schema_obj = {"enum": [1, "2"], "type": "boolean"}
    expected_message = "No valid options found for enum with type 'boolean': [1, '2']"

    with pytest.raises(ValueError) as exc_info:
        gen_json(schema=schema_obj)

    # The message is checked after the context manager exits so the
    # assertion actually runs once the exception has been captured.
    assert exc_info.value.args[0] == expected_message

class TestConst:
def test_constant_int(self):
Expand Down Expand Up @@ -2416,6 +2464,67 @@ def test_constant_precedence(self):
schema_obj=schema_obj,
)

def test_valid_typed_const(self):
    """A const whose value agrees with the sibling ``type`` is accepted."""
    expected = 1
    typed_const_schema = {"const": 1, "type": "integer"}
    validate(instance=expected, schema=typed_const_schema)
    generate_and_check(expected, typed_const_schema)

def test_invalid_typed_const(self):
    """A const whose value contradicts the sibling ``type`` raises ValidationError."""
    contradictory_schema = {"const": 1, "type": "boolean"}
    with pytest.raises(ValidationError):
        gen_json(schema=contradictory_schema)

def test_valid_enum_const(self):
    """A const that is also listed in the sibling ``enum`` is accepted."""
    expected = 1
    enum_const_schema = {"const": 1, "enum": [1, 2, 3]}
    validate(instance=expected, schema=enum_const_schema)
    generate_and_check(expected, enum_const_schema)

def test_invalid_enum_const(self):
    """A const that is absent from the sibling ``enum`` raises ValidationError."""
    contradictory_schema = {"const": 1, "enum": [2, 3]}
    with pytest.raises(ValidationError):
        gen_json(schema=contradictory_schema)

def test_valid_typed_enum_const(self):
    """A const consistent with both sibling ``enum`` and ``type`` is accepted."""
    expected = 1
    combined_schema = {
        "const": 1,
        "enum": [1, "2", 3],
        "type": "integer",
    }
    validate(instance=expected, schema=combined_schema)
    generate_and_check(expected, combined_schema)

@pytest.mark.parametrize(
    "const",
    [
        "2",  # right enum, wrong type
        2,  # wrong enum, right type
        "3",  # wrong enum, wrong type
    ],
)
def test_invalid_typed_enum_const(self, const):
    """A const that conflicts with the sibling ``enum`` and/or ``type`` raises ValidationError."""
    conflicting_schema = {
        "const": const,
        "enum": [1, "2", 3],
        "type": "integer",
    }
    with pytest.raises(ValidationError):
        gen_json(schema=conflicting_schema)


class TestAdditionalProperties:

Expand Down

0 comments on commit fb4c57c

Please sign in to comment.