def get_sibling_keys(node: Mapping[str, Any], key: str) -> set[str]:
    """Return the functional keys of ``node`` that sit alongside ``key``.

    A key is treated as functional when it belongs to ``VALID_KEYS`` and is
    not listed in ``IGNORED_KEYS``. ``key`` itself is never part of the
    result, and keys of ``node`` that are not in ``VALID_KEYS`` are left out.
    """
    # Everything that must not be reported: ignored keys plus the key whose
    # siblings are being requested.
    excluded = IGNORED_KEYS | {key}
    functional_keys = VALID_KEYS - excluded
    # Explicit intermediates instead of mixing `&` and `-` in one expression,
    # where `-` silently binds tighter than `&`.
    return set(node) & functional_keys
def enum(
    self,
    lm,
    *,
    options: Sequence[Union[None, bool, int, float, str, Mapping, Sequence]],
    instance_type: Optional[Union[str, Sequence[str]]] = None,
):
    """Append to ``lm`` a grammar that accepts exactly one of ``options``.

    Each option is converted to a grammar with ``self.const``, forwarding
    ``instance_type``. Options for which ``self.const`` raises
    ``jsonschema.ValidationError`` are dropped from the selection.

    Raises:
        ValueError: if no option survives (including when ``options`` is
            empty).
    """
    candidates: list[GrammarFunction] = []
    for option in options:
        try:
            candidates.append(self.const(value=option, instance_type=instance_type))
        except jsonschema.ValidationError:
            # const rejected this option during validation; leave it out.
            pass
    if len(candidates) == 0:
        raise ValueError(f"No valid options found for enum with type {instance_type!r}: {options}")
    return lm + select(options=candidates)
Got {sibling_keys}") oneof_list = json_schema[Keyword.ONEOF] if len(oneof_list) == 1: return lm + self.json(json_schema=oneof_list[0]) @@ -827,13 +858,22 @@ def json( return lm + self.anyOf(anyof_list=oneof_list) if Keyword.REF in json_schema: + sibling_keys = get_sibling_keys(json_schema, Keyword.REF) + if sibling_keys: + raise NotImplementedError(f"$ref with sibling keys is not yet supported. Got {sibling_keys}") return lm + self.ref(reference=json_schema[Keyword.REF]) if Keyword.CONST in json_schema: - return lm + self.const(value=json_schema[Keyword.CONST]) + sibling_keys = get_sibling_keys(json_schema, Keyword.CONST) - {Keyword.TYPE, Keyword.ENUM} + if sibling_keys: + raise NotImplementedError(f"const with sibling keys is not yet supported. Got {sibling_keys}") + return lm + self.const(value=json_schema[Keyword.CONST], instance_type=json_schema.get(Keyword.TYPE, None), enum=json_schema.get(Keyword.ENUM, None)) if Keyword.ENUM in json_schema: - return lm + self.enum(options=json_schema[Keyword.ENUM]) + sibling_keys = get_sibling_keys(json_schema, Keyword.ENUM) - {Keyword.TYPE} + if sibling_keys: + raise NotImplementedError(f"enum with sibling keys is not yet supported. 
Got {sibling_keys}") + return lm + self.enum(options=json_schema[Keyword.ENUM], instance_type=json_schema.get(Keyword.TYPE, None)) if Keyword.TYPE in json_schema: target_types = cast(Union[str, Sequence[str]], json_schema[Keyword.TYPE]) diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index 1151624b2..d320fdb4f 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -1268,14 +1268,12 @@ def test_nested_refs(self, test_object, valid): # ref valid, maxItems valid ({"foo": []}, True), # ref valid, maxItems invalid - pytest.param( - *({"foo": [1, 2, 3]}, False), - marks=pytest.mark.xfail(reason="sibling keywords to ref are not yet supported"), - ), + ({"foo": [1, 2, 3]}, False), # ref invalid ({"foo": "string"}, False), ], ) + @pytest.mark.xfail(reason="sibling keywords to ref are not yet supported") def test_ref_applies_alongside_sibling_keywords(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", @@ -1562,12 +1560,10 @@ def test_naive_replacement_of_ref_with_its_destination_is_not_correct( # invalid on outer field ({"foo": {"bar": "a"}, "bar": 1}, False), # valid on both fields - pytest.param( - *({"foo": {"bar": "a"}, "bar": "a"}, True), - marks=pytest.mark.xfail(reason="refs with sibling keywords are not yet supported; foo here is being seen as an additionalProperty before bar"), - ), + ({"foo": {"bar": "a"}, "bar": "a"}, True), ], ) + @pytest.mark.xfail(reason="refs with sibling keywords are not yet supported") def test_refs_with_relative_uris_and_defs(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", @@ -1597,12 +1593,10 @@ def test_refs_with_relative_uris_and_defs(self, test_object, valid): # invalid on outer field ({"foo": {"bar": "a"}, "bar": 1}, False), # valid on both fields - pytest.param( - *({"foo": {"bar": "a"}, "bar": "a"}, True), - marks=pytest.mark.xfail(reason="refs with sibling keywords are not yet 
supported; foo here is being seen as an additionalProperty before bar"), - ), + ({"foo": {"bar": "a"}, "bar": "a"}, True), ], ) + @pytest.mark.xfail(reason="refs with sibling keywords are not yet supported") def test_relative_refs_with_absolute_uris_and_defs(self, test_object, valid): schema = { "$schema": "https://json-schema.org/draft/2020-12/schema", @@ -2357,6 +2351,60 @@ def test_bad_prefix_enum(self, bad_obj, good_bytes, failure_byte, allowed_bytes) schema_obj=schema_obj, ) + @pytest.mark.parametrize( + "obj, valid", + [ + (1, True), + (2, False), + ("2", False), + ("1", False), + (True, False), + ] + ) + def test_typed_enum_single_type(self, obj, valid): + schema_obj = { + "enum": [1, "2", True], + "type": "integer" + } + if valid: + validate(instance=obj, schema=schema_obj) + generate_and_check(obj, schema_obj) + else: + with pytest.raises(ValidationError): + validate(instance=obj, schema=schema_obj) + check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj) + + @pytest.mark.parametrize( + "obj, valid", + [ + (1, True), + (2, False), + ("2", True), + ("1", False), + (True, False), + ] + ) + def test_typed_enum_multiple_types(self, obj, valid): + schema_obj = { + "enum": [1, "2", True], + "type": ["integer", "string"] + } + if valid: + validate(instance=obj, schema=schema_obj) + generate_and_check(obj, schema_obj) + else: + with pytest.raises(ValidationError): + validate(instance=obj, schema=schema_obj) + check_match_failure(bad_string=json_dumps(obj), schema_obj=schema_obj) + + def test_invalid_typed_enum(self): + schema_obj = { + "enum": [1, "2"], + "type": "boolean" + } + with pytest.raises(ValueError) as ve: + gen_json(schema=schema_obj) + assert ve.value.args[0] == "No valid options found for enum with type 'boolean': [1, '2']" class TestConst: def test_constant_int(self): @@ -2416,6 +2464,67 @@ def test_constant_precedence(self): schema_obj=schema_obj, ) + def test_valid_typed_const(self): + schema_obj = { + "const": 1, + "type": 
"integer" + } + target_obj = 1 + validate(instance=target_obj, schema=schema_obj) + generate_and_check(target_obj, schema_obj) + + def test_invalid_typed_const(self): + schema_obj = { + "const": 1, + "type": "boolean" + } + with pytest.raises(ValidationError): + gen_json(schema=schema_obj) + + def test_valid_enum_const(self): + schema_obj = { + "const": 1, + "enum": [1, 2, 3] + } + target_obj = 1 + validate(instance=target_obj, schema=schema_obj) + generate_and_check(target_obj, schema_obj) + + def test_invalid_enum_const(self): + schema_obj = { + "const": 1, + "enum": [2, 3] + } + with pytest.raises(ValidationError): + gen_json(schema=schema_obj) + + def test_valid_typed_enum_const(self): + schema_obj = { + "const": 1, + "enum": [1, "2", 3], + "type": "integer" + } + target_obj = 1 + validate(instance=target_obj, schema=schema_obj) + generate_and_check(target_obj, schema_obj) + + @pytest.mark.parametrize( + "const", + [ + "2", # right enum, wrong type + 2, # wrong enum, right type + "3", # wrong enum, wrong type + ] + ) + def test_invalid_typed_enum_const(self, const): + schema_obj = { + "const": const, + "enum": [1, "2", 3], + "type": "integer" + } + with pytest.raises(ValidationError): + gen_json(schema=schema_obj) + class TestAdditionalProperties: