-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bug: "Phu Thai or Phuu Thai"
repeated for pht-Thai-TH
#17
Comments
This is caught with an updated schema: {
"$schema": "http://json-schema.org/schema#",
"$id": "https://github.com/silnrsi/langtags/raw/master/source/langtags_schema.json",
"$ref": "#/definitions/langtags",
"definitions": {
"langtags": {
  "type": "array",
  "items": {
    "oneOf": [
      { "$ref": "#/definitions/langtag" },
      { "$ref": "#/definitions/_globalvar" },
      { "$ref": "#/definitions/_phonvar" },
      { "$ref": "#/definitions/_version" },
      { "$ref": "#/definitions/_conformance" }
    ]
  },
  "additionalItems": false
},
"langtag": {
  "type": "object",
  "properties": {
    "tag": { "$ref": "#/definitions/bcp47" },
    "full": { "type": "string" },
    "tags": {
      "type": "array",
      "items": { "$ref": "#/definitions/bcp47" },
      "additionalItems": false,
      "uniqueItems": true
    },
    "variants": {
      "type": "array",
      "items": { "$ref": "#/definitions/bcp47_variant" },
      "additionalItems": false,
      "uniqueItems": true
    },
    "iso639_3": { "$ref": "#/definitions/iso639_3" },
    "region": { "$ref": "#/definitions/iso3166_1" },
    "regions": {
      "type": "array",
      "items": { "$ref": "#/definitions/iso3166_1" },
      "additionalItems": false,
      "uniqueItems": true
    },
    "regionname": { "type": "string" },
    "iana": {
      "type": "array",
      "items": { "type": "string" },
      "uniqueItems": true
    },
    "name": { "type": "string" },
    "names": {
      "type": "array",
      "items": { "type": "string" },
      "uniqueItems": true
    },
    "localname": { "type": "string" },
    "sldr": { "type": "boolean" },
    "nophonvars": { "type": "boolean" },
    "obsolete": { "type": "boolean" },
    "script": { "$ref": "#/definitions/iso15924" },
    "localnames": {
      "type": "array",
      "items": { "type": "string" },
      "uniqueItems": true
    },
    "latnnames": {
      "type": "array",
      "items": { "type": "string" },
      "uniqueItems": true
    },
    "suppress": { "type": "boolean" },
    "unwritten": { "type": "boolean" },
    "windows": { "$ref": "#/definitions/bcp47" },
    "rod": { "type": "string" },
    "macrolang": { "$ref": "#/definitions/bcp47" }
  },
  "required": ["full"],
  "additionalProperties": false
},
"_globalvar": {
  "type": "object",
  "properties": {
    "tag": { "type": "string", "const": "_globalvar" },
    "variants": {
      "type": "array",
      "items": { "$ref": "#/definitions/bcp47_variant" },
      "additionalItems": false
    }
  },
  "required": ["tag", "variants"],
  "additionalProperties": false
},
"_phonvar": {
  "type": "object",
  "properties": {
    "tag": { "type": "string", "const": "_phonvar" },
    "variants": {
      "type": "array",
      "items": { "$ref": "#/definitions/bcp47_variant" },
      "additionalItems": false
    }
  },
  "required": ["tag", "variants"],
  "additionalProperties": false
},
"_version": {
  "type": "object",
  "properties": {
    "tag": { "type": "string", "const": "_version" },
    "api": { "type": "string", "pattern": "^\\d+\\.\\d+(\\.\\d+)?$" },
    "date": { "type": "string", "pattern": "^\\d+-\\d+-\\d+$" }
  },
  "required": ["tag", "api", "date"],
  "additionalProperties": false
},
"_conformance": {
  "description": "Special entry identified by tag \"_conformance\" that lists region codes and script codes. The tag is required, like the other special entries (_globalvar, _phonvar, _version), so the entry is always identified by its discriminator.",
  "type": "object",
  "properties": {
    "tag": { "type": "string", "const": "_conformance" },
    "regions": {
      "type": "array",
      "items": { "$ref": "#/definitions/iso3166_1" }
    },
    "scripts": {
      "type": "array",
      "items": { "$ref": "#/definitions/iso15924" }
    }
  },
  "required": ["tag", "regions", "scripts"],
  "additionalProperties": false
},
"bcp47": {
  "pattern": "^(((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)|(brv-(Thai|TH)-x-(dongluang|khongchiem|sakonnakon)|cek-(Latn-)?(MM-)?x-asangkhongso|cek-(Latn-)?(MM-)?x-khawngtuu|dao-(Latn-)?(MM-)?x-khengdaai|1901|1996|dgl-(Copt-)?(SD-)?x-oldnubian|ers-(Zzzz-)?(CN-)?x-ersushaba|fia-(Copt-)?(SD-)?x-oldnubian|mnc-(Mong-)?(CN-)?x-oldmanchu|nst-(Latn-)?(MM-)?x-moshanghawa|onw-(Copt-)?(SD-)?x-oldnubian|sgn-(Zxxx-)?MY-(Zxxx-)?MM|sgn-MY-Zxxx|sgn-Zxxx-MY-mm|tew-(Latn-)?(US-)?x-santaclara|tzo-(Latn-)?(MX-)?x-sanandres|tzo-(Latn-)?(MX-)?x-zinacantan|xnz-(Copt-)?(EG-)?x-oldnubian))|((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+))$",
  "type": "string"
},
"bcp47_variant": {
  "description": "A single BCP 47 variant subtag: 5 to 8 alphanumerics, or exactly 4 characters starting with a digit. This is the same sub-expression the bcp47 pattern uses for variants. The alternation sits inside one group so that ^ and $ anchor both branches; without the group each branch is anchored on one side only and strings with leading or trailing junk are accepted.",
  "type": "string",
  "pattern": "^([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3})$"
},
"iso639_3": { "type": "string", "pattern": "^[a-z]{3}$" },
"iso3166_1": {
  "description": "Region code: exactly two uppercase ASCII letters or exactly three ASCII digits. The alternation sits inside one group so that ^ and $ anchor both branches; without the group, values such as \"USA\" (prefix match) or \"x123\" (suffix match) are accepted.",
  "type": "string",
  "pattern": "^([A-Z]{2}|[0-9]{3})$"
},
"iso15924": { "type": "string", "pattern": "^[A-Z]([a-z]{3})$" }
}
} |
"Phu Thai or Phuu Thai"
repeated for pht-Thai-TH
"Phu Thai or Phuu Thai"
repeated for pht-Thai-TH
This change updates the schema as suggested in silnrsi#17 to detect duplicates in tags. It also adds a unit tests to detect duplicates that appear in multiple entries, as suggested in silnrsi#16. This change does not update the data and thus will cause failing tests.
…rsion The `langtags.json` version downloadable from https://github.com/silnrsi/langtags/ has several duplicated tags due to the following bugs: - silnrsi/langtags#15 - silnrsi/langtags#17 This change removes the duplicates from `langtags.json`, and also removes the `tpo-Zzzz-LA` tag according to the newer `langtags.json` in the https://github.com/silnrsi/langtags repo. See also silnrsi/langtags#16.
…rsion The `langtags.json` version downloadable from https://github.com/silnrsi/langtags/ has several duplicated tags due to the following bugs: - silnrsi/langtags#15 - silnrsi/langtags#17 This change removes the duplicates from `langtags.json`, and also removes the `tpo-Zzzz-LA` tag according to the newer `langtags.json` in the https://github.com/silnrsi/langtags repo. See also silnrsi/langtags#16.
The latnnames field is in 1:1 correspondence with the localnames fields. I.e. each entry is meant to be a transcription/literation of its corresponding entry in the localnames field. Thus it is valid for the latnnames list to have repeated entries. It is not valid for the localnames field to have repeated entries. |
Given the Thai involved, the latnname is valid if unhelpful, given we have no way of distinguishing two different p letters that are in the same tone class. |
If you want to modify the json schema accordingly (e.g. check the same number of entries in localnames as latnnames), then I'll update the schema test. |
I don't think it is possible in json schema to compare two entries like that. But I have updated the schema to remove the latnnames uniqueness requirement: {
"$schema": "http://json-schema.org/schema#",
"$id": "https://github.com/silnrsi/langtags/raw/master/source/langtags_schema.json",
"$ref": "#/definitions/langtags",
"definitions": {
"langtags": {
  "type": "array",
  "items": {
    "oneOf": [
      { "$ref": "#/definitions/langtag" },
      { "$ref": "#/definitions/_globalvar" },
      { "$ref": "#/definitions/_phonvar" },
      { "$ref": "#/definitions/_version" },
      { "$ref": "#/definitions/_conformance" }
    ]
  },
  "additionalItems": false
},
"langtag": {
  "type": "object",
  "properties": {
    "tag": { "$ref": "#/definitions/bcp47" },
    "full": { "type": "string" },
    "tags": {
      "type": "array",
      "items": { "$ref": "#/definitions/bcp47" },
      "additionalItems": false,
      "uniqueItems": true
    },
    "variants": {
      "type": "array",
      "items": { "$ref": "#/definitions/bcp47_variant" },
      "additionalItems": false,
      "uniqueItems": true
    },
    "iso639_3": { "$ref": "#/definitions/iso639_3" },
    "region": { "$ref": "#/definitions/iso3166_1" },
    "regions": {
      "type": "array",
      "items": { "$ref": "#/definitions/iso3166_1" },
      "additionalItems": false,
      "uniqueItems": true
    },
    "regionname": { "type": "string" },
    "iana": {
      "type": "array",
      "items": { "type": "string" },
      "uniqueItems": true
    },
    "name": { "type": "string" },
    "names": {
      "type": "array",
      "items": { "type": "string" },
      "uniqueItems": true
    },
    "localname": { "type": "string" },
    "sldr": { "type": "boolean" },
    "nophonvars": { "type": "boolean" },
    "obsolete": { "type": "boolean" },
    "script": { "$ref": "#/definitions/iso15924" },
    "localnames": {
      "type": "array",
      "items": { "type": "string" },
      "uniqueItems": true
    },
    "latnnames": {
      "type": "array",
      "items": { "type": "string" }
    },
    "suppress": { "type": "boolean" },
    "unwritten": { "type": "boolean" },
    "windows": { "$ref": "#/definitions/bcp47" },
    "rod": { "type": "string" },
    "macrolang": { "$ref": "#/definitions/bcp47" }
  },
  "required": ["full"],
  "additionalProperties": false
},
"_globalvar": {
  "type": "object",
  "properties": {
    "tag": { "type": "string", "const": "_globalvar" },
    "variants": {
      "type": "array",
      "items": { "$ref": "#/definitions/bcp47_variant" },
      "additionalItems": false
    }
  },
  "required": ["tag", "variants"],
  "additionalProperties": false
},
"_phonvar": {
  "type": "object",
  "properties": {
    "tag": { "type": "string", "const": "_phonvar" },
    "variants": {
      "type": "array",
      "items": { "$ref": "#/definitions/bcp47_variant" },
      "additionalItems": false
    }
  },
  "required": ["tag", "variants"],
  "additionalProperties": false
},
"_version": {
  "type": "object",
  "properties": {
    "tag": { "type": "string", "const": "_version" },
    "api": { "type": "string", "pattern": "^\\d+\\.\\d+(\\.\\d+)?$" },
    "date": { "type": "string", "pattern": "^\\d+-\\d+-\\d+$" }
  },
  "required": ["tag", "api", "date"],
  "additionalProperties": false
},
"_conformance": {
  "description": "Special entry identified by tag \"_conformance\" that lists region codes and script codes. The tag is required, like the other special entries (_globalvar, _phonvar, _version), so the entry is always identified by its discriminator.",
  "type": "object",
  "properties": {
    "tag": { "type": "string", "const": "_conformance" },
    "regions": {
      "type": "array",
      "items": { "$ref": "#/definitions/iso3166_1" }
    },
    "scripts": {
      "type": "array",
      "items": { "$ref": "#/definitions/iso15924" }
    }
  },
  "required": ["tag", "regions", "scripts"],
  "additionalProperties": false
},
"bcp47": {
  "pattern": "^(((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)|(brv-(Thai|TH)-x-(dongluang|khongchiem|sakonnakon)|cek-(Latn-)?(MM-)?x-asangkhongso|cek-(Latn-)?(MM-)?x-khawngtuu|dao-(Latn-)?(MM-)?x-khengdaai|1901|1996|dgl-(Copt-)?(SD-)?x-oldnubian|ers-(Zzzz-)?(CN-)?x-ersushaba|fia-(Copt-)?(SD-)?x-oldnubian|mnc-(Mong-)?(CN-)?x-oldmanchu|nst-(Latn-)?(MM-)?x-moshanghawa|onw-(Copt-)?(SD-)?x-oldnubian|sgn-(Zxxx-)?MY-(Zxxx-)?MM|sgn-MY-Zxxx|sgn-Zxxx-MY-mm|tew-(Latn-)?(US-)?x-santaclara|tzo-(Latn-)?(MX-)?x-sanandres|tzo-(Latn-)?(MX-)?x-zinacantan|xnz-(Copt-)?(EG-)?x-oldnubian))|((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+))$",
  "type": "string"
},
"bcp47_variant": {
  "description": "A single BCP 47 variant subtag: 5 to 8 alphanumerics, or exactly 4 characters starting with a digit. This is the same sub-expression the bcp47 pattern uses for variants. The alternation sits inside one group so that ^ and $ anchor both branches; without the group each branch is anchored on one side only and strings with leading or trailing junk are accepted.",
  "type": "string",
  "pattern": "^([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3})$"
},
"iso639_3": { "type": "string", "pattern": "^[a-z]{3}$" },
"iso3166_1": {
  "description": "Region code: exactly two uppercase ASCII letters or exactly three ASCII digits. The alternation sits inside one group so that ^ and $ anchor both branches; without the group, values such as \"USA\" (prefix match) or \"x123\" (suffix match) are accepted.",
  "type": "string",
  "pattern": "^([A-Z]{2}|[0-9]{3})$"
},
"iso15924": { "type": "string", "pattern": "^[A-Z]([a-z]{3})$" }
}
} |
latnnames don't have to be unique (see silnrsi#17).
Presume this should be:
The text was updated successfully, but these errors were encountered: