From 48df2d01d487ec573a68a315e7feb7ee87532d5c Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Wed, 4 Dec 2024 11:59:06 +0200
Subject: [PATCH 01/12] Initial commit

---
 validation/check_dataset.py | 25 +++++++++++++++++++++++++
 validation/requirements.txt |  1 +
 2 files changed, 26 insertions(+)
 create mode 100644 validation/check_dataset.py
 create mode 100644 validation/requirements.txt

diff --git a/validation/check_dataset.py b/validation/check_dataset.py
new file mode 100644
index 0000000..4102f41
--- /dev/null
+++ b/validation/check_dataset.py
@@ -0,0 +1,25 @@
+# Cohere For AI Community, Danylo Boiko, 2024
+
+from argparse import ArgumentParser
+
+from rich.console import Console
+
+
+class DatasetValidator:
+    def __init__(self, json_path: str, language_code: str) -> None:
+        self.json_path = json_path
+        self.language_code = language_code.lower()
+        self.console = Console()
+
+    def validate(self):
+        pass
+
+
+if __name__ == '__main__':
+    parser = ArgumentParser()
+    parser.add_argument("--json_path", type=str, required=True, help="Path to the JSON file to be validated")
+    parser.add_argument("--language_code", type=str, required=True, help="The language code for the dataset")
+    args = parser.parse_args()
+
+    validator = DatasetValidator(args.json_path, args.language_code)
+    validator.validate()
diff --git a/validation/requirements.txt b/validation/requirements.txt
new file mode 100644
index 0000000..c94be38
--- /dev/null
+++ b/validation/requirements.txt
@@ -0,0 +1 @@
+rich
\ No newline at end of file

From 34341d1590b2a49b5773917c0c39f2666d423e95 Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Wed, 4 Dec 2024 12:01:54 +0200
Subject: [PATCH 02/12] Rename folder

---
 {validation => validator}/check_dataset.py | 0
 {validation => validator}/requirements.txt | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename {validation => validator}/check_dataset.py (100%)
 rename {validation => validator}/requirements.txt (100%)

diff --git a/validation/check_dataset.py b/validator/check_dataset.py
similarity index 100%
rename from validation/check_dataset.py
rename to validator/check_dataset.py
diff --git a/validation/requirements.txt b/validator/requirements.txt
similarity index 100%
rename from validation/requirements.txt
rename to validator/requirements.txt

From 2e0fe63f27bf3c1476abf291742ea145ed0be8c2 Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Fri, 6 Dec 2024 23:11:16 +0200
Subject: [PATCH 03/12] Add generic validation pipeline

---
 validator/check_dataset.py | 86 ++++++++++++++++++++++++++++++++++----
 1 file changed, 77 insertions(+), 9 deletions(-)

diff --git a/validator/check_dataset.py b/validator/check_dataset.py
index 4102f41..ff6f1d5 100644
--- a/validator/check_dataset.py
+++ b/validator/check_dataset.py
@@ -1,25 +1,93 @@
 # Cohere For AI Community, Danylo Boiko, 2024
 
-from argparse import ArgumentParser
+import json
+import argparse
 
 from rich.console import Console
+from rich.panel import Panel
+from rich.syntax import Syntax
+from rich.text import Text
+from rich.tree import Tree
+
+
+class ValidationError:
+    def __init__(self, entity_index: int, message: str) -> None:
+        self.entity_index = entity_index
+        self.message = message
 
 
 class DatasetValidator:
-    def __init__(self, json_path: str, language_code: str) -> None:
-        self.json_path = json_path
-        self.language_code = language_code.lower()
-        self.console = Console()
+    def __init__(self, json_file: str, language_code: str) -> None:
+        self.json_file: str = json_file
+        self.json_entries: list[dict] = []
+        self.language_code: str = language_code.lower()
+        self.console: Console = Console()
+        self.errors: list[ValidationError] = []
+
+    def validate(self) -> None:
+        self.console.print("Starting validation...", style="bold green")
+        self.console.print(f"JSON file: {self.json_file}", style="cyan")
+        self.console.print(f"Language code: {self.language_code}", style="cyan")
+
+        if not self._load_json():
+            return
+
+        self._validate_entries()
+
+        self._print_validation_report()
+
+    def _load_json(self) -> bool:
+        try:
+            with open(self.json_file, "r", encoding="utf-8") as file:
+                entries = json.load(file)
+
+                if not isinstance(entries, list):
+                    raise ValueError(f"The file must contain a JSON array (list of entries)")
+
+                self.json_entries = entries
+
+            return True
+        except Exception as error:
+            self.console.print(f"Error loading file {self.json_file}: {error}", style="red")
 
-    def validate(self):
+        return False
+
+    def _validate_entries(self) -> None:
         pass
 
+    def _print_validation_report(self) -> None:
+        if len(self.errors) == 0:
+            return self.console.print("Congratulations, the JSON file is valid!", style="green")
+
+        self.console.print("The following errors were found, fix them and try again:", style="red")
+
+        for error in self.errors:
+            self.console.print(Panel(self._create_error_tree(error), expand=False, border_style="red"))
+
+    def _create_error_tree(self, error: ValidationError) -> Tree:
+        entry = self.json_entries[error.entity_index]
+
+        tree = Tree(f"Error in entry with index {error.entity_index}", style="red")
+        tree.add(Text(error.message, style="yellow"))
+
+        question_node = tree.add("Question")
+        question_node.add(Syntax(entry.get("question", "[N/A]"), "text", word_wrap=True))
+
+        options_node = tree.add("Options")
+        for option_num, option_value in enumerate(entry.get("options", []), 1):
+            options_node.add(f"{option_num}. {option_value}")
+
+        answer_node = tree.add("Answer")
+        answer_node.add(str(entry.get("answer", "[N/A]")))
+
+        return tree
+
 
 if __name__ == '__main__':
-    parser = ArgumentParser()
-    parser.add_argument("--json_path", type=str, required=True, help="Path to the JSON file to be validated")
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--json_file", type=str, required=True, help="Path to the JSON file to be validated")
     parser.add_argument("--language_code", type=str, required=True, help="The language code for the dataset")
     args = parser.parse_args()
 
-    validator = DatasetValidator(args.json_path, args.language_code)
+    validator = DatasetValidator(args.json_file, args.language_code)
     validator.validate()

From 21c037fed601e5c544e2d1b04e27c52755eeefb0 Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Sun, 8 Dec 2024 15:28:09 +0200
Subject: [PATCH 04/12] Add data validation

---
 validator/check_dataset.py | 129 +++++++++++++++++++++++++++++++++----
 validator/requirements.txt |   3 +-
 2 files changed, 117 insertions(+), 15 deletions(-)

diff --git a/validator/check_dataset.py b/validator/check_dataset.py
index ff6f1d5..0a3b49b 100644
--- a/validator/check_dataset.py
+++ b/validator/check_dataset.py
@@ -3,6 +3,10 @@
 import json
 import argparse
 
+from typing import Union, Literal, Optional
+
+from pydantic import BaseModel, ValidationError, field_validator
+from pydantic_core.core_schema import ValidationInfo
 from rich.console import Console
 from rich.panel import Panel
 from rich.syntax import Syntax
@@ -10,11 +14,100 @@
 from rich.tree import Tree
 
 
-class ValidationError:
-    def __init__(self, entity_index: int, message: str) -> None:
-        self.entity_index = entity_index
+class EntrySchema(BaseModel):
+    language: str
+    country: str
+    file_name: str
+    source: str
+    license: str
+    level: str
+    category_en: str
+    category_original_lang: str
+    original_question_num: Union[int, str]
+    question: str
+    options: list[str]
+    answer: int
+    image_png: Optional[str]
+    image_information: Literal["useful", "essential"]
+    image_type: Literal["symbols", "figures", "graph", "table", "text"]
+    parallel_question_num: Optional[Union[int, str]]
+
+    @staticmethod
+    def _validate_string(value: str) -> str:
+        if len(value.strip()) == 0:
+            raise ValueError("Value cannot be empty or whitespace")
+
+        if value.startswith(" ") or value.endswith(" "):
+            raise ValueError("Value cannot have leading or trailing spaces")
+
+        return value
+
+    @staticmethod
+    def _validate_uniqueness(values: list) -> list:
+        if len(set(values)) != len(values):
+            raise ValueError("All values must be unique")
+
+        return values
+
+    @staticmethod
+    def _validate_length(values: list, expected_length) -> list:
+        length = len(values)
+
+        if length != expected_length:
+            raise ValueError(f"Expected {expected_length} values, but got {length}")
+
+        return values
+
+    @field_validator("language")
+    def validate_language(cls, language: str, config: ValidationInfo) -> str:
+        expected_language = config.context.get("language")
+
+        if language != expected_language:
+            raise ValueError(f"Expected '{expected_language}', but got '{language}'")
+
+        return cls._validate_string(language)
+
+    @field_validator("options")
+    def validate_options(cls, options: list[str]) -> list[str]:
+        for option in options:
+            cls._validate_string(option)
+
+        cls._validate_uniqueness(options)
+
+        return cls._validate_length(options, 4)
+
+    @field_validator("answer")
+    def validate_answer(cls, answer: int, config: ValidationInfo) -> int:
+        options_count = len(config.data.get("options", []))
+
+        if options_count != 0 and answer not in range(options_count):
+            raise ValueError(f"Expected value from 0 to {options_count - 1}, but got {answer}")
+
+        return answer
+
+    @field_validator(
+        "country", "file_name", "source", "license", "level", "category_en", "category_original_lang",
+        "original_question_num", "question", "image_png", "parallel_question_num"
+    )
+    def validate_string_fields(cls, value: Optional[str]) -> Optional[str]:
+        return cls._validate_string(value) if isinstance(value, str) else value
+
+    class Config:
+        extra = "forbid"
+
+
+class EntryError:
+    def __init__(self, index: int, location: tuple | None, message: str) -> None:
+        self.index = index
+        self.location = location
         self.message = message
 
+    def __str__(self) -> str:
+        if self.location:
+            return f"Location: {str(self.location).strip("(,)")}, error: {self.message.lower()}"
+
+        return self.message
+
 
 class DatasetValidator:
     def __init__(self, json_file: str, language_code: str) -> None:
@@ -22,10 +115,10 @@ def __init__(self, json_file: str, language_code: str) -> None:
         self.json_entries: list[dict] = []
         self.language_code: str = language_code.lower()
         self.console: Console = Console()
-        self.errors: list[ValidationError] = []
+        self.errors: list[EntryError] = []
 
     def validate(self) -> None:
-        self.console.print("Starting validation...", style="bold green")
+        self.console.print("Starting validation...", style="green")
         self.console.print(f"JSON file: {self.json_file}", style="cyan")
         self.console.print(f"Language code: {self.language_code}", style="cyan")
 
@@ -47,13 +140,21 @@ def _load_json(self) -> bool:
                 self.json_entries = entries
 
             return True
-        except Exception as error:
-            self.console.print(f"Error loading file {self.json_file}: {error}", style="red")
+        except Exception as e:
+            self.console.print(f"Error loading file {self.json_file}: {e}", style="red")
 
         return False
 
     def _validate_entries(self) -> None:
-        pass
+        for index, entry in enumerate(self.json_entries):
+            try:
+                EntrySchema.model_validate(entry, context={
+                    "language": self.language_code
+                })
+            except ValidationError as e:
+                self.errors.extend([
+                    EntryError(index, error.get("loc", None), error.get("msg")) for error in e.errors()
+                ])
 
     def _print_validation_report(self) -> None:
         if len(self.errors) == 0:
@@ -64,21 +165,21 @@ def _print_validation_report(self) -> None:
         for error in self.errors:
             self.console.print(Panel(self._create_error_tree(error), expand=False, border_style="red"))
 
-    def _create_error_tree(self, error: ValidationError) -> Tree:
-        entry = self.json_entries[error.entity_index]
+    def _create_error_tree(self, error: EntryError) -> Tree:  # renders one failing entry as a rich tree
+        entry = self.json_entries[error.index] if isinstance(self.json_entries[error.index], dict) else {}  # non-object entries have no fields
 
-        tree = Tree(f"Error in entry with index {error.entity_index}", style="red")
-        tree.add(Text(error.message, style="yellow"))
+        tree = Tree(f"Error in entry with index {error.index}", style="red")
+        tree.add(Text(str(error), style="yellow"))
 
         question_node = tree.add("Question")
-        question_node.add(Syntax(entry.get("question", "[N/A]"), "text", word_wrap=True))
+        question_node.add(Syntax(str(entry.get("question", "N/A")), "text", word_wrap=True))  # str(): the invalid value may not be text
 
         options_node = tree.add("Options")
         for option_num, option_value in enumerate(entry.get("options", []), 1):
             options_node.add(f"{option_num}. {option_value}")
 
         answer_node = tree.add("Answer")
-        answer_node.add(str(entry.get("answer", "[N/A]")))
+        answer_node.add(str(entry.get("answer", "N/A")))
 
         return tree
 
diff --git a/validator/requirements.txt b/validator/requirements.txt
index c94be38..6376362 100644
--- a/validator/requirements.txt
+++ b/validator/requirements.txt
@@ -1 +1,2 @@
-rich
\ No newline at end of file
+rich
+pydantic
\ No newline at end of file

From c55e83168b600a75d4517e193db5d418e0e2c4e1 Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Sun, 8 Dec 2024 15:45:16 +0200
Subject: [PATCH 05/12] Add minor refactoring

---
 validator/check_dataset.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/validator/check_dataset.py b/validator/check_dataset.py
index 0a3b49b..89ffbf5 100644
--- a/validator/check_dataset.py
+++ b/validator/check_dataset.py
@@ -14,6 +14,9 @@
 from rich.tree import Tree
 
 
+EXPECTED_OPTIONS_COUNT = 4
+
+
 class EntrySchema(BaseModel):
     language: str
     country: str
@@ -34,7 +37,7 @@ class EntrySchema(BaseModel):
 
     @staticmethod
     def _validate_string(value: str) -> str:
-        if len(value.strip()) == 0:
+        if not value.strip():
             raise ValueError("Value cannot be empty or whitespace")
 
         if value.startswith(" ") or value.endswith(" "):
@@ -43,24 +46,22 @@ def _validate_string(value: str) -> str:
         return value
 
     @staticmethod
-    def _validate_uniqueness(values: list) -> list:
+    def _validate_list_uniqueness(values: list) -> list:
         if len(set(values)) != len(values):
             raise ValueError("All values must be unique")
 
         return values
 
     @staticmethod
-    def _validate_length(values: list, expected_length) -> list:
-        length = len(values)
-
-        if length != expected_length:
-            raise ValueError(f"Expected {expected_length} values, but got {length}")
+    def _validate_list_length(values: list, expected_length: int) -> list:
+        if len(values) != expected_length:
+            raise ValueError(f"Expected {expected_length} values, but got {len(values)}")
 
         return values
 
     @field_validator("language")
     def validate_language(cls, language: str, config: ValidationInfo) -> str:
-        expected_language = config.context.get("language")
+        expected_language = config.context.get("expected_language")
 
         if language != expected_language:
             raise ValueError(f"Expected '{expected_language}', but got '{language}'")
@@ -72,15 +73,15 @@ def validate_options(cls, options: list[str]) -> list[str]:
         for option in options:
             cls._validate_string(option)
 
-        cls._validate_uniqueness(options)
+        cls._validate_list_uniqueness(options)
 
-        return cls._validate_length(options, 4)
+        return cls._validate_list_length(options, EXPECTED_OPTIONS_COUNT)
 
     @field_validator("answer")
     def validate_answer(cls, answer: int, config: ValidationInfo) -> int:
         options_count = len(config.data.get("options", []))
 
-        if options_count != 0 and answer not in range(options_count):
+        if options_count > 0 and not (0 <= answer < options_count):
             raise ValueError(f"Expected value from 0 to {options_count - 1}, but got {answer}")
 
         return answer
@@ -149,7 +150,7 @@ def _validate_entries(self) -> None:
         for index, entry in enumerate(self.json_entries):
             try:
                 EntrySchema.model_validate(entry, context={
-                    "language": self.language_code
+                    "expected_language": self.language_code
                 })
             except ValidationError as e:
                 self.errors.extend([

From c4ef8311c82b8457e598b3be74d18f528ea05962 Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Sun, 8 Dec 2024 15:49:28 +0200
Subject: [PATCH 06/12] Inline validator methods

---
 validator/check_dataset.py | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/validator/check_dataset.py b/validator/check_dataset.py
index 89ffbf5..a7693e2 100644
--- a/validator/check_dataset.py
+++ b/validator/check_dataset.py
@@ -45,20 +45,6 @@ def _validate_string(value: str) -> str:
 
         return value
 
-    @staticmethod
-    def _validate_list_uniqueness(values: list) -> list:
-        if len(set(values)) != len(values):
-            raise ValueError("All values must be unique")
-
-        return values
-
-    @staticmethod
-    def _validate_list_length(values: list, expected_length: int) -> list:
-        if len(values) != expected_length:
-            raise ValueError(f"Expected {expected_length} values, but got {len(values)}")
-
-        return values
-
     @field_validator("language")
     def validate_language(cls, language: str, config: ValidationInfo) -> str:
         expected_language = config.context.get("expected_language")
@@ -73,9 +59,13 @@ def validate_options(cls, options: list[str]) -> list[str]:
         for option in options:
             cls._validate_string(option)
 
-        cls._validate_list_uniqueness(options)
+        if len(set(options)) != len(options):
+            raise ValueError("All values must be unique")
+
+        if len(options) != EXPECTED_OPTIONS_COUNT:
+            raise ValueError(f"Expected {EXPECTED_OPTIONS_COUNT} values, but got {len(options)}")
 
-        return cls._validate_list_length(options, EXPECTED_OPTIONS_COUNT)
+        return options
 
     @field_validator("answer")
     def validate_answer(cls, answer: int, config: ValidationInfo) -> int:

From f8deb9e49d81afa2d2a76b73025ff5a85ccac539 Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Sun, 8 Dec 2024 15:51:42 +0200
Subject: [PATCH 07/12] Add minor refactoring

---
 validator/check_dataset.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/validator/check_dataset.py b/validator/check_dataset.py
index a7693e2..39a1398 100644
--- a/validator/check_dataset.py
+++ b/validator/check_dataset.py
@@ -129,12 +129,10 @@ def _load_json(self) -> bool:
                     raise ValueError(f"The file must contain a JSON array (list of entries)")
 
                 self.json_entries = entries
-
             return True
         except Exception as e:
             self.console.print(f"Error loading file {self.json_file}: {e}", style="red")
-
-        return False
+            return False
 
     def _validate_entries(self) -> None:
         for index, entry in enumerate(self.json_entries):

From 2c531bd011911bcbf358a79ad637e36f3d1945be Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Sun, 8 Dec 2024 15:53:49 +0200
Subject: [PATCH 08/12] Make error location optional

---
 validator/check_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validator/check_dataset.py b/validator/check_dataset.py
index 39a1398..97146f3 100644
--- a/validator/check_dataset.py
+++ b/validator/check_dataset.py
@@ -88,7 +88,7 @@ class Config:
 
 
 class EntryError:
-    def __init__(self, index: int, location: tuple | None, message: str) -> None:
+    def __init__(self, index: int, location: Optional[tuple], message: str) -> None:
         self.index = index
         self.location = location
         self.message = message

From a5d7ff7d812544984caf8fa7d945687d367bf1da Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Sun, 8 Dec 2024 16:04:26 +0200
Subject: [PATCH 09/12] Improve code style

---
 validator/check_dataset.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/validator/check_dataset.py b/validator/check_dataset.py
index 97146f3..a2e7bfe 100644
--- a/validator/check_dataset.py
+++ b/validator/check_dataset.py
@@ -117,7 +117,6 @@ def validate(self) -> None:
             return
 
         self._validate_entries()
-
         self._print_validation_report()
 
     def _load_json(self) -> bool:

From 6098f53410802a9f52040fc33899420cb45acde6 Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Mon, 9 Dec 2024 14:04:08 +0200
Subject: [PATCH 10/12] Add validation for image data

---
 validator/check_dataset.py | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/validator/check_dataset.py b/validator/check_dataset.py
index a2e7bfe..f23ee9d 100644
--- a/validator/check_dataset.py
+++ b/validator/check_dataset.py
@@ -5,7 +5,7 @@
 
 from typing import Union, Literal, Optional
 
-from pydantic import BaseModel, ValidationError, field_validator
+from pydantic import BaseModel, ValidationError, field_validator, model_validator
 from pydantic_core.core_schema import ValidationInfo
 from rich.console import Console
 from rich.panel import Panel
@@ -31,9 +31,9 @@ class EntrySchema(BaseModel):
     options: list[str]
     answer: int
     image_png: Optional[str]
-    image_information: Literal["useful", "essential"]
-    image_type: Literal["symbols", "figures", "graph", "table", "text"]
-    parallel_question_num: Optional[Union[int, str]]
+    image_information: Optional[Literal["useful", "essential"]]
+    image_type: Optional[Literal["graph", "table", "diagram", "scientific formula", "text", "figure", "map", "photo"]]
+    parallel_question_id: Optional[tuple[str, int]]
 
     @staticmethod
     def _validate_string(value: str) -> str:
@@ -76,13 +76,31 @@ def validate_answer(cls, answer: int, config: ValidationInfo) -> int:
 
         return answer
 
+    @field_validator("parallel_question_id")
+    def validate_parallel_question_id(cls, parallel_question_id: Optional[tuple[str, int]]) -> Optional[tuple[str, int]]:
+        first_item = parallel_question_id[0] if isinstance(parallel_question_id, tuple) else None  # None is passed through as-is
+        if isinstance(first_item, str):
+            cls._validate_string(first_item)  # raises ValueError for blank or space-padded text
+        return parallel_question_id
+
     @field_validator(
         "country", "file_name", "source", "license", "level", "category_en", "category_original_lang",
-        "original_question_num", "question", "image_png", "parallel_question_num"
+        "original_question_num", "question", "image_png"
     )
     def validate_string_fields(cls, value: Optional[str]) -> Optional[str]:
         return cls._validate_string(value) if isinstance(value, str) else value
 
+    @model_validator(mode="after")
+    def validate_image_data(self) -> "EntrySchema":
+        """Reject entries that set only some of image_png, image_information and image_type."""
+        # Written as an instance method, the form pydantic documents for mode="after" validators.
+        image_data = [self.image_png, self.image_information, self.image_type]
+        if any(image_data) and not all(image_data):
+            raise ValueError(
+                "All fields related to image data (prefixed with 'image_') must be specified if any one of them is specified"
+            )
+        return self
+
     class Config:
         extra = "forbid"
 
@@ -94,10 +112,13 @@ def __init__(self, index: int, location: Optional[tuple], message: str) -> None:
         self.message = message
 
     def __str__(self) -> str:
+        message = self.message.removeprefix("Value error, ")
+
         if self.location:
-            return f"Location: {str(self.location).strip("(,)")}, error: {self.message.lower()}"
+            location = str(self.location).strip("(,)")
+            return f"Location: {location}, error: {message.lower()}"
 
-        return self.message
+        return message
 
 
 class DatasetValidator:
@@ -125,7 +146,7 @@ def _load_json(self) -> bool:
                 entries = json.load(file)
 
                 if not isinstance(entries, list):
-                    raise ValueError(f"The file must contain a JSON array (list of entries)")
+                    raise ValueError("The file must contain a JSON array (list of entries)")
 
                 self.json_entries = entries
             return True

From 0774dec9f3f9e2a26dd2ccded06dd4fe32b144a2 Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Mon, 9 Dec 2024 16:29:14 +0200
Subject: [PATCH 11/12] Allow n-choice questions

---
 validator/check_dataset.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/validator/check_dataset.py b/validator/check_dataset.py
index f23ee9d..f7ce211 100644
--- a/validator/check_dataset.py
+++ b/validator/check_dataset.py
@@ -14,9 +14,6 @@
 from rich.tree import Tree
 
 
-EXPECTED_OPTIONS_COUNT = 4
-
-
 class EntrySchema(BaseModel):
     language: str
     country: str
@@ -59,12 +56,12 @@ def validate_options(cls, options: list[str]) -> list[str]:
         for option in options:
             cls._validate_string(option)
 
+        if len(options) < 2:
+            raise ValueError(f"Expected at least 2 options, but got {len(options)}")
+
         if len(set(options)) != len(options):
             raise ValueError("All values must be unique")
 
-        if len(options) != EXPECTED_OPTIONS_COUNT:
-            raise ValueError(f"Expected {EXPECTED_OPTIONS_COUNT} values, but got {len(options)}")
-
         return options
 
     @field_validator("answer")

From 46cae54b806f0a635cb25d3bb37c58c400bda6c1 Mon Sep 17 00:00:00 2001
From: Danylo Boiko <danielboyko02@gmail.com>
Date: Mon, 9 Dec 2024 17:02:05 +0200
Subject: [PATCH 12/12] Format strip call

---
 validator/check_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validator/check_dataset.py b/validator/check_dataset.py
index f7ce211..e5356c3 100644
--- a/validator/check_dataset.py
+++ b/validator/check_dataset.py
@@ -112,7 +112,7 @@ def __str__(self) -> str:
         message = self.message.removeprefix("Value error, ")
 
         if self.location:
-            location = str(self.location).strip("(,)")
+            location = str(self.location).strip(",()")
             return f"Location: {location}, error: {message.lower()}"
 
         return message