Fix issues

ls1intum · Jan 12, 2025 · 8a25d40 · 8a25d40
1 parent 4cf793e
commit 8a25d40
Show file tree

Hide file tree

Showing 7 changed files with 259 additions and 43 deletions.
diff --git a/llm_core/llm_core/utils/predict_and_parse.py b/llm_core/llm_core/utils/predict_and_parse.py
@@ -4,16 +4,18 @@
 from langchain_core.pydantic_v1 import BaseModel, ValidationError
 from langchain_core.runnables import RunnableSequence
 from athena import get_experiment_environment
+from llm_core.models import DefaultModelConfig
 
 T = TypeVar("T", bound=BaseModel)
 
 async def predict_and_parse(
-        model: BaseLanguageModel, 
+        model: BaseLanguageModel,
         chat_prompt: ChatPromptTemplate, 
         prompt_input: dict, 
         pydantic_object: Type[T], 
         tags: Optional[List[str]],
         use_function_calling: bool = False
+
     ) -> Optional[T]:
     """Predicts an LLM completion using the model and parses the output using the provided Pydantic model
 

diff --git a/modules/text/module_text_llm/internal_grading_instructions.json b/modules/text/module_text_llm/internal_grading_instructions.json
@@ -0,0 +1,87 @@
+{
+    "3002": {
+        "grading_instructions": [
+            {
+                "title": "Plagiarism",
+                "description": "You plagiarised the slide. We noted it down and reserve the right to exclude you from the bonus",
+                "credits": 0.0,
+                "grading_instruction_id": 6052,
+                "student_text_example": [
+                    "Cohesion is the measure of how well the elements of a module belong together, while coupling is the measure of how much one module relies on another. These terms are important because they affect the maintainability and flexibility of a system."
+                ]
+            },
+            {
+                "title": "Assessment of the Difference between Coupling/Cohesion",
+                "description": "You correctly explained the difference between coupling and cohesion, well done!",
+                "credits": 1.0,
+                "grading_instruction_id": 6053,
+                "student_text_example": [
+                    "Cohesion is the number of dependencies within a subsystem, while Coupling is the number of dependencies between two subsystems."
+                ]
+            },
+            {
+                "title": "Assessment of the Difference between Coupling/Cohesion",
+                "description": "You provided the correct difference, however you did not explain it",
+                "credits": 0.5,
+                "grading_instruction_id": 6054,
+                "student_text_example": [
+                    "Cohesion is about internal dependencies, and coupling is about external dependencies."
+                ]
+            },
+            {
+                "title": "Assessment of the Difference between Coupling/Cohesion",
+                "description": "You did not provide a correct explanation of the difference between coupling and cohesion",
+                "credits": 0.0,
+                "grading_instruction_id": 6055,
+                "student_text_example": [
+                    "Cohesion and coupling are both about dependencies, but I don't know the exact difference."
+                ]
+            },
+            {
+                "title": "Assessment of the Explanation why Coupling/Cohesion are important",
+                "description": "You correctly explained why coupling and cohesion are important, well done!",
+                "credits": 1.0,
+                "grading_instruction_id": 6056,
+                "student_text_example": [
+                    "These terms are important in system design, because they influence the complexity of a system and allow to deal with change."
+                ]
+            },
+            {
+                "title": "Assessment of the Explanation why Coupling/Cohesion are important",
+                "description": "Your explanation of why the terms are important is incorrect, please review the lecture slides again and ask your tutor if something is still unclear",
+                "credits": 0.0,
+                "grading_instruction_id": 6057,
+                "student_text_example": [
+                    "Coupling and cohesion are important because they make the system look nice."
+                ]
+            },
+            {
+                "title": "Assessment of the Example",
+                "description": "Great example, well done!",
+                "credits": 1.0,
+                "grading_instruction_id": 6058,
+                "student_text_example": [
+                    "In a large multi-national company, there are several teams tailored to specific functions, such as finance, sales, legal, etc. The members of each team are closely associated with each other and work together to perform their specific function (high cohesion), but they are not too dependent on other teams (low coupling). This means that if a member of the finance team leaves, this does not affect other teams to a great extent."
+                ]
+            },
+            {
+                "title": "Assessment of the Example",
+                "description": "Correct but a weak example. Be more creative with your examples and explain them in more detail!",
+                "credits": 0.5,
+                "grading_instruction_id": 6059,
+                "student_text_example": [
+                    "A car engine and its parts are cohesive, but the engine and the wheels are loosely coupled."
+                ]
+            },
+            {
+                "title": "Assessment of the Example",
+                "description": "You did not provide an example for coupling and cohesion. Please review your submission and ask your tutor if something is still unclear",
+                "credits": 0.0,
+                "grading_instruction_id": 6060,
+                "student_text_example": [
+                    "I didn't include an example in my explanation."
+                ]
+            }
+        ]
+    }
+}
diff --git a/modules/text/module_text_llm/module_text_llm/__main__.py b/modules/text/module_text_llm/module_text_llm/__main__.py
@@ -25,8 +25,8 @@ def select_submission(exercise: Exercise, submissions: List[Submission]) -> Subm
 @feedback_consumer
 async def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]):
     logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), submission.id, exercise.id)
-    logger.info("Recieved feedbacks: %s", feedbacks)
-    updated_SGI = await update_grading_instructions(exercise.id, feedbacks, submission )
+    # logger.info("Received feedbacks: %s", feedbacks)
+    updated_SGI = await update_grading_instructions(exercise, feedbacks, submission)
     logger.info("Updated grading instructions: %s", updated_SGI)
     return updated_SGI
 @feedback_provider

diff --git a/modules/text/module_text_llm/module_text_llm/helpers/get_internal_sgi.py b/modules/text/module_text_llm/module_text_llm/helpers/get_internal_sgi.py
@@ -1,16 +1,33 @@
 import os
 import json
 from module_text_llm.in_context_learning.generate_internal import generate
-from athena.text import Submission, Feedback
+from athena.text import Submission, Feedback, TextFeedback
 from typing import List
 
+# Define Feedback_with_reference with the additional attribute and proper inheritance
 class Feedback_with_reference(Feedback):
-   reference_text : str
-
+    reference_text: str
+
+    def __init__(self, feedback: Feedback,exerciseId: int, submission: Submission):
+        # Initialize parent Feedback attributes
+        super().__init__(
+            id=feedback.id,
+            index_start=feedback.index_start,
+            index_end=feedback.index_end,
+            description=feedback.description,
+            credits=feedback.credits,
+            title=feedback.title,
+            exerciseId = exerciseId,
+            submissionId = submission.id,
+            referenceText = ""
+            )
+        # Initialize the new attribute
+        self.reference_text = ""
+
 def get_internal_sgi():
     file_name = 'internal_grading_instructions.json'
 
-    # Load existing data if file exists, else create a new structure
+    # Load existing data if the file exists, else create a new structure
     if os.path.exists(file_name):
         with open(file_name, 'r') as file:
             internal_instructions = json.load(file)
@@ -22,16 +39,17 @@ def write_internal_sgi(exerciseId: int, internal_instructions):
     file_name = 'internal_grading_instructions.json'
     with open(file_name, 'w') as file:
         json.dump(internal_instructions, file, indent=4)
-        
-def extract_text_from_reference(submission: Submission, feedbacks: List[Feedback]) -> List[Feedback_with_reference]:
+
+def extract_text_from_reference(exerciseId : int, submission: Submission, feedbacks: List[Feedback]) -> List[Feedback_with_reference]:
     text = submission.text
     feedback_with_references = []
     for feedback in feedbacks:
-        feedback_with_reference = feedback
+        # Create an instance of Feedback_with_reference
+        feedback_with_reference = Feedback_with_reference(feedback, exerciseId, submission)
+        # Extract reference text or set it as "Unreferenced" if indices are missing
         if feedback.index_start is not None and feedback.index_end is not None:
-            feedback_with_reference.reference_text = text[feedback.index_start, feedback.index_end]
+            feedback_with_reference.reference_text = text[feedback.index_start:feedback.index_end]
         else:
             feedback_with_reference.reference_text = "Unreferenced"
         feedback_with_references.append(feedback_with_reference)
     return feedback_with_references
-
diff --git a/...text/module_text_llm/module_text_llm/in_context_learning/generate_updated_internal_SGI.py b/...text/module_text_llm/module_text_llm/in_context_learning/generate_updated_internal_SGI.py
@@ -4,60 +4,49 @@
 from athena.text import Exercise, Submission, Feedback
 from athena.logger import logger
 from llm_core.utils.llm_utils import (
-    get_chat_prompt_with_formatting_instructions, 
-    check_prompt_length_and_omit_features_if_necessary, 
-    num_tokens_from_prompt,
+    get_chat_prompt_with_formatting_instructions
 )
 import os
 import json
 from llm_core.utils.predict_and_parse import predict_and_parse
-from module_text_llm.approach_config import ApproachConfig
-from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
+from module_text_llm.helpers.utils import add_sentence_numbers,format_grading_instructions
 from module_text_llm.in_context_learning.prompt_internal import InternalGradingInstructions
-from module_text_llm.in_context_learning.prompt_internal import system_message, human_message
+from module_text_llm.in_context_learning.prompt_internal import system_message_upgrade, human_message_upgrade
 from module_text_llm.helpers.get_internal_sgi import get_internal_sgi, write_internal_sgi, extract_text_from_reference
 from module_text_llm.in_context_learning import InContextLearningConfig
-async def update_grading_instructions(exercise: Exercise, feedbacks:List[Feedback], submission : Submission ) -> List[Feedback]:
+async def update_grading_instructions(exercise: Exercise, feedbacks:List[Feedback], submission : Submission) -> List[Feedback]:
 
-# What do we need to do here. We need to give the existing internal grading instructions
-# together with the exercise data and the new suggestions. We ened to think about the structure
-# of how the updated instructions look like.
-# Do we want to provide examples of assessments?
-# Do we want to provide examples of feedbacks
+    logger.info("Generating updated internal SGI")
     debug = True
     iSGI = get_internal_sgi()
-    if(exercise.id not in iSGI):
+    ex_id = str(exercise.id)   
+    if(ex_id not in iSGI):
+        logger.info("Not in iSGI")
         return []
-    internal_instructions = iSGI[exercise.id]
+    internal_instructions = iSGI[ex_id]
     # We get the internal SGI
     config = InContextLearningConfig()
     model = config.model.get_model()  # type: ignore[attr-defined]
     prompt_input = {
         "max_points": exercise.max_points,
         "bonus_points": exercise.bonus_points,
-# We will no longer send the exercise grading instructions but rather the internal SGI
         "internal_SGI": str(internal_instructions),
-        #"grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
+        "grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
         "problem_statement": exercise.problem_statement or "No problem statement.",
         "example_solution": exercise.example_solution,
-        "feedbacks" : extract_text_from_reference(submission, feedbacks), # Model suggestion TODO get the exact text from the submission
-        # "adapted_suggestion" : [], # Tutor adapated suggestion
+        "feedbacks" : extract_text_from_reference(exercise.id,submission, feedbacks), # Model suggestion TODO get the exact text from the submission
         "submission": add_sentence_numbers(submission.text)
     }
-    #[TextFeedback(id=23, title=None, description='test2', credits=0.5, 
-    # structured_grading_instruction_id=None, is_graded=None, meta={}, 
-    # exercise_id=484, submission_id=2563, index_start=None, index_end=None)]
-
-# Additionally, we have to send the suggested feedback along with the adjustment.
+
     chat_prompt = get_chat_prompt_with_formatting_instructions(
         model=model, 
-        system_message=system_message, 
-        human_message=human_message, 
+        system_message=system_message_upgrade, 
+        human_message=human_message_upgrade, 
         pydantic_object=InternalGradingInstructions
     )
 
     result = await predict_and_parse(
-        model=model, 
+        model=model,
         chat_prompt=chat_prompt, 
         prompt_input=prompt_input, 
         pydantic_object=InternalGradingInstructions,
@@ -74,8 +63,9 @@ async def update_grading_instructions(exercise: Exercise, feedbacks:List[Feedbac
         })
 
     if result is None:
+        print("result was none")
         return []
 
-    iSGI[exercise.id] = result.dict()
+    iSGI[ex_id] = result.dict()
     write_internal_sgi(exercise.id, iSGI)
     return result
diff --git a/modules/text/module_text_llm/module_text_llm/in_context_learning/prompt_internal.py b/modules/text/module_text_llm/module_text_llm/in_context_learning/prompt_internal.py
@@ -1,17 +1,49 @@
 from pydantic import Field, BaseModel
 from typing import List, Optional
 
+system_message_upgrade = """
+You are tasked with upgrading the following Grading instructions. 
+You have been given some tutor feedback based on a student submission to do so. 
+Keep in mind that you must stay consistent with the original grading instructions.
+Do not remove any grading instructions; only add new ones if absolutely necessary in the case that you think the LLM will provide feedback consistent with the tutor.
+"""
+
+human_message_upgrade = """\
+# Problem statement
+{problem_statement}
+
+# Example solution
+{example_solution}
+
+# Original Grading Instructions
+{grading_instructions}
+
+# Internal Grading instructions
+{internal_SGI}
+
+# Tutor Feedback
+{feedbacks}
+
+# Submission
+{submission}
+
+#
+Max points: {max_points}, bonus points: {bonus_points}\
+    
+Respond in json.
+"""
+
 system_message = """\
 You are an AI tutor for text assessment at a prestigious university.
 
 # Task
 You are an assistant at a prestigious University. You are assisting in assessing student text based submissions.
-In order to do so, it is important to have structurred grading instructions, which will be continuesly updated.
-
-Your task is to use the provided grading instructions and use the example solution, to provide examples for how 
-the grading instruction is utilized.
+In order to do so, it is important to have internal structured grading instructions, which will be continuously updated and will help the LLM give better feedback.
 
+Your task is to use the provided grading instructions and use the example solution, to create a set of grading instructions that a machine can
+better understand and use to give feedback to students that is consistent with human feedback.
 
+Stay focused on the existing grading instructions and do not add new criteria: for example, if grammar, clarity, or anything else is not part of the instructions, do not add it.
 """
 
 human_message = """\