Prompt refactor (#125)

* Prompt manager integrated and working with 6 tests * Updated templates for prompt-template update
trustgraph-ai · Oct 26, 2024 · 1e13776 · 1e13776
1 parent 51aef6c
commit 1e13776
Show file tree

Hide file tree

Showing 19 changed files with 647 additions and 477 deletions.
diff --git a/templates/components/googleaistudio.jsonnet b/templates/components/googleaistudio.jsonnet
@@ -13,7 +13,7 @@ local prompts = import "prompts/mixtral.jsonnet";
         create:: function(engine)
 
             local envSecrets = engine.envSecrets("bedrock-credentials")
-                .with_env_var("GOOGLEAISTUDIO_KEY", "googleaistudio-key");
+                .with_env_var("GOOGLE_AI_STUDIO_KEY", "googleaistudio-key");
 
             local container =
                 engine.container("text-completion")

diff --git a/templates/components/prompt-template.jsonnet b/templates/components/prompt-template.jsonnet
@@ -17,22 +17,38 @@ local default_prompts = import "prompts/default-prompts.jsonnet";
                         "prompt-template",
                         "-p",
                         url.pulsar,
+
                         "--text-completion-request-queue",
                         "non-persistent://tg/request/text-completion",
                         "--text-completion-response-queue",
                         "non-persistent://tg/response/text-completion-response",
-                        "--definition-template",
+
+                        "--system-prompt",
+                        $["system-template"],
+
+                        "--prompt",
+                        "question={{question}}",
+                        "extract-definitions=" +
                         $["prompt-definition-template"],
-                        "--relationship-template",
+                        "extract-relationships=" +
                         $["prompt-relationship-template"],
-                        "--topic-template",
+                        "extract-topics=" +
                         $["prompt-topic-template"],
-                        "--knowledge-query-template",
+                        "kg-prompt=" +
                         $["prompt-knowledge-query-template"],
-                        "--document-query-template",
+                        "document-prompt=" +
                         $["prompt-document-query-template"],
-                        "--rows-template",
+                        "extract-rows=" +
                         $["prompt-rows-template"],
+
+                        "--prompt-response-type",
+                        "extract-definitions=json",
+                        "extract-relationships=json",
+                        "extract-topics=json",
+                        "kg-prompt=text",
+                        "document-prompt=text",
+                        "extract-rows=json",
+
                     ])
                     .with_limits("0.5", "128M")
                     .with_reservations("0.1", "128M");
@@ -71,18 +87,33 @@ local default_prompts = import "prompts/default-prompts.jsonnet";
                         "non-persistent://tg/request/text-completion-rag",
                         "--text-completion-response-queue",
                         "non-persistent://tg/response/text-completion-rag-response",
-                        "--definition-template",
+
+                        "--system-prompt",
+                        $["system-template"],
+
+                        "--prompt",
+                        "question={{question}}",
+                        "extract-definitions=" +
                         $["prompt-definition-template"],
-                        "--relationship-template",
+                        "extract-relationships=" +
                         $["prompt-relationship-template"],
-                        "--topic-template",
+                        "extract-topics=" +
                         $["prompt-topic-template"],
-                        "--knowledge-query-template",
+                        "kg-prompt=" +
                         $["prompt-knowledge-query-template"],
-                        "--document-query-template",
+                        "document-prompt=" +
                         $["prompt-document-query-template"],
-                        "--rows-template",
+                        "extract-rows=" +
                         $["prompt-rows-template"],
+
+                        "--prompt-response-type",
+                        "extract-definitions=json",
+                        "extract-relationships=json",
+                        "extract-topics=json",
+                        "kg-prompt=text",
+                        "document-prompt=text",
+                        "extract-rows=json",
+
                     ])
                     .with_limits("0.5", "128M")
                     .with_reservations("0.1", "128M");

diff --git a/templates/prompts/default-prompts.jsonnet b/templates/prompts/default-prompts.jsonnet
@@ -4,16 +4,18 @@
 
 {
 
-    "prompt-definition-template":: "<instructions>\nStudy the following text and derive definitions for any discovered entities.\nDo not provide definitions for entities whose definitions are incomplete\nor unknown.\nOutput relationships in JSON format as an arary of objects with fields:\n- entity: the name of the entity\n- definition: English text which defines the entity\n</instructions>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract will be written as plain text.  Do not add markdown formatting\nor headers or prefixes.  Do not include null or unknown definitions.\n</requirements>",
+    "system-template":: "You are a helpful assistant.",
 
-    "prompt-relationship-template":: "<instructions>\nStudy the following text and derive entity relationships.  For each\nrelationship, derive the subject, predicate and object of the relationship.\nOutput relationships in JSON format as an arary of objects with fields:\n- subject: the subject of the relationship\n- predicate: the predicate\n- object: the object of the relationship\n- object-entity: false if the object is a simple data type: name, value or date.  true if it is an entity.\n</instructions>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract must be written as plain text.  Do not add markdown formatting\nor headers or prefixes.\n</requirements>",
+    "prompt-definition-template":: "<instructions>\nStudy the following text and derive definitions for any discovered entities.\nDo not provide definitions for entities whose definitions are incomplete\nor unknown.\nOutput relationships in JSON format as an arary of objects with fields:\n- entity: the name of the entity\n- definition: English text which defines the entity\n</instructions>\n\n<text>\n{{text}}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract will be written as plain text.  Do not add markdown formatting\nor headers or prefixes.  Do not include null or unknown definitions.\n</requirements>",
 
-    "prompt-topic-template":: "You are a helpful assistant that performs information extraction tasks for a provided text.\nRead the provided text. You will identify topics and their definitions in JSON.\n\nReading Instructions:\n- Ignore document formatting in the provided text.\n- Study the provided text carefully.\n\nHere is the text:\n{text}\n\nResponse Instructions: \n- Do not respond with special characters.\n- Return only topics that are concepts and unique to the provided text.\n- Respond only with well-formed JSON.\n- The JSON response shall be an array of objects with keys \"topic\" and \"definition\". \n- The JSON response shall use the following structure:\n\n```json\n[{{\"topic\": string, \"definition\": string}}]\n```\n\n- Do not write any additional text or explanations.",
+    "prompt-relationship-template":: "<instructions>\nStudy the following text and derive entity relationships.  For each\nrelationship, derive the subject, predicate and object of the relationship.\nOutput relationships in JSON format as an arary of objects with fields:\n- subject: the subject of the relationship\n- predicate: the predicate\n- object: the object of the relationship\n- object-entity: false if the object is a simple data type: name, value or date.  true if it is an entity.\n</instructions>\n\n<text>\n{{text}}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract must be written as plain text.  Do not add markdown formatting\nor headers or prefixes.\n</requirements>",
 
-    "prompt-knowledge-query-template":: "Study the following set of knowledge statements. The statements are written in Cypher format that has been extracted from a knowledge graph. Use only the provided set of knowledge statements in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere's the knowledge statements:\n{graph}\n\nUse only the provided knowledge statements to respond to the following:\n{query}\n",
+    "prompt-topic-template":: "You are a helpful assistant that performs information extraction tasks for a provided text.\nRead the provided text. You will identify topics and their definitions in JSON.\n\nReading Instructions:\n- Ignore document formatting in the provided text.\n- Study the provided text carefully.\n\nHere is the text:\n{{text}}\n\nResponse Instructions: \n- Do not respond with special characters.\n- Return only topics that are concepts and unique to the provided text.\n- Respond only with well-formed JSON.\n- The JSON response shall be an array of objects with keys \"topic\" and \"definition\". \n- The JSON response shall use the following structure:\n\n```json\n[{\"topic\": string, \"definition\": string}]\n```\n\n- Do not write any additional text or explanations.",
 
-    "prompt-document-query-template":: "Study the following context. Use only the information provided in the context in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere is the context:\n{documents}\n\nUse only the provided knowledge statements to respond to the following:\n{query}\n",
+    "prompt-knowledge-query-template":: "Study the following set of knowledge statements. The statements are written in Cypher format that has been extracted from a knowledge graph. Use only the provided set of knowledge statements in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere's the knowledge statements:\n{% for edge in knowledge %}({{edge.s}})-[{{edge.p}}]->({{edge.o}})\n{%endfor%}\n\nUse only the provided knowledge statements to respond to the following:\n{{query}}\n",
 
-    "prompt-rows-template":: "<instructions>\nStudy the following text and derive objects which match the schema provided.\n\nYou must output an array of JSON objects for each object you discover\nwhich matches the schema.  For each object, output a JSON object whose fields\ncarry the name field specified in the schema.\n</instructions>\n\n<schema>\n{schema}\n</schema>\n\n<text>\n{text}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not add markdown formatting or headers or prefixes.\n</requirements>",
+    "prompt-document-query-template":: "Study the following context. Use only the information provided in the context in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere is the context:\n{{documents}}\n\nUse only the provided knowledge statements to respond to the following:\n{{query}}\n",
+
+    "prompt-rows-template":: "<instructions>\nStudy the following text and derive objects which match the schema provided.\n\nYou must output an array of JSON objects for each object you discover\nwhich matches the schema.  For each object, output a JSON object whose fields\ncarry the name field specified in the schema.\n</instructions>\n\n<schema>\n{{schema}}\n</schema>\n\n<text>\n{{text}}\n</text>\n\n<requirements>\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not add markdown formatting or headers or prefixes.\n</requirements>",
 
 }
diff --git a/tests/README.prompts b/tests/README.prompts
@@ -0,0 +1,27 @@
+
+The test-prompt-* scripts are tested against a prompt-template service started with this prompt set:
+
+prompt-template \
+    -p pulsar://localhost:6650 \
+    --system-prompt 'You are a {{attitude}}, you are called {{name}}' \
+    --global-term \
+        'name=Craig' \
+        'attitude=LOUD, SHOUTY ANNOYING BOT' \
+    --prompt \
+        'question={{question}}' \
+        'french-question={{question}}' \
+        "analyze=Find the name and age in this text, and output a JSON structure containing just the name and age fields: {{description}}.  Don't add markup, just output the raw JSON object." \
+        "graph-query=Study the following knowledge graph, and then answer the question.\\n\nGraph:\\n{% for edge in knowledge %}({{edge.0}})-[{{edge.1}}]->({{edge.2}})\\n{%endfor%}\\nQuestion:\\n{{question}}" \
+        "extract-definition=Analyse the text provided, and then return a list of terms and definitions.  The output should be a JSON array, each item in the array is an object with fields 'term' and 'definition'.  Don't add markup, just output the raw JSON object.  Here is the text:\\n{{text}}" \
+    --prompt-response-type \
+        'question=text' \
+        'analyze=json' \
+        'graph-query=text' \
+        'extract-definition=json' \
+    --prompt-term \
+        'question=name:Bonny' \
+        'french-question=attitude:French-speaking bot' \
+    --prompt-schema \
+        'analyze={ "type" : "object", "properties" : { "age": { "type" : "number" }, "name": { "type" : "string" } } }' \
+        'extract-definition={ "type": "array", "items": { "type": "object", "properties": { "term": { "type": "string" }, "definition": { "type": "string" } }, "required": [ "term", "definition" ] } }'
+
diff --git a/tests/test-lang-definition b/tests/test-lang-definition
@@ -7,7 +7,13 @@ p = PromptClient(pulsar_host="pulsar://localhost:6650")
 
 chunk = """I noticed a cat in my garden.  It is a four-legged animal
 which is a mammal and can be tame or wild.  I wonder if it will be friends
-with me.  I think the cat's name is Fred and it has 4 legs"""
+with me.  I think the cat's name is Fred and it has 4 legs.
+
+A cat is a small mammal.
+
+A grapefruit is a citrus fruit.
+
+"""
 
 resp = p.request_definitions(
     chunk=chunk,

diff --git a/tests/test-lang-topics b/tests/test-lang-topics
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+
+import pulsar
+from trustgraph.clients.prompt_client import PromptClient
+
+p = PromptClient(pulsar_host="pulsar://localhost:6650")
+
+chunk = """I noticed a cat in my garden.  It is a four-legged animal
+which is a mammal and can be tame or wild.  I wonder if it will be friends
+with me.  I think the cat's name is Fred and it has 4 legs"""
+
+resp = p.request_topics(
+    chunk=chunk,
+)
+
+for d in resp:
+    print(d.topic)
+    print("  ", d.definition)
+
diff --git a/tests/test-prompt-analyze b/tests/test-prompt-analyze
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+
+import json
+from trustgraph.clients.prompt_client import PromptClient
+
+p = PromptClient(pulsar_host="pulsar://localhost:6650")
+
+description = """Fred is a 4-legged cat who is 12 years old"""
+
+resp = p.request(
+    id="analyze",
+    terms = {
+        "description": description,
+    }
+)
+
+print(json.dumps(resp, indent=4))
+
diff --git a/tests/test-prompt-extraction b/tests/test-prompt-extraction
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+import json
+from trustgraph.clients.prompt_client import PromptClient
+
+p = PromptClient(pulsar_host="pulsar://localhost:6650")
+
+chunk="""
+ The Space Shuttle was a reusable spacecraft that transported astronauts and cargo to and from Earth's orbit. It was designed to launch like a rocket, maneuver in orbit like a spacecraft, and land like an airplane. The Space Shuttle was NASA's space transportation system and was used for many purposes, including: 
+
+    Carrying astronauts
+    The Space Shuttle could carry up to seven astronauts at a time. 
+
+Launching, recovering, and repairing satellites
+The Space Shuttle could launch satellites into orbit, recover them, and repair them. 
+Building the International Space Station
+The Space Shuttle carried large parts into space to build the International Space Station. 
+Conducting research
+Astronauts conducted experiments in the Space Shuttle, which was like a science lab in space. 
+
+The Space Shuttle was retired in 2011 after the Columbia accident in 2003. The Columbia Accident Investigation Board report found that the Space Shuttle was unsafe and expensive to make safe. 
+Here are some other facts about the Space Shuttle: 
+
+    The Space Shuttle was 184 ft tall and had a diameter of 29 ft. 
+
+The Space Shuttle had a mass of 4,480,000 lb. 
+The Space Shuttle's first flight was on April 12, 1981. 
+The Space Shuttle's last mission was in 2011. 
+"""
+
+q = "Tell me some facts in the knowledge graph"
+
+resp = p.request(
+    id="extract-definition",
+    terms = {
+        "text": chunk,
+    }
+)
+
+print(resp)
+
+for fact in resp:
+    print(fact["term"], "::")
+    print(fact["definition"])
+    print()
+
diff --git a/tests/test-prompt-french-question b/tests/test-prompt-french-question
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+
+import pulsar
+from trustgraph.clients.prompt_client import PromptClient
+
+p = PromptClient(pulsar_host="pulsar://localhost:6650")
+
+question = """What is the square root of 16?"""
+
+resp = p.request(
+    id="french-question",
+    terms = {
+        "question": question
+    }
+)
+
+print(resp)
+
diff --git a/tests/test-prompt-knowledge b/tests/test-prompt-knowledge
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+import json
+from trustgraph.clients.prompt_client import PromptClient
+
+p = PromptClient(pulsar_host="pulsar://localhost:6650")
+
+knowledge = [
+      ("accident", "evoked", "a wide range of deeply felt public responses"),
+      ("Space Shuttle concept", "had", "genesis"),
+      ("Commission", "had", "a mandate to develop recommendations for corrective or other action based upon the Commission's findings and determinations"),
+      ("Commission", "established", "teams of persons"),
+      ("Space Shuttle Challenger", "http://www.w3.org/2004/02/skos/core#definition", "A space shuttle that was destroyed in an accident during mission 51-L."),
+      ("The mid fuselage", "contains", "the payload bay"),
+      ("Volume I", "contains", "Chapter IX"),
+      ("accident", "resulted in", "firm national resolve that those men and women be forever enshrined in the annals of American heroes"),
+      ("Volume I", "contains", "Chapter VII"),
+      ("Volume I", "contains", "Chapter II"),
+      ("Volume I", "contains", "Chapter V"),
+      ("Commission", "believes", "its investigation and report have been responsive to the request of the President and hopes that they will serve the best interests of the nation in restoring the United States space program to its preeminent position in the world"),
+      ("Commission", "construe", "mandate"),
+      ("accident", "became", "a milestone on the way to achieving the full potential that space offers to mankind"),
+      ("Volume I", "contains", "The Commission"),
+      ("Commission", "http://www.w3.org/2004/02/skos/core#definition", "A group established to investigate the space shuttle accident"),
+      ("Volume I", "contains", "Appendix D"),
+      ("Commission", "had", "a mandate to review the circumstances surrounding the accident to establish the probable cause or causes of the accident"),
+      ("Volume I", "contains", "Recommendations")
+]
+
+q = "Tell me some facts in the knowledge graph"
+
+resp = p.request(
+    id="graph-query",
+    terms = {
+        "name": "Jayney",
+        "knowledge": knowledge,
+        "question": q
+    }
+)
+
+print(resp)
+
+
+
diff --git a/tests/test-prompt-question b/tests/test-prompt-question
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+
+import pulsar
+from trustgraph.clients.prompt_client import PromptClient
+
+p = PromptClient(pulsar_host="pulsar://localhost:6650")
+
+question = """What is the square root of 16?"""
+
+resp = p.request(
+    id="question",
+    terms = {
+        "question": question
+    }
+)
+
+print(resp)
+
diff --git a/tests/test-prompt-spanish-question b/tests/test-prompt-spanish-question
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+
+import pulsar
+from trustgraph.clients.prompt_client import PromptClient
+
+p = PromptClient(pulsar_host="pulsar://localhost:6650")
+
+question = """What is the square root of 16?"""
+
+resp = p.request(
+    id="question",
+    terms = {
+        "question": question,
+        "attitude": "Spanish-speaking bot"
+    }
+)
+
+print(resp)
+