import dspy
import os
from IPython.display import display, HTML
!pip show dspy-ai | grep Version
Version: 2.4.9
May 27, 2024
I have read a lot about the capabilities of the DSPy library from Stanford, and I am trying to understand what it can do. Let's start with https://github.com/stanfordnlp/dspy/blob/main/intro.ipynb. My comments will be highlighted in red.
Version: 2.4.9
We are using groq inference as it is much cheaper and faster than openai
# helper functions to print in color
def color_html(s, color='black'):
    """Return ``s`` as an HTML fragment that renders in the given color.

    Args:
        s: Plain text to display. Newlines become ``<br>`` tags.
        color: CSS color value placed in the ``style`` attribute.

    Returns:
        A string of the form ``<span style="color:...">...</span>``.
    """
    import html  # local import so the helper does not depend on other cells

    # Escape before inserting the <br> tags: prompts and completions may
    # contain '<', '>' or '&', which would otherwise be parsed as markup
    # instead of being shown literally.
    body = html.escape(s).replace('\n', '<br>')
    # Quote the attribute so color values containing spaces stay intact.
    return '<span style="color:{}">{}</span>'.format(html.escape(color), body)
def cprint(s, color='black'):
    """Render ``s`` in the notebook output using the given text color."""
    markup = color_html(s, color)
    display(HTML(markup))
def print_prompt(lm, idx=-1):
    """Show, in blue, the prompt stored in entry ``idx`` of ``lm.history``.

    ``idx`` defaults to -1, i.e. the last history entry.
    """
    entry = lm.history[idx]
    cprint(entry['prompt'], color='blue')
def print_response(lm, idx=-1):
    """Show, in green, the response stored in entry ``idx`` of ``lm.history``.

    ``idx`` defaults to -1, i.e. the last history entry.
    """
    entry = lm.history[idx]
    cprint(entry['response'], color='green')
Whatever the task, the general workflow is:
from dspy.datasets import HotPotQA
# Load a small HotPotQA sample: 20 training examples, 50 dev examples and no
# test examples, with fixed seeds for the train and eval splits.
dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)
# tell DSPy that the question field is the one we want to use (from all the fields in the dataset)
trainset = [x.with_inputs('question') for x in dataset.train]
devset = [x.with_inputs('question') for x in dataset.dev]
# Last expression of the cell: the notebook displays the two split sizes.
len(trainset), len(devset)
/Users/achinta/miniforge3/envs/deepsql/lib/python3.11/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.
table = cls._concat_blocks(blocks, axis=0)
(20, 50)
We just loaded the trainset (20 examples) and the devset (50 examples). Let's look at some examples from the trainset.
# Select a train example and print its question and gold answer.
train_example = trainset[0]
print(f"Question: {train_example.question}")
print(f"Answer: {train_example.answer}")

# Select the dev example that the later cells query the predictors with.
# (The original cell assigned train_example a second time here; the
# duplicate assignment was redundant and has been dropped.)
dev_example = devset[18]
Question: At My Window was released by which American singer-songwriter?
Answer: John Townes Van Zandt
class BasicQA(dspy.Signature):
    """Answers questions with short factoid answers"""

    # The docstring above and the `desc` text below are runtime text: both
    # appear verbatim in the prompt sent to the LM, so neither is reworded.
    question = dspy.InputField()
    answer = dspy.OutputField(desc='often between 1 and 5 words')
# Define a plain predictor for the BasicQA signature (no chain-of-thought).
generate_answer = dspy.Predict(BasicQA)
# Call the predictor on the dev example's question.
pred = generate_answer(question=dev_example.question)
# Print the input and the prediction.
print(f"Question: {dev_example.question}")
print(f"Prediction: {pred.answer}")
Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
Prediction: Robert Irvine
{'prompt': "Answers questions with short factoid answers\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: often between 1 and 5 words\n\n---\n\nQuestion: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?\nReasoning: Let's think step by step in order to",
'response': "Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?\nReasoning: Let's think step by step in order to figure out the nationality of the chef. He is an American chef and restaurateur, and the show is set in the United States. Therefore...\nAnswer: American",
'kwargs': {'temperature': 0.0,
'max_tokens': 150,
'top_p': 1,
'frequency_penalty': 0,
'presence_penalty': 0,
'n': 1,
'model': 'llama3-8b-8192',
'messages': [{'role': 'user',
'content': "Answers questions with short factoid answers\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: often between 1 and 5 words\n\n---\n\nQuestion: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?\nReasoning: Let's think step by step in order to"}]},
'raw_kwargs': {}}
Lets now look at the prompt and the response
Let us use chain-of-thought. We can do that by creating a ChainOfThought class from any signature.
# Define the predictor. Notice we're just changing the class. The signature BasicQA is unchanged.
generate_answer_with_cot = dspy.ChainOfThought(BasicQA)

# Call the predictor on the same dev question as before.
pred = generate_answer_with_cot(question=dev_example.question)

# Drop the rationale's first sentence when there is one. Indexing with [-1]
# instead of [1] falls back to the whole rationale when it contains no
# period, instead of raising IndexError.
thought = pred.rationale.split('.', 1)[-1].strip()

print(f"Question: {dev_example.question}")
print(f"Thought: {thought}")
print(f"Predicted Answer: {pred.answer}")
Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
Thought: He is an American chef and restaurateur, and the show is set in the United States. Therefore...
Predicted Answer: American
Lets now look at the prompt and the response for ChainOfThought
number of LLM calls till now - 2
The Reasoning variable is included in the prompt by the class dspy.ChainOfThought (check the code).
How is the prompt able to generate completions for multiple output variables (say Reasoning and answer) in the same prompt?
Or is it? The prompt cleverly stops after "Reasoning: Let's think step by step in order to", and the response is generated for the Reasoning as well as the answer.
When the LLM is able to generate only plain text completion, how are we able to populate output variables in the Predictions class?
# Fetch the top 3 passages for the dev question from the configured retriever.
retrieve = dspy.Retrieve(k=3)
topK_passages = retrieve(dev_example.question).passages

# Print the passages numbered from 1, each followed by a blank line.
for rank, passage in enumerate(topK_passages, start=1):
    print(f'{rank}]', passage, '\n')
1] Restaurant: Impossible | Restaurant: Impossible is an American reality television series, featuring chef and restaurateur Robert Irvine, that aired on Food Network from 2011 to 2016.
2] Jean Joho | Jean Joho is a French-American chef and restaurateur. He is chef/proprietor of Everest in Chicago (founded in 1986), Paris Club Bistro & Bar and Studio Paris in Chicago, The Eiffel Tower Restaurant in Las Vegas, and Brasserie JO in Boston.
3] List of Restaurant: Impossible episodes | This is the list of the episodes for the American cooking and reality television series "Restaurant Impossible", produced by Food Network. The premise of the series is that within two days and on a budget of $10,000, celebrity chef Robert Irvine renovates a failing American restaurant with the goal of helping to restore it to profitability and prominence. Irvine is assisted by a designer (usually Taniya Nayak, Cheryl Torrenueva, or Lynn Keagan, but sometimes Vanessa De Leon, Krista Watterworth, Yvette Irene, or Nicole Faccuito), along with general contractor Tom Bury, who sometimes does double duty as both general contractor and designer. After assessing the problems with the restaurant, Robert Irvine typically creates a plan for the new decor, oversees the cleaning of the restaurant, reduces the size of the menu and improves the food, develops a promotional activity, educates the restaurant's owners, or trains the staff, as needed by each restaurant.
Let’s define our first complete program for this task. We’ll build a retrieval-augmented pipeline for answer generation.
Given a question, we’ll search for the top-3 passages in Wikipedia and then feed them as context for answer generation.
Let’s start by defining this signature: context, question --> answer.
class GenerateAnswer(dspy.Signature):
    """Answers questions with short factoid answers"""

    # Two inputs (context, question) and one output (answer). The docstring
    # and the `desc` strings are runtime text and are kept verbatim.
    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")
class RAG(dspy.Module):
    """Retrieve passages for a question, then answer it from those passages."""

    def __init__(self, num_passages=3):
        """Create the retriever (top ``num_passages``) and the answer generator."""
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Return a Prediction holding the retrieved context and the answer."""
        passages = self.retrieve(question).passages
        answered = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=answered.answer)
Having defined this program, let’s now compile it. Compiling a program will update the parameters stored in each module. In our setting, this is primarily in the form of collecting and selecting good demonstrations for inclusion in your prompt(s).
Compiling depends on three things:
1. A training set. We’ll just use our 20 question–answer examples from trainset above.
2. A metric for validation. We’ll define a simple validate_context_and_answer that checks that the predicted answer is correct. It’ll also check that the retrieved context does actually contain that answer.
3. A specific teleprompter. Teleprompters are powerful optimizers that can take any program and learn to bootstrap and select effective prompts for its modules. Hence the name, which means “prompting at a distance”.

Different teleprompters offer various tradeoffs in terms of how much they optimize cost versus quality, etc. We will use a simple default BootstrapFewShot in this notebook.
If you’re into analogies, you could think of this as your training data, your loss function, and your optimizer in a standard DNN supervised learning setup. Whereas SGD is a basic optimizer, there are more sophisticated (and more expensive!) ones like Adam or RMSProp.
from dspy.teleprompt import BootstrapFewShot
# Validation logic: check that the predicted answer is correct.
# Also check that the retrieved context does actually contain that answer.
def validate_context_and_answer(example, pred, trace=None):
    """Metric: the answer must match exactly and appear in the retrieved context.

    Both checks are always evaluated. ``trace`` is accepted but not used.
    """
    exact_match = dspy.evaluate.answer_exact_match(example, pred)
    passage_match = dspy.evaluate.answer_passage_match(example, pred)
    return exact_match and passage_match
# Set up a basic teleprompter, which will compile our RAG program.
# validate_context_and_answer is passed as the metric; metric_threshold is
# explicitly set to None.
teleprompter = BootstrapFewShot(metric=validate_context_and_answer, metric_threshold=None)
# Compile! A fresh, uncompiled RAG() is the student and the 20-example
# trainset supplies the training data.
compiled_rag = teleprompter.compile(student=RAG(), trainset=trainset)
0%| | 0/20 [00:00<?, ?it/s]0.00s - Debugger warning: It seems that frozen modules are being used, which may
0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
55%|█████▌ | 11/20 [01:01<00:49, 5.55s/it]
What does compiling actually do?
- Here we want to teach the student
(the RAG model) to learn something from samples from the trainset.
- We make a copy of the student and the teacher using the reset_copy() method, which uses deepcopy and resets any parameters in the student or teacher.
- Here the teacher is the compiled LabeledFewShot, which is nothing but selecting k (default=16) samples from the trainset.
- We set the metric_threshold to None, so we are just ignoring the result of validate_context_and_answer and just selecting all the examples.
- So let us now see what is the result of the compilation below. We can see all the 16 examples added as demos to the student RAG module.
{'retrieve': {'k': 3},
'generate_answer': {'lm': None,
'traces': [],
'train': [],
'demos': [Example({'augmented': True, 'context': ['Tae Kwon Do Times | Tae Kwon Do Times is a magazine devoted to the martial art of taekwondo, and is published in the United States of America. While the title suggests that it focuses on taekwondo exclusively, the magazine also covers other Korean martial arts. "Tae Kwon Do Times" has published articles by a wide range of authors, including He-Young Kimm, Thomas Kurz, Scott Shaw, and Mark Van Schuyver.', "Kwon Tae-man | Kwon Tae-man (born 1941) was an early Korean hapkido practitioner and a pioneer of the art, first in Korea and then in the United States. He formed one of the earliest dojang's for hapkido in the United States in Torrance, California, and has been featured in many magazine articles promoting the art.", 'Hee Il Cho | Cho Hee Il (born October 13, 1940) is a prominent Korean-American master of taekwondo, holding the rank of 9th "dan" in the martial art. He has written 11 martial art books, produced 70 martial art training videos, and has appeared on more than 70 martial arts magazine covers. Cho won several national and international competitions as a taekwondo competitor, and has appeared in several films, including "Fight to Win", "Best of the Best", "Bloodsport II", and "Bloodsport III". He founded the Action International Martial Arts Association (AIMAA) in 1980, and is its President. Cho is a member of both "Black Belt" magazine\'s Hall of Fame and "Tae Kwon Do Times" magazine\'s Hall of Fame.'], 'question': 'Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?', 'rationale': 'produce the answer. We can look at the context provided, which mentions that "Tae Kwon Do Times" has published articles by a wide range of authors, including Scott Shaw. Therefore, the correct answer is:', 'answer': 'Tae Kwon Do Times'}) (input_keys=None),
Example({'augmented': True, 'context': ['Rosario Dawson | Rosario Isabel Dawson (born May 9, 1979) is an American actress, producer, singer, comic book writer, and political activist. She made her film debut in the 1995 teen drama "Kids". Her subsequent film roles include "He Got Game", "Men in Black II", "25th Hour", "Rent", "Sin City", "Death Proof", "Seven Pounds", "", and "Top Five". Dawson has also provided voice-over work for Disney and DC.', 'Sarai Gonzalez | Sarai Isaura Gonzalez (born 2005) is an American Latina child actress who made her professional debut at the age of 11 on the Spanish-language ""Soy Yo"" ("That\'s Me") music video by Bomba Estéreo. Cast as a "nerdy" tween with a "sassy" and "confident" attitude, her performance turned her into a "Latina icon" for "female empowerment, identity and self-worth". She subsequently appeared in two get out the vote videos for Latinos in advance of the 2016 United States elections.', 'Gabriela (2001 film) | Gabriela is a 2001 American romance film, starring Seidy Lopez in the title role alongside Jaime Gomez as her admirer Mike. The film has been cited as an inspiration behind the Premiere Weekend Club, which supports Latino film-making.'], 'question': 'Which American actress who made their film debut in the 1995 teen drama "Kids" was the co-founder of Voto Latino?', 'rationale': 'produce the answer. We can look at the context provided, which mentions Rosario Dawson, an American actress who made her film debut in the 1995 teen drama "Kids".', 'answer': 'Rosario Dawson'}) (input_keys=None),
Example({'augmented': True, 'context': ['Battle of Kursk | The Battle of Kursk was a Second World War engagement between German and Soviet forces on the Eastern Front near Kursk (450 km south-west of Moscow) in the Soviet Union during July and August 1943. The battle began with the launch of the German offensive, Operation Citadel (German: "Unternehmen Zitadelle" ), on 5 July, which had the objective of pinching off the Kursk salient with attacks on the base of the salient from north and south simultaneously. After the German offensive stalled on the northern side of the salient, on 12 July the Soviets commenced their Kursk Strategic Offensive Operation with the launch of Operation Kutuzov (Russian: Кутузов ) against the rear of the German forces in the northern side. On the southern side, the Soviets also launched powerful counterattacks the same day, one of which led to a large armoured clash, the Battle of Prokhorovka. On 3 August, the Soviets began the second phase of the Kursk Strategic Offensive Operation with the launch of Operation Polkovodets Rumyantsev (Russian: Полководец Румянцев ) against the German forces in the southern side of the Kursk salient.', 'Operation Mars | Operation Mars, also known as the Second Rzhev-Sychevka Offensive Operation (Russian: Вторая Ржевско-Сычёвская наступательная операция), was the codename for an offensive launched by Soviet forces against German forces during World War II. It took place between 25 November and 20 December 1942 around the Rzhev salient in the vicinity of Moscow.', 'Kholm Pocket | The Kholm Pocket (German: "Kessel von Cholm" ; Russian: Холмский котёл ) was the name given for the encirclement of German troops by the Red Army around Kholm south of Leningrad, during World War II on the Eastern Front, from 23 January 1942 until 5 May 1942. A much larger pocket was simultaneously surrounded in Demyansk, about 100 km to the northeast. 
These were the results of German retreat following their defeat during the Battle of Moscow.'], 'question': 'What is the code name for the German offensive that started this Second World War engagement on the Eastern Front (a few hundred kilometers from Moscow) between Soviet and German forces, which included 102nd Infantry Division?', 'rationale': 'produce the answer. We can look at the context provided, which describes the Battle of Kursk. According to the text, the German offensive was launched on 5 July, and it had the objective of pinching off the Kursk salient. The text also mentions that the German offensive was called "Operation Citadel" or "Unternehmen Zitadelle" in German.', 'answer': 'Operation Citadel'}) (input_keys=None),
Example({'augmented': True, 'context': ['Kerry Condon | Kerry Condon (born 4 January 1983) is an Irish television and film actress, best known for her role as Octavia of the Julii in the HBO/BBC series "Rome," as Stacey Ehrmantraut in AMC\'s "Better Call Saul" and as the voice of F.R.I.D.A.Y. in various films in the Marvel Cinematic Universe. She is also the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet."', 'Corona Riccardo | Corona Riccardo (c. 1878October 15, 1917) was an Italian born American actress who had a brief Broadway stage career before leaving to become a wife and mother. Born in Naples she came to acting in 1894 playing a Mexican girl in a play at the Empire Theatre. Wilson Barrett engaged her for a role in his play "The Sign of the Cross" which he took on tour of the United States. Riccardo played the role of Ancaria and later played Berenice in the same play. Robert B. Mantell in 1898 who struck by her beauty also cast her in two Shakespeare plays, "Romeo and Juliet" and "Othello". Author Lewis Strang writing in 1899 said Riccardo was the most promising actress in America at the time. Towards the end of 1898 Mantell chose her for another Shakespeare part, Ophelia im Hamlet. Afterwards she was due to join Augustin Daly\'s Theatre Company but Daly died in 1899. In 1899 she gained her biggest fame by playing Iras in the first stage production of Ben-Hur.', 'Judi Dench | Dame Judith Olivia "Judi" Dench, {\'1\': ", \'2\': ", \'3\': ", \'4\': "} (born 9 December 1934) is an English actress and author. Dench made her professional debut in 1957 with the Old Vic Company. Over the following few years, she performed in several of Shakespeare\'s plays in such roles as Ophelia in "Hamlet", Juliet in "Romeo and Juliet", and Lady Macbeth in "Macbeth". Although most of her work during this period was in theatre, she also branched into film work and won a BAFTA Award as Most Promising Newcomer. 
She drew strong reviews for her leading role in the musical "Cabaret" in 1968.'], 'question': 'Who acted in the shot film The Shore and is also the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet." ?', 'rationale': 'find the answer. We can look at the context provided, which mentions three actresses: Kerry Condon, Corona Riccardo, and Judi Dench. Among them, Kerry Condon is the one who is the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet."', 'answer': 'Kerry Condon'}) (input_keys=None),
Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys=None),
Example({'question': '"Everything Has Changed" is a song from an album released under which record label ?', 'answer': 'Big Machine Records'}) (input_keys=None),
Example({'question': 'The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?', 'answer': '1950'}) (input_keys=None),
Example({'question': 'Which Pakistani cricket umpire who won 3 consecutive ICC umpire of the year awards in 2009, 2010, and 2011 will be in the ICC World Twenty20?', 'answer': 'Aleem Sarwar Dar'}) (input_keys=None),
Example({'question': 'Having the combination of excellent foot speed and bat speed helped Eric Davis, create what kind of outfield for the Los Angeles Dodgers? ', 'answer': '"Outfield of Dreams"'}) (input_keys=None),
Example({'question': 'Who is older, Aleksandr Danilovich Aleksandrov or Anatoly Fomenko?', 'answer': 'Aleksandr Danilovich Aleksandrov'}) (input_keys=None),
Example({'question': 'The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?', 'answer': '2010'}) (input_keys=None),
Example({'question': 'Tombstone stared an actor born May 17, 1955 known as who?', 'answer': 'Bill Paxton'}) (input_keys=None),
Example({'question': 'In what year was the club founded that played Manchester City in the 1972 FA Charity Shield', 'answer': '1874'}) (input_keys=None),
Example({'question': 'which American actor was Candace Kita guest starred with ', 'answer': 'Bill Murray'}) (input_keys=None),
Example({'question': 'Which is taller, the Empire State Building or the Bank of America Tower?', 'answer': 'The Empire State Building'}) (input_keys=None),
Example({'question': 'Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?', 'answer': 'Buena Vista Distribution'}) (input_keys=None)],
'signature_instructions': 'Answers questions with short factoid answers',
'signature_prefix': 'Answer:',
'extended_signature_instructions': 'Answers questions with short factoid answers',
'extended_signature_prefix': 'Answer:'}}
Let us now use the compiled model and see what's happening. As the prompt size is large, the llama3 8b model is unable to give the right answer. It is ignoring the instructions. So let us use a 70b model.
# Switch to the llama3-70b model on Groq; the API key is read from the
# GROQ_API_KEY environment variable.
lm = dspy.GROQ(api_key=os.getenv('GROQ_API_KEY'), model='llama3-70b-8192')
# NOTE(review): `retriever` is not defined in the visible part of this
# notebook; presumably it is created in an earlier setup cell. Confirm.
dspy.settings.configure(lm=lm, rm=retriever)
# Build a fresh RAG program and load saved state from rag_model.json.
# NOTE(review): the cell that writes rag_model.json is not visible here;
# presumably it was saved from the compiled program. Confirm.
compiled_rag = RAG()
compiled_rag.load('rag_model.json')
my_question = "What castle did David Gregory inherit?"
# Get the prediction. This contains `pred.context` and `pred.answer`.
pred = compiled_rag(my_question)
# Print the contexts and the answer.
print(f"Question: {my_question}")
print(f"Predicted Answer: {pred.answer}")
print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}")
# Show the most recent prompt (blue) and response (green) from lm.history.
print_prompt(lm)
print_response(lm)
Question: What castle did David Gregory inherit?
Predicted Answer: Kinnairdy Castle
Retrieved Contexts (truncated): ['David Gregory (physician) | David Gregory (20 December 1625 – 1720) was a Scottish physician and inventor. His surname is sometimes spelt as Gregorie, the original Scottish spelling. He inherited Kinn...', 'Gregory Tarchaneiotes | Gregory Tarchaneiotes (Greek: Γρηγόριος Ταρχανειώτης , Italian: "Gregorio Tracanioto" or "Tracamoto" ) was a "protospatharius" and the long-reigning catepan of Italy from 998 t...', 'David Gregory (mathematician) | David Gregory (originally spelt Gregorie) FRS (? 1659 – 10 October 1708) was a Scottish mathematician and astronomer. He was professor of mathematics at the University ...']
TODO: How did the prompt use 12 examples and 6 contexts?
From exploring the harder questions in the training/dev sets, it becomes clear that a single search query is often not enough for this task. For instance, this can be seen when a question asks about, say, the birth city of the writer of “Right Back At It Again”. A search query identifies the author correctly as “Jeremy McKinnon”, but it wouldn’t figure out where he was born.
The standard approach for this challenge in the retrieval-augmented NLP literature is to build multi-hop search systems, like GoldEn (Qi et al., 2019) and Baleen (Khattab et al., 2021). These systems read the retrieved results and then generate additional queries to gather additional information if necessary. Using DSPy, we can easily simulate such systems in a few lines of code.
We’ll still use the GenerateAnswer signature from the RAG implementation above. All we need now is a signature for the “hop” behavior: taking some partial context and a question, generate a search query to find missing information.
class GenerateSearchQuery(dspy.Signature):
    """Write a simple search query that will help answer a complex question."""

    # Inputs: the context gathered so far and the question. Output: the
    # query. The docstring and `desc` are runtime text and are kept verbatim.
    context = dspy.InputField(desc='may contain relevant facts')
    question = dspy.InputField()
    query = dspy.OutputField()
from dsp.utils import deduplicate
class SimplifiedBaleen(dspy.Module):
    """Multi-hop QA: generate a query per hop, retrieve, then answer."""

    def __init__(self, passages_per_hop=3, max_hops=2):
        """Create one query generator per hop, a retriever and an answerer."""
        super().__init__()

        self.generate_query = [
            dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)
        ]
        self.retrieve = dspy.Retrieve(k=passages_per_hop)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
        self.max_hops = max_hops

    def forward(self, question):
        """Run ``self.max_hops`` retrieval hops and answer from the context."""
        context = []

        for hop in range(self.max_hops):
            # Each hop has its own predictor and sees the context so far.
            hop_predictor = self.generate_query[hop]
            search_query = hop_predictor(context=context, question=question).query
            retrieved = self.retrieve(search_query).passages
            # Drop passages already collected in earlier hops.
            context = deduplicate(context + retrieved)

        answered = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=answered.answer)
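As we can see, the __init__ method defines a few key sub-modules:
- generate_query: For each hop, we will have one dspy.ChainOfThought predictor with the GenerateSearchQuery signature.
- retrieve: This dspy.Retrieve module will do the actual search, using the generated queries.
- generate_answer: This dspy.ChainOfThought module will be used after all the search steps. It has a GenerateAnswer signature, to actually produce an answer.

The forward method uses these sub-modules in simple control flow.
1. First, we’ll loop up to self.max_hops times.
2. In each iteration, we’ll generate a search query using the predictor at self.generate_query[hop].
3. We’ll retrieve the top-k passages using that query.
4. We’ll add the (deduplicated) passages to our accumulated context.
5. After the loop, we’ll use self.generate_answer to produce an answer.
6. We’ll return a prediction with the retrieved context and predicted answer.

We will also compile this program shortly. But, before that, we can try it out in a “zero-shot” setting (i.e., without any compilation).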
Using a program in zero-shot (uncompiled) setting doesn’t mean that quality will be bad. It just means that we’re bottlenecked directly by the reliability of the underlying LM to understand our sub-tasks from minimal instructions.
This is often just fine when using the most expensive/powerful models (e.g., GPT-4) on the easiest and most standard tasks (e.g., answering simple questions about popular entities).
However, a zero-shot approach quickly falls short for more specialized tasks, for novel domains/settings, and for more efficient (or open) models. DSPy can help you in all of these settings.
# A two-step question: first identify the castle, then find its storey count.
my_question = "How many storeys are in the castle that David Gregory inherited?"
# Get the prediction. This contains `pred.context` and `pred.answer`.
# SimplifiedBaleen() uses its defaults here: passages_per_hop=3, max_hops=2.
uncompiled_baleen = SimplifiedBaleen() # uncompiled (i.e., zero-shot) program
pred = uncompiled_baleen(my_question)
# Print the contexts and the answer.
print(f"Question: {my_question}")
print(f"Predicted Answer: {pred.answer}")
# Each retrieved passage is cut to its first 200 characters for display.
print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}")
Question: How many storeys are in the castle that David Gregory inherited?
Predicted Answer: Context: David Gregory inherited Kinnairdy Castle in 1664.
Question: How many storeys are in the castle that David Gregory inherited?
Reasoning: Let's think step by step in order to find the number of storeys in Kinnairdy Castle. We know that David Gregory inherited Kinnairdy Castle, and according to the context, Kinn
Retrieved Contexts (truncated): ['David Gregory (physician) | David Gregory (20 December 1625 – 1720) was a Scottish physician and inventor. His surname is sometimes spelt as Gregorie, the original Scottish spelling. He inherited Kinn...', 'Gregory Tarchaneiotes | Gregory Tarchaneiotes (Greek: Γρηγόριος Ταρχανειώτης , Italian: "Gregorio Tracanioto" or "Tracamoto" ) was a "protospatharius" and the long-reigning catepan of Italy from 998 t...', 'Gregory Parsloe-Parsloe | Sir Gregory Parsloe-Parsloe, 7th Baronet is a fictional character from the Blandings stories of P. G. Wodehouse. The seventh Baronet, who resides at Matchingham Hall, he is t...', 'Kinnairdy Castle | Kinnairdy Castle is a tower house, having five storeys and a garret, two miles south of Aberchirder, Aberdeenshire, Scotland. The alternative name is Old Kinnairdy....', 'Kinnaird Castle, Brechin | Kinnaird Castle is a 15th-century castle in Angus, Scotland. The castle has been home to the Carnegie family, the Earl of Southesk, for more than 600 years....', 'Kinnaird Head | Kinnaird Head (Scottish Gaelic: "An Ceann Àrd" , "high headland") is a headland projecting into the North Sea, within the town of Fraserburgh, Aberdeenshire on the east coast of Scotla...']
Let’s inspect the last three calls to the LM (i.e., generating the first hop’s query, generating the second hop’s query, and generating the answer).
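Now is the time to compile our multi-hop (SimplifiedBaleen) program.
We will first define our validation logic, which will simply require that:
- The predicted answer matches the gold answer.
- The retrieved context contains the gold answer.
- Neither the question nor any generated query is rambling (i.e., none exceeds 100 characters in length).
- None of the later generated queries roughly repeats an earlier hop (checked with answer_exact_match_str at frac=0.8).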
def validate_context_and_answer_and_hops(example, pred, trace=None):
    """Accept a prediction only if its answer, context and search hops all pass.

    Args:
        example: Gold example. Its ``question`` is read here, and the example is
            handed to the dspy answer-matching helpers.
        pred: Program prediction, handed to the dspy answer-matching helpers.
        trace: Iterable of step tuples whose last element is that step's
            outputs, or ``None`` when no trace is supplied.

    Returns:
        ``True`` when every check passes, otherwise ``False``.
    """
    if not dspy.evaluate.answer_exact_match(example, pred):
        return False
    if not dspy.evaluate.answer_passage_match(example, pred):
        return False

    # The default trace=None is not iterable; treat it as "no recorded steps"
    # so the metric can also be called without a trace.
    steps = trace if trace is not None else []
    hops = [example.question] + [outputs.query for *_, outputs in steps if 'query' in outputs]

    # NOTE: hops[0] is the question itself, so the 100-character limit applies
    # to the question as well as to the generated queries.
    if max(len(hop) for hop in hops) > 100:
        return False

    # Starting at index 2 compares every query after the first against the
    # question and all earlier queries; the first query is never compared.
    for idx in range(2, len(hops)):
        if dspy.evaluate.answer_exact_match_str(hops[idx], hops[:idx], frac=0.8):
            return False

    return True
# Configure DSPy with the Groq-served llama3-70b-8192 model and the retriever.
lm = dspy.GROQ(
    model='llama3-70b-8192',
    api_key=os.getenv('GROQ_API_KEY'),
)
dspy.settings.configure(rm=retriever, lm=lm)

# Compile the multi-hop program with BootstrapFewShot, using the validation
# function as the metric and a 2-passages-per-hop instance as the teacher.
teleprompter = BootstrapFewShot(metric=validate_context_and_answer_and_hops)
compiled_baleen = teleprompter.compile(
    SimplifiedBaleen(),
    trainset=trainset,
    teacher=SimplifiedBaleen(passages_per_hop=2),
)
100%|██████████| 20/20 [13:25<00:00, 40.28s/it]
{'generate_query[0]': {'lm': None,
'traces': [],
'train': [],
'demos': [Example({'augmented': True, 'context': [], 'question': 'Tombstone stared an actor born May 17, 1955 known as who?', 'rationale': "Here's the completed response:\n\nContext: N/A\n\nQuestion: Tombstone starred an actor born May 17, 1955 known as who?\n\nReasoning: Let's think step by step in order to find the answer. We know the actor's birthdate, May 17, 1955, and the movie they starred in, Tombstone. We can use this information to search for the actor's name.", 'query': '"Tombstone movie cast born May 17 1955"'}) (input_keys=None),
Example({'question': 'Which is taller, the Empire State Building or the Bank of America Tower?', 'answer': 'The Empire State Building'}) (input_keys=None),
Example({'question': 'Which American actress who made their film debut in the 1995 teen drama "Kids" was the co-founder of Voto Latino?', 'answer': 'Rosario Dawson'}) (input_keys=None),
Example({'question': 'Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field to go where? ', 'answer': 'space'}) (input_keys=None),
Example({'question': 'The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?', 'answer': '1950'}) (input_keys=None),
Example({'question': 'What is the code name for the German offensive that started this Second World War engagement on the Eastern Front (a few hundred kilometers from Moscow) between Soviet and German forces, which included 102nd Infantry Division?', 'answer': 'Operation Citadel'}) (input_keys=None),
Example({'question': 'Which of these publications was most recently published, Who Put the Bomp or Self?', 'answer': 'Self'}) (input_keys=None),
Example({'question': 'Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?', 'answer': 'Buena Vista Distribution'}) (input_keys=None),
Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys=None),
Example({'question': 'Which Pakistani cricket umpire who won 3 consecutive ICC umpire of the year awards in 2009, 2010, and 2011 will be in the ICC World Twenty20?', 'answer': 'Aleem Sarwar Dar'}) (input_keys=None),
Example({'question': 'Who is older, Aleksandr Danilovich Aleksandrov or Anatoly Fomenko?', 'answer': 'Aleksandr Danilovich Aleksandrov'}) (input_keys=None),
Example({'question': 'This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?', 'answer': 'The Waltz King'}) (input_keys=None),
Example({'question': 'The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?', 'answer': '2010'}) (input_keys=None),
Example({'question': 'On the coast of what ocean is the birthplace of Diogal Sakho?', 'answer': 'Atlantic'}) (input_keys=None),
Example({'question': 'which American actor was Candace Kita guest starred with ', 'answer': 'Bill Murray'}) (input_keys=None),
Example({'question': 'Who acted in the shot film The Shore and is also the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet." ?', 'answer': 'Kerry Condon'}) (input_keys=None)],
'signature_instructions': 'Write a simple search query that will help answer a complex question.',
'signature_prefix': 'Query:',
'extended_signature_instructions': 'Write a simple search query that will help answer a complex question.',
'extended_signature_prefix': 'Query:'},
'generate_query[1]': {'lm': None,
'traces': [],
'train': [],
'demos': [Example({'augmented': True, 'context': ['Wyatt Earp: Return to Tombstone | Wyatt Earp: Return to Tombstone is a 1994 independent film starring Hugh O\'Brian as Wyatt Earp, featuring new footage mixed with colorized sequences from O\'Brian\'s 1955-1961 television series "The Life and Legend of Wyatt Earp". The supporting cast for the new footage includes Bruce Boxleitner, Paul Brinegar, Harry Carey, Jr., Bo Hopkins, and Don Meredith. The colorized flashback archival footage from the original television series features Douglas Fowley as Doc Holliday and Lloyd Corrigan as Ned Buntline. The movie was directed by Paul Landres and Frank McDonald.', 'Michael Biehn | Michael Connell Biehn (born July 31, 1956) is an American actor, primarily known for his military roles in science fiction films directed by James Cameron; as Sgt. Kyle Reese in "The Terminator" (1984), Cpl. Dwayne Hicks in "Aliens" (1986) and Lt. Coffey in "The Abyss" (1989). He was nominated for the Saturn Award for Best Actor for "Aliens." His other films include "The Fan" (1981), "K2" (1991), "Tombstone" (1993), "The Rock" (1996), "" (2001) and "Planet Terror" (2007). On television, he has appeared in "Hill Street Blues" (1984) and "Adventure Inc." (2002-03).'], 'question': 'Tombstone stared an actor born May 17, 1955 known as who?', 'rationale': 'Here is the answer:\n\nContext: [1] and [2] provide information about two different movies, "Wyatt Earp: Return to Tombstone" and "Tombstone", as well as an actor, Michael Biehn.\n\nQuestion: Tombstone stared an actor born May 17, 1955 known as who?\n\nReasoning: Let\'s think step by step in order to find the answer. We know that the question is asking about the movie "Tombstone", and we want to find the actor born on May 17, 1955 who starred in it.', 'query': '`Tombstone movie 1993 cast born May 17, 1955`\n\nThis query should return the answer: Val Kil'}) (input_keys=None),
Example({'question': 'Who is older, Aleksandr Danilovich Aleksandrov or Anatoly Fomenko?', 'answer': 'Aleksandr Danilovich Aleksandrov'}) (input_keys=None),
Example({'question': 'On the coast of what ocean is the birthplace of Diogal Sakho?', 'answer': 'Atlantic'}) (input_keys=None),
Example({'question': 'What is the code name for the German offensive that started this Second World War engagement on the Eastern Front (a few hundred kilometers from Moscow) between Soviet and German forces, which included 102nd Infantry Division?', 'answer': 'Operation Citadel'}) (input_keys=None),
Example({'question': 'Which Pakistani cricket umpire who won 3 consecutive ICC umpire of the year awards in 2009, 2010, and 2011 will be in the ICC World Twenty20?', 'answer': 'Aleem Sarwar Dar'}) (input_keys=None),
Example({'question': 'Who acted in the shot film The Shore and is also the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet." ?', 'answer': 'Kerry Condon'}) (input_keys=None),
Example({'question': 'Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field to go where? ', 'answer': 'space'}) (input_keys=None),
Example({'question': 'which American actor was Candace Kita guest starred with ', 'answer': 'Bill Murray'}) (input_keys=None),
Example({'question': 'Which American actress who made their film debut in the 1995 teen drama "Kids" was the co-founder of Voto Latino?', 'answer': 'Rosario Dawson'}) (input_keys=None),
Example({'question': 'Which of these publications was most recently published, Who Put the Bomp or Self?', 'answer': 'Self'}) (input_keys=None),
Example({'question': 'Which is taller, the Empire State Building or the Bank of America Tower?', 'answer': 'The Empire State Building'}) (input_keys=None),
Example({'question': 'This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?', 'answer': 'The Waltz King'}) (input_keys=None),
Example({'question': 'The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?', 'answer': '1950'}) (input_keys=None),
Example({'question': 'The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?', 'answer': '2010'}) (input_keys=None),
Example({'question': 'Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?', 'answer': 'Buena Vista Distribution'}) (input_keys=None),
Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys=None)],
'signature_instructions': 'Write a simple search query that will help answer a complex question.',
'signature_prefix': 'Query:',
'extended_signature_instructions': 'Write a simple search query that will help answer a complex question.',
'extended_signature_prefix': 'Query:'},
'retrieve': {'k': 3},
'generate_answer': {'lm': None,
'traces': [],
'train': [],
'demos': [Example({'augmented': True, 'context': ['Wyatt Earp: Return to Tombstone | Wyatt Earp: Return to Tombstone is a 1994 independent film starring Hugh O\'Brian as Wyatt Earp, featuring new footage mixed with colorized sequences from O\'Brian\'s 1955-1961 television series "The Life and Legend of Wyatt Earp". The supporting cast for the new footage includes Bruce Boxleitner, Paul Brinegar, Harry Carey, Jr., Bo Hopkins, and Don Meredith. The colorized flashback archival footage from the original television series features Douglas Fowley as Doc Holliday and Lloyd Corrigan as Ned Buntline. The movie was directed by Paul Landres and Frank McDonald.', 'Michael Biehn | Michael Connell Biehn (born July 31, 1956) is an American actor, primarily known for his military roles in science fiction films directed by James Cameron; as Sgt. Kyle Reese in "The Terminator" (1984), Cpl. Dwayne Hicks in "Aliens" (1986) and Lt. Coffey in "The Abyss" (1989). He was nominated for the Saturn Award for Best Actor for "Aliens." His other films include "The Fan" (1981), "K2" (1991), "Tombstone" (1993), "The Rock" (1996), "" (2001) and "Planet Terror" (2007). On television, he has appeared in "Hill Street Blues" (1984) and "Adventure Inc." (2002-03).', 'Tombstone (film) | Tombstone is a 1993 American Western film directed by George P. Cosmatos, written by Kevin Jarre (who was also the original director, but was replaced early in production), and starring Kurt Russell and Val Kilmer, with Sam Elliott, Bill Paxton, Powers Boothe, Michael Biehn, and Dana Delany in supporting roles, as well as narration by Robert Mitchum.'], 'question': 'Tombstone stared an actor born May 17, 1955 known as who?', 'rationale': "Reasoning: Let's think step by step in order to find the answer. We know that the question is asking about an actor born on May 17, 1955, who starred in the movie Tombstone. From the context, we can see that the movie Tombstone (1993) starred Kurt Russell and Val Kilmer, among others. 
However, none of them were born on May 17, 1955. But, we can see that Michael Biehn was also in the movie, and according to his bio, he was born on July 31, 1956, which doesn't match the birthdate we're looking for. So, we need to look further.", 'answer': 'Bill Paxton'}) (input_keys=None),
Example({'question': 'which American actor was Candace Kita guest starred with ', 'answer': 'Bill Murray'}) (input_keys=None),
Example({'question': 'Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field to go where? ', 'answer': 'space'}) (input_keys=None),
Example({'question': 'Which is taller, the Empire State Building or the Bank of America Tower?', 'answer': 'The Empire State Building'}) (input_keys=None),
Example({'question': 'This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?', 'answer': 'The Waltz King'}) (input_keys=None),
Example({'question': 'Which Pakistani cricket umpire who won 3 consecutive ICC umpire of the year awards in 2009, 2010, and 2011 will be in the ICC World Twenty20?', 'answer': 'Aleem Sarwar Dar'}) (input_keys=None),
Example({'question': 'Which of these publications was most recently published, Who Put the Bomp or Self?', 'answer': 'Self'}) (input_keys=None),
Example({'question': 'Which American actress who made their film debut in the 1995 teen drama "Kids" was the co-founder of Voto Latino?', 'answer': 'Rosario Dawson'}) (input_keys=None),
Example({'question': 'The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?', 'answer': '2010'}) (input_keys=None),
Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys=None),
Example({'question': 'Who acted in the shot film The Shore and is also the youngest actress ever to play Ophelia in a Royal Shakespeare Company production of "Hamlet." ?', 'answer': 'Kerry Condon'}) (input_keys=None),
Example({'question': 'What is the code name for the German offensive that started this Second World War engagement on the Eastern Front (a few hundred kilometers from Moscow) between Soviet and German forces, which included 102nd Infantry Division?', 'answer': 'Operation Citadel'}) (input_keys=None),
Example({'question': 'Who is older, Aleksandr Danilovich Aleksandrov or Anatoly Fomenko?', 'answer': 'Aleksandr Danilovich Aleksandrov'}) (input_keys=None),
Example({'question': 'Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?', 'answer': 'Buena Vista Distribution'}) (input_keys=None),
Example({'question': 'The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?', 'answer': '1950'}) (input_keys=None),
Example({'question': 'On the coast of what ocean is the birthplace of Diogal Sakho?', 'answer': 'Atlantic'}) (input_keys=None)],
'signature_instructions': 'Answers questions with short factoid answers',
'signature_prefix': 'Answer:',
'extended_signature_instructions': 'Answers questions with short factoid answers',
'extended_signature_prefix': 'Answer:'}}
from dspy.evaluate.evaluate import Evaluate
# Build the `evaluate_on_hotpotqa` evaluator once over the dev set; it is
# reused below with different programs and metrics.
evaluate_on_hotpotqa = Evaluate(
    devset=devset,
    num_threads=1,
    display_progress=True,
    display_table=5,
)
def gold_passages_retrieved(example, pred, trace=None):
    """Return True when every gold title is among the retrieved passage titles.

    Titles are normalized with ``dspy.evaluate.normalize_text`` before the
    comparison. A passage's title is the text before its first ' | '
    separator. ``trace`` is accepted but not used.
    """
    normalize = dspy.evaluate.normalize_text
    gold_titles = {normalize(title) for title in example['gold_titles']}
    found_titles = {normalize(passage.split(' | ')[0]) for passage in pred.context}
    return gold_titles <= found_titles
# Score `compiled_rag` on the dev set with the `gold_passages_retrieved` metric
# and keep the result for later comparison.
compiled_rag_retrieval_score = evaluate_on_hotpotqa(compiled_rag, metric=gold_passages_retrieved)
# Evaluate the `compiled_rag` program with the `answer_exact_match` metric.
metric = dspy.evaluate.answer_exact_match
evaluate_on_hotpotqa(compiled_rag, metric=metric)