Evaluate application
Example setup
Connect to the application and define a query model.
from vespa.application import Vespa
from learntorank.query import QueryModel, Ranking, OR
app = Vespa(url = "https://api.cord19.vespa.ai")
query_model = QueryModel(
    match_phase = OR(),
    ranking = Ranking(name="bm25", list_features=True)
)
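Before evaluating, it can help to sanity-check the connection with a single query. The snippet below is a minimal sketch; it assumes learntorank exposes a send_query helper in learntorank.query that accepts the application, a query string, and the query model (verify the exact signature against your installed version).
from learntorank.query import send_query
# Assumed helper: run one query through the query model defined above
# and count the hits returned (signature may differ between versions).
response = send_query(
    app,
    query="Intrauterine virus infections and congenital heart disease",
    query_model=query_model,
)
print(len(response.hits))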
Labeled data
Define some labeled data. pyvespa expects labeled data to follow the format illustrated below: a list of dicts, where each dict represents a query and contains query_id, query, and a list of relevant_docs. Each relevant document contains a required id key and an optional score key.
labeled_data = [
    {
        "query_id": 0,
        "query": "Intrauterine virus infections and congenital heart disease",
        "relevant_docs": [{"id": 0, "score": 1}, {"id": 3, "score": 1}]
    },
    {
        "query_id": 1,
        "query": "Clinical and immunologic studies in identical twins discordant for systemic lupus erythematosus",
        "relevant_docs": [{"id": 1, "score": 1}, {"id": 5, "score": 1}]
    }
]
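If it is convenient to inspect the judgments in tabular form, the list of dicts can be flattened with plain pandas. This is only for inspection; the evaluate calls below consume the list-of-dicts format shown above.
from pandas import DataFrame
# One row per (query, relevant document) pair; score defaults to 1 when omitted.
rows = [
    {"query_id": q["query_id"], "query": q["query"],
     "doc_id": doc["id"], "score": doc.get("score", 1)}
    for q in labeled_data
    for doc in q["relevant_docs"]
]
DataFrame.from_records(rows)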
Define metrics
from learntorank.evaluation import MatchRatio, Recall, ReciprocalRank
eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10)]
Evaluate in batch
from learntorank.evaluation import evaluate
evaluation = evaluate(
    app=app,
    labeled_data = labeled_data,
    eval_metrics = eval_metrics,
    query_model = query_model,
    id_field = "id",
)
evaluation
| model | | default_name |
|---|---|---|
| match_ratio | mean | 0.853456 |
| | median | 0.853456 |
| | std | 0.055199 |
| recall_10 | mean | 0.000000 |
| | median | 0.000000 |
| | std | 0.000000 |
| reciprocal_rank_10 | mean | 0.000000 |
| | median | 0.000000 |
| | std | 0.000000 |
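To make these aggregates concrete, the toy functions below show what Recall at 10 and ReciprocalRank at 10 compute for a single ranked result list. They illustrate the metric definitions only and are not learntorank's implementation.
def recall_at_k(ranked_ids, relevant_ids, k=10):
    # Fraction of the relevant documents that show up in the top k results.
    return len(set(ranked_ids[:k]) & set(relevant_ids)) / len(relevant_ids)

def reciprocal_rank_at_k(ranked_ids, relevant_ids, k=10):
    # 1 / rank of the first relevant document in the top k, or 0 if none appear.
    for rank, doc_id in enumerate(ranked_ids[:k], start=1):
        if doc_id in relevant_ids:
            return 1 / rank
    return 0.0

# Toy ranked list for the first labeled query, where ids 0 and 3 are relevant.
ranked = [7, 3, 42, 0, 11]
print(recall_at_k(ranked, [0, 3]))           # 1.0: both relevant docs are in the top 10
print(reciprocal_rank_at_k(ranked, [0, 3]))  # 0.5: the first relevant doc is at rank 2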
Evaluate specific query
You can have finer control with the evaluate_query method.
from pandas import concat, DataFrame
from learntorank.evaluation import evaluate_query
evaluation = []
for query_data in labeled_data:
    query_evaluation = evaluate_query(
        app=app,
        eval_metrics = eval_metrics,
        query_model = query_model,
        query_id = query_data["query_id"],
        query = query_data["query"],
        id_field = "id",
        relevant_docs = query_data["relevant_docs"],
        default_score = 0
    )
    evaluation.append(query_evaluation)
evaluation = DataFrame.from_records(evaluation)
evaluation
| | model | query_id | match_ratio | recall_10 | reciprocal_rank_10 |
|---|---|---|---|---|---|
| 0 | default_name | 0 | 0.814425 | 0.0 | 0 |
| 1 | default_name | 1 | 0.892487 | 0.0 | 0 |
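Because the per-query results form a regular pandas DataFrame, you can aggregate them yourself to recover batch-style summary statistics; the line below uses plain pandas rather than a learntorank API.
# Average the per-query metrics across all labeled queries.
evaluation[["match_ratio", "recall_10", "reciprocal_rank_10"]].mean()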
Evaluate query under specific document ids
Use the recall parameter to specify which documents should be included in the evaluation.
In the example below, we include documents with id equal to 0, 1 and 2. Since the relevant documents for this query are the documents with id 0 and 3, we should get recall equal to 0.5.
query_evaluation = evaluate_query(
    app=app,
    eval_metrics = eval_metrics,
    query_model = query_model,
    query_id = 0,
    query = "Intrauterine virus infections and congenital heart disease",
    id_field = "id",
    relevant_docs = [{"id": 0, "score": 1}, {"id": 3, "score": 1}],
    default_score = 0,
    recall = ("id", [0, 1, 2])
)
query_evaluation
{'model': 'default_name',
'query_id': 0,
'match_ratio': 9.70242657688688e-06,
'recall_10': 0.5,
'reciprocal_rank_10': 1.0}
We now include documents with id equal to 0, 1, 2 and 3. Since both relevant documents (0 and 3) can now be retrieved, this should give a recall equal to 1.
query_evaluation = evaluate_query(
    app=app,
    eval_metrics = eval_metrics,
    query_model = query_model,
    query_id = 0,
    query = "Intrauterine virus infections and congenital heart disease",
    id_field = "id",
    relevant_docs = [{"id": 0, "score": 1}, {"id": 3, "score": 1}],
    default_score = 0,
    recall = ("id", [0, 1, 2, 3])
)
query_evaluation
{'model': 'default_name',
'query_id': 0,
'match_ratio': 1.2936568769182506e-05,
'recall_10': 1.0,
'reciprocal_rank_10': 1.0}