from vespa.application import Vespa
from learntorank.query import QueryModel, Ranking, OR
# Handle to the Vespa application served at the URL below.
app = Vespa(url="https://api.cord19.vespa.ai")

# Query model used for every request: OR() as the match phase and a Ranking
# named "bm25". list_features=True is forwarded to Ranking as-is
# (presumably it asks Vespa to return rank features -- confirm in the
# learntorank documentation).
query_model = QueryModel(
    match_phase=OR(),
    ranking=Ranking(name="bm25", list_features=True),
)

# Collect training data from application
Collect training data to analyse and/or improve ranking functions
Example setup
Connect to the application and define a query model.
Define some labeled data.
# Labeled data: one dict per query, holding the query id, the query text and
# the list of documents marked relevant (each with an "id" and a "score").
labeled_data = [
    {
        "query_id": 0,
        "query": "Intrauterine virus infections and congenital heart disease",
        "relevant_docs": [{"id": 0, "score": 1}, {"id": 3, "score": 1}],
    },
    {
        "query_id": 1,
        "query": "Clinical and immunologic studies in identical twins discordant for systemic lupus erythematosus",
        "relevant_docs": [{"id": 1, "score": 1}, {"id": 5, "score": 1}],
    },
]

# Collect training data in batch
from learntorank.query import collect_vespa_features

# Collect features for all labeled queries in a single call.
# - id_field names the document field that the "id" values in
#   labeled_data refer to.
# - number_additional_docs=2 requests extra documents beyond the labeled
#   ones (the result displayed below contains rows with label 0 for them).
# - fields selects which feature group to retrieve; here "rankfeatures".
training_data_batch = collect_vespa_features(
    app=app,
    labeled_data=labeled_data,
    id_field="id",
    query_model=query_model,
    number_additional_docs=2,
    fields=["rankfeatures"],
)
training_data_batch

| | document_id | query_id | label | attributeMatch(authors.first) | attributeMatch(authors.first).averageWeight | attributeMatch(authors.first).completeness | attributeMatch(authors.first).fieldCompleteness | attributeMatch(authors.first).importance | attributeMatch(authors.first).matches | attributeMatch(authors.first).maxWeight | ... | textSimilarity(results).fieldCoverage | textSimilarity(results).order | textSimilarity(results).proximity | textSimilarity(results).queryCoverage | textSimilarity(results).score | textSimilarity(title).fieldCoverage | textSimilarity(title).order | textSimilarity(title).proximity | textSimilarity(title).queryCoverage | textSimilarity(title).score |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.062500 | 0.0 | 0.0000 | 0.142857 | 0.055357 |
| 1 | 3 | 0 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.142857 | 0.0 | 0.4375 | 0.142857 | 0.224554 |
| 4 | 255164 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.000000 | 1.0 | 1.0000 | 1.000000 | 1.000000 |
| 5 | 120761 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.300000 | 1.0 | 1.0000 | 0.428571 | 0.688571 |
| 2 | 1 | 1 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.111111 | 0.0 | 0.0000 | 0.083333 | 0.047222 |
| 3 | 5 | 1 | 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.083333 | 0.0 | 0.0000 | 0.083333 | 0.041667 |
| 8 | 232555 | 1 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.000000 | 1.0 | 1.0000 | 1.000000 | 1.000000 |
| 9 | 13944 | 1 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.187500 | 1.0 | 1.0000 | 0.250000 | 0.612500 |
8 rows × 1038 columns