ragas脚本
0
import os
from openpyxl import load_workbook
from ragas import evaluate, EvaluationDataset
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import Faithfulness, ResponseRelevancy, LLMContextPrecisionWithReference
from langchain_openai import ChatOpenAI
from langchain_ollama import OllamaEmbeddings
# LLM handed to ragas' evaluate() below: the "deepseek-chat" model reached
# through LangChain's OpenAI-compatible chat client.
llm = LangchainLLMWrapper(
    ChatOpenAI(
        model="deepseek-chat",
        timeout=600,
        # Prefer a key supplied via the environment so the real secret does
        # not have to live in the source; the original placeholder literal is
        # kept as the fallback, so behavior is unchanged when the variable is
        # not set.
        api_key=os.environ.get("DEEPSEEK_API_KEY", "sk-...."),
        base_url="https://api.deepseek.com",
    )
)

# Embedding model handed to evaluate() below: "bge-m3" served by an Ollama
# instance at the local URL.
embedding = OllamaEmbeddings(
    model="bge-m3",
    base_url="http://localhost:11434",
)
def _cell_text(value):
    """Return a worksheet cell value as text, leaving empty cells as None."""
    return None if value is None else str(value)


# Read "qda.xlsx" and build one sample dict per data row of the active sheet.
# Exactly one entry is appended per row (nothing is filtered out), so the
# position of a sample in `dataset` is always its sheet row number minus 2.
dataset = []
workbook = load_workbook(filename="qda.xlsx")
sheet = workbook.active
# min_row=2 skips the header row.
for row in sheet.iter_rows(min_row=2, values_only=True):
    dataset.append(
        {
            # Cells may come back as numbers or dates; they are converted to
            # text so a non-string cell no longer breaks sample construction.
            "response": _cell_text(row[7]),    # column H
            "reference": _cell_text(row[6]),   # column G
            "user_input": _cell_text(row[0]),  # column A
            # Column J; an empty or missing context is replaced by "-".
            "retrieved_contexts": [_cell_text(row[9]) or "-"],
        }
    )
# Report how many samples were loaded (the message text means "data size").
print(f"数据大小:{len(dataset)}")
# Supported metrics: https://docs.ragas.org.cn/en/stable/concepts/metrics/available_metrics/
# Supported metrics: https://github.com/explodinggradients/ragas/tree/main/src/ragas/metrics
evaluation_dataset = EvaluationDataset.from_list(dataset)
selected_metrics = [
    Faithfulness(),  # faithfulness
    ResponseRelevancy(strictness=1),  # response relevancy
    LLMContextPrecisionWithReference(),  # context precision
]
# Run every selected metric over the dataset with the configured LLM and
# embedding model.
evaluation_result = evaluate(
    dataset=evaluation_dataset,
    metrics=selected_metrics,
    llm=llm,
    embeddings=embedding,
)
# Write the per-sample scores back into the sheet and save a copy.
# Each entry is (worksheet column number, header text, key in a score dict).
score_columns = (
    (11, "忠实度", "faithfulness"),
    (12, "回答相关性", "answer_relevancy"),
    (13, "上下文精度", "llm_context_precision_with_reference"),
)
# The row values are not needed here; the sheet is walked only to obtain
# 1-based row numbers.
for row_number, _ in enumerate(sheet.iter_rows(values_only=True), start=1):
    if row_number == 1:
        # First row: label the three score columns.
        for column, header, _key in score_columns:
            sheet.cell(row_number, column, header)
        continue
    # Sheet row 2 corresponds to scores[0], row 3 to scores[1], and so on.
    sample_scores = evaluation_result.scores[row_number - 2]
    for column, _header, key in score_columns:
        sheet.cell(row_number, column, sample_scores[key])
# Save under a new file name, then close the workbook.
workbook.save("qda-eval.xlsx")
workbook.close()