-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathtrigger_eval.py
More file actions
115 lines (97 loc) · 3.32 KB
/
trigger_eval.py
File metadata and controls
115 lines (97 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from openai import OpenAI
import os
from trulens_eval import TruLlama, FeedbackMode, Feedback, Tru
from trulens_eval.feedback import Groundedness
from trulens_eval import OpenAI as fOpenAI
from llama_index import StorageContext, load_index_from_storage
import streamlit as st
import numpy as np
import google.auth
import pandas as pd
import nest_asyncio
from datetime import datetime
import json
def write_file() -> None:
    """Write the Google service-account key from Streamlit secrets to disk.

    The service-account fields are read from ``st.secrets`` and dumped as a
    JSON object to the path stored in ``st.secrets["JSON_PATH"]``, replacing
    any previous content of that file.

    Raises:
        OSError: If the key file cannot be created or written.
    """
    credential_fields = (
        "type",
        "project_id",
        "private_key_id",
        "private_key",
        "client_email",
        "client_id",
        "auth_uri",
        "token_uri",
        "auth_provider_x509_cert_url",
        "client_x509_cert_url",
        "universe_domain",
    )
    # Collect every secret BEFORE touching the file, so a missing secret
    # does not leave a truncated/empty key file behind.
    creds = {field: st.secrets[field] for field in credential_fields}
    key_path = st.secrets["JSON_PATH"]

    # The file contains a private key: create it readable/writable by the
    # owner only instead of relying on the process umask. The mode is applied
    # only when the file is newly created; an existing file keeps its mode.
    fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        json.dump(creds, f)
# --- Google Cloud authentication ---
# write_file() dumps the service-account JSON to st.secrets["JSON_PATH"];
# the same path is then exported through GOOGLE_APPLICATION_CREDENTIALS
# before asking google.auth for the default credentials and project id.
write_file()
key_path = st.secrets["JSON_PATH"]
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS=key_path)
credentials, project_id = google.auth.default()

# --- OpenAI authentication ---
# Note the differing spellings: the secret is stored as OPEN_AI_API_KEY but
# exported as OPENAI_API_KEY.
os.environ.update(OPENAI_API_KEY=st.secrets["OPEN_AI_API_KEY"])
# Restore the index persisted under the "ask_priya_index" directory and
# expose it as the query engine that will be evaluated below.
storage_context = StorageContext.from_defaults(persist_dir="ask_priya_index")
index = load_index_from_storage(storage_context=storage_context)
query_engine = index.as_query_engine()
# Evaluation metrics (feedback functions). All three are computed through a
# single OpenAI-backed feedback provider.
provider = fOpenAI()

# "Answer Relevance": evaluated on the (input, output) pair of each record.
_answer_relevance = Feedback(
    provider.relevance_with_cot_reasons, name="Answer Relevance"
)
f_qa_relevance = _answer_relevance.on_input_output()

# Selector for the text of the source nodes; shared by the two
# context-based metrics below.
context_selection = TruLlama.select_source_nodes().node.text

# "Context Relevance": evaluated on the input against each selected node
# text, then averaged with np.mean.
_context_relevance = Feedback(
    provider.qs_relevance_with_cot_reasons, name="Context Relevance"
)
f_qs_relevance = (
    _context_relevance.on_input().on(context_selection).aggregate(np.mean)
)

# "Groundedness": evaluated on the selected node text against the output,
# combined with the Groundedness helper's own aggregator.
grounded = Groundedness(groundedness_provider=provider)
_groundedness = Feedback(
    grounded.groundedness_measure_with_cot_reasons, name="Groundedness"
)
f_groundedness = (
    _groundedness.on(context_selection)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)
# TruLens session object.
# NOTE(review): database_redact_keys=True presumably keeps secret keys out of
# the stored records - confirm against the TruLens documentation.
tru = Tru(database_redact_keys=True)
# To clear the database of previous evaluations, uncomment the line below.
# tru.reset_database()

# Create a recorder whose app id embeds the (local, naive) time of this run,
# so every evaluation run is stored under its own id.
cur_datetime = datetime.now()
cur_datetime_str = f"{cur_datetime:%Y-%m-%d %H:%M:%S}"
tru_recorder = TruLlama(
    query_engine,
    app_id=f"Engine {cur_datetime_str}",
    feedbacks=[f_qa_relevance, f_qs_relevance, f_groundedness],
)
# Load evaluation questions: one question per line, with surrounding
# whitespace (including the trailing newline) stripped. Blank lines are
# skipped so that stray or trailing empty lines are not sent to the query
# engine as empty questions. The file is decoded as UTF-8 explicitly so the
# result does not depend on the platform's locale encoding.
with open('data/eval_questions.txt', 'r', encoding='utf-8') as file:
    eval_questions = [
        question for question in (line.strip() for line in file) if question
    ]
# Send every evaluation question through the query engine. Each query runs
# inside its own recorder context so the call is captured by tru_recorder.
for question in eval_questions:
    with tru_recorder:
        query_engine.query(question)

# Fetch the stored records and feedback results.
# NOTE(review): app_ids=[] presumably selects every app - confirm.
records, feedback = tru.get_records_and_feedback(app_ids=[])

# Print performance
leaderboard = tru.get_leaderboard(app_ids=[])
print(leaderboard)

# Open Streamlit dashboard
# nest_asyncio.apply()
# tru.run_dashboard()