implement optional telemetry gathering

Senko Rasic
2023-11-14 18:38:53 +01:00
parent a10566f22e
commit bec621261c
6 changed files with 81 additions and 9 deletions

View File

@@ -24,6 +24,7 @@ from database.models.files import File
 from logger.logger import logger
 from utils.dot_gpt_pilot import DotGptPilot
+from utils.telemetry import telemetry


 class Project:
     def __init__(self, args, name=None, project_description=None, clarifications=None, user_stories=None,
@@ -81,6 +82,7 @@ class Project:
         """
         Start the project.
        """
+        telemetry.start()
         self.project_manager = ProductOwner(self)
         self.project_manager.get_project_description()
View File

@@ -17,6 +17,8 @@ from utils.exit import exit_gpt_pilot
 from logger.logger import logger
 from database.database import database_exists, create_database, tables_exist, create_tables, get_created_apps_with_steps
 from utils.settings import settings, loader
+from utils.telemetry import telemetry
+

 def init():
     # Check if the "euclid" database exists, if not, create it
@@ -42,9 +44,9 @@ if __name__ == "__main__":
     # Override the built-in 'open' with our version
     builtins.open = get_custom_open
     # sys.argv.append('--ux-test=' + 'continue_development')

     args = init()
     builtins.print, ipc_client_instance = get_custom_print(args)
@@ -66,6 +68,13 @@ if __name__ == "__main__":
            run_test(args['--ux-test'], args)
            run_exit_fn = False
        else:
+            if settings.telemetry is None:
+                telemetry.setup()
+                loader.save("telemetry")
+
+            if args.get("app_id"):
+                telemetry.set("is_continuation", True)
+
            # TODO get checkpoint from database and fill the project with it
            project = Project(args, ipc_client_instance=ipc_client_instance)
            project.start()
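Note: telemetry is opt-in and configured once. When no saved setting exists (`settings.telemetry is None`), `telemetry.setup()` initializes it and `loader.save("telemetry")` persists the result, so subsequent runs skip this step. A hypothetical sketch of such a `setup()`; the uuid-based id and the `enabled` flag are assumptions, not part of this commit:

    import uuid

    def setup(self):
        # Hypothetical: create an anonymous, random telemetry id and enable
        # sending. The real setup() in utils/telemetry.py may differ.
        self.telemetry_id = f"telemetry-{uuid.uuid4()}"
        self.enabled = True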

View File

@@ -249,7 +249,12 @@ def test_send_enabled_and_successful(mock_settings, mock_post, caplog):
     telemetry = Telemetry()
     telemetry.send()

-    mock_post.assert_called_once_with("test-endpoint", json=telemetry.data)
+    expected = {
+        "pathId": "test-id",
+        "event": "pilot-telemetry",
+        "data": telemetry.data,
+    }
+    mock_post.assert_called_once_with("test-endpoint", json=expected)
     assert "sending anonymous telemetry data to test-endpoint" in caplog.text
@@ -266,7 +271,12 @@ def test_send_enabled_but_post_fails(mock_settings, mock_post):
     telemetry = Telemetry()
     telemetry.send()

-    mock_post.assert_called_once_with(telemetry.endpoint, json=telemetry.data)
+    expected = {
+        "pathId": "test-id",
+        "event": "pilot-telemetry",
+        "data": telemetry.data,
+    }
+    mock_post.assert_called_once_with(telemetry.endpoint, json=expected)


 @patch("utils.telemetry.requests.post")
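Note: both tests now assert the new wire format, where the raw metrics are wrapped in an envelope keyed by `pathId` and `event`. An illustrative payload (the top-level values come from the test fixtures; the nested keys are a small sample of `Telemetry.data`, not the full set):

    payload = {
        "pathId": "test-id",
        "event": "pilot-telemetry",
        "data": {"model": "gpt-4", "num_llm_requests": 42},
    }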

View File

@@ -6,8 +6,10 @@ import requests
 from helpers.cli import terminate_running_processes
 from utils.questionary import styled_text
+from utils.telemetry import telemetry
+

 def send_telemetry(path_id):
     # Prepare the telemetry data
     telemetry_data = {
         "pathId": path_id,
@@ -60,6 +62,7 @@ def ask_to_store_prompt(project, path_id):
     try:
         answer = styled_text(project, question, ignore_user_input_count=True)
         if answer == '':
+            telemetry.set("initial_prompt", init_prompt)
             response = requests.post("https://api.pythagora.io/telemetry", json=telemetry_data)
             response.raise_for_status()
     except requests.RequestException as err:
@@ -67,15 +70,29 @@ def ask_to_store_prompt(project, path_id):
 def ask_user_feedback(project, path_id, ask_feedback):
-    question = ('How did GPT Pilot do? Were you able to create any app that works? Please write any feedback you have '
-                'or just press ENTER to exit:')
+    question = ('Were you able to create any app that works? Please write any feedback you have or just press ENTER to exit:')
     feedback = None
     if ask_feedback:
         feedback = styled_text(project, question, ignore_user_input_count=True)
     if feedback:  # only send if user provided feedback
+        telemetry.set("user_feedback", feedback)
         send_feedback(feedback, path_id)


+def ask_user_email(project, path_id, ask_feedback):
+    if not ask_feedback:
+        return False
+
+    question = (
+        "How did GPT Pilot do? We'd love to talk with you and hear your thoughts. "
+        "If you'd like to be contacted by us, please provide your email address, or just press ENTER to exit:"
+    )
+    feedback = styled_text(project, question, ignore_user_input_count=True)
+    if feedback:  # only send if user provided feedback
+        telemetry.set("user_contact", feedback)
+        return True
+    return False
+

 def exit_gpt_pilot(project, ask_feedback=True):
     terminate_running_processes()
     path_id = get_path_id()
@@ -84,6 +101,16 @@ def exit_gpt_pilot(project, ask_feedback=True):
     ask_to_store_prompt(project, path_id)
-    ask_user_feedback(project, path_id, ask_feedback)
+
+    # Ask user for contact email. If they choose not to provide it, ask for direct
+    # feedback. Both are optional.
+    if not ask_user_email(project, path_id, ask_feedback):
+        ask_user_feedback(project, path_id, ask_feedback)
+
+    # We can figure out if we're done, but can't distinguish between crash and interrupted, yet
+    telemetry.set("end_result", "success" if (project is not None and project.finished) else None)
+    telemetry.set("num_commands", project.command_runs_count if project is not None else 0)
+    telemetry.set("num_inputs", project.user_inputs_count if project is not None else 0)
+    telemetry.send()
+
     print('Exit', type='exit')
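Note: the exit path now asks for a contact email first and falls back to free-form feedback only if no email was given; both prompts are optional. A condensed, non-runnable outline of the control flow spread across the two hunks above (all names are from this diff):

    def exit_gpt_pilot(project, ask_feedback=True):
        terminate_running_processes()
        path_id = get_path_id()
        ask_to_store_prompt(project, path_id)
        # Email prompt first; free-form feedback only if the user declined
        if not ask_user_email(project, path_id, ask_feedback):
            ask_user_feedback(project, path_id, ask_feedback)
        telemetry.send()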

View File

@@ -17,6 +17,8 @@ from utils.utils import fix_json, get_prompt
 from utils.function_calling import add_function_calls_to_request, FunctionCallSet, FunctionType
 from utils.questionary import styled_text
+from .telemetry import telemetry
+

 def get_tokens_in_messages(messages: List[str]) -> int:
     tokenizer = tiktoken.get_encoding("cl100k_base")  # GPT-4 tokenizer
     tokenized_messages = [tokenizer.encode(message['content']) for message in messages]
@@ -301,6 +303,10 @@ def stream_gpt_completion(data, req_type, project):
     model = os.getenv('MODEL_NAME', 'gpt-4')
     endpoint = os.getenv('ENDPOINT')

+    # This will be set many times but we don't care, as there are no side-effects to it.
+    telemetry.set("model", model)
+    telemetry.inc("num_llm_requests")
+
     logger.info(f'> Request model: {model}')
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug('\n'.join([f"{message['role']}: {message['content']}" for message in data['messages']]))
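Note: `telemetry.inc("num_llm_requests")` bumps a counter on every LLM request, while `telemetry.set("model", model)` is idempotent, which is why re-setting it per request is harmless, as the diff comment says. A minimal sketch of an `inc()` method consistent with this call site (its real body is not shown in this diff; the default step of 1 is an assumption):

    def inc(self, name: str, value: int = 1):
        # Hypothetical counter helper on Telemetry: bump a numeric field
        # such as "num_llm_requests" in self.data.
        if name in self.data:
            self.data[name] += value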

View File

@@ -76,6 +76,8 @@ class Telemetry:
             "pilot_version": version,
             # LLM used
             "model": None,
+            # Initial prompt
+            "initial_prompt": None,
             # Number of LLM requests made
             "num_llm_requests": 0,
             # Number of tokens used for LLM requests
@@ -97,6 +99,13 @@ class Telemetry:
             # Optional user contact email
             "user_contact": None,
         }
+        if sys.platform == "linux":
+            try:
+                import distro
+                self.data["linux_distro"] = distro.name(pretty=True)
+            except Exception as err:
+                log.debug(f"Error getting Linux distribution info: {err}", exc_info=True)
+
         self.start_time = None
         self.end_time = None
@@ -187,7 +196,7 @@ class Telemetry:
         self.end_time = time.time()
         self.data["elapsed_time"] = self.end_time - self.start_time

-    def send(self):
+    def send(self, event: str = "pilot-telemetry"):
         """
         Send telemetry data to the phone-home endpoint.
@@ -200,11 +209,20 @@ class Telemetry:
             log.error("Telemetry.send(): cannot send telemetry, no endpoint configured")
             return

         if self.start_time is not None and self.end_time is None:
             self.stop()

+        payload = {
+            "pathId": self.telemetry_id,
+            "event": event,
+            "data": self.data,
+        }
+
         log.debug(
             f"Telemetry.send(): sending anonymous telemetry data to {self.endpoint}"
         )
         try:
-            requests.post(self.endpoint, json=self.data)
+            requests.post(self.endpoint, json=payload)
         except Exception as e:
             log.error(
                 f"Telemetry.send(): failed to send telemetry data: {e}", exc_info=True
             )